diff --git a/expat/expat.mak b/expat/expat.mak index cb54a2f3..959f2ab9 100755 --- a/expat/expat.mak +++ b/expat/expat.mak @@ -54,7 +54,6 @@ INTDIR=.\Release ALL : "$(OUTDIR)\xmltok.dll" CLEAN : - -@erase "$(INTDIR)\wxmltok.obj" -@erase "$(INTDIR)\xmltok.obj" -@erase "$(OUTDIR)\xmltok.dll" -@erase "$(OUTDIR)\xmltok.exp" @@ -112,7 +111,6 @@ LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\ /pdb:"$(OUTDIR)/xmltok.pdb" /machine:I386 /out:"$(OUTDIR)/xmltok.dll"\ /implib:"$(OUTDIR)/xmltok.lib" LINK32_OBJS= \ - "$(INTDIR)\wxmltok.obj" \ "$(INTDIR)\xmltok.obj" "$(OUTDIR)\xmltok.dll" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS) @@ -140,7 +138,6 @@ ALL : "$(OUTDIR)\xmltok.dll" CLEAN : -@erase "$(INTDIR)\vc40.idb" -@erase "$(INTDIR)\vc40.pdb" - -@erase "$(INTDIR)\wxmltok.obj" -@erase "$(INTDIR)\xmltok.obj" -@erase "$(OUTDIR)\xmltok.dll" -@erase "$(OUTDIR)\xmltok.exp" @@ -199,7 +196,6 @@ LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\ /pdb:"$(OUTDIR)/xmltok.pdb" /debug /machine:I386 /out:"$(OUTDIR)/xmltok.dll"\ /implib:"$(OUTDIR)/xmltok.lib" LINK32_OBJS= \ - "$(INTDIR)\wxmltok.obj" \ "$(INTDIR)\xmltok.obj" "$(OUTDIR)\xmltok.dll" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS) @@ -380,37 +376,40 @@ LINK32_OBJS= \ # Begin Source File SOURCE=.\xmltok.c -DEP_CPP_XMLTO=\ - ".\xmltok.h"\ - - -"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" - - -# End Source File -################################################################################ -# Begin Source File - -SOURCE=.\wxmltok.c !IF "$(CFG)" == "xmltok - Win32 Release" -DEP_CPP_WXMLT=\ - ".\xmltok.c"\ +DEP_CPP_XMLTO=\ + ".\asciitab.h"\ + ".\latin1tab.h"\ + ".\nametab.h"\ + ".\utf8tab.h"\ ".\xmltok.h"\ + ".\xmltok_impl.c"\ + ".\xmltok_impl.h"\ +# ADD CPP /Ob2 -"$(INTDIR)\wxmltok.obj" : $(SOURCE) $(DEP_CPP_WXMLT) "$(INTDIR)" ".\xmltok.c" +"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" + $(CPP) /nologo /MT /W3 /GX /O2 /Ob2 /D "WIN32" /D "NDEBUG" /D 
"_WINDOWS"\ + /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /c $(SOURCE) !ELSEIF "$(CFG)" == "xmltok - Win32 Debug" -DEP_CPP_WXMLT=\ - ".\xmltok.c"\ +DEP_CPP_XMLTO=\ + ".\asciitab.h"\ + ".\latin1tab.h"\ + ".\nametab.h"\ + ".\utf8tab.h"\ ".\xmltok.h"\ + ".\xmltok_impl.c"\ + ".\xmltok_impl.h"\ -"$(INTDIR)\wxmltok.obj" : $(SOURCE) $(DEP_CPP_WXMLT) "$(INTDIR)" ".\xmltok.c" +"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" + $(CPP) /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS"\ + /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c $(SOURCE) !ENDIF @@ -451,6 +450,9 @@ DEP_CPP_WXMLT=\ # Begin Source File SOURCE=.\xmlec\xmlec.c + +!IF "$(CFG)" == "xmlec - Win32 Release" + DEP_CPP_XMLEC=\ ".\xmltok.h"\ @@ -459,6 +461,18 @@ DEP_CPP_XMLEC=\ $(CPP) $(CPP_PROJ) $(SOURCE) +!ELSEIF "$(CFG)" == "xmlec - Win32 Debug" + +DEP_CPP_XMLEC=\ + ".\xmltok.h"\ + + +"$(INTDIR)\xmlec.obj" : $(SOURCE) $(DEP_CPP_XMLEC) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +!ENDIF + # End Source File # End Target # End Project diff --git a/expat/xmlec/xmlec.c b/expat/xmlec/xmlec.c index cb91f989..0d3524f0 100755 --- a/expat/xmlec/xmlec.c +++ b/expat/xmlec/xmlec.c @@ -11,7 +11,8 @@ #include static -int XmlSkipProlog(const char **s, const char *end, const char **nextTokP); +int XmlSkipProlog(const char **s, const char *end, const char **nextTokP, + const ENCODING **enc); int XmlParse(const char *s, size_t n, const char *filename) { @@ -19,7 +20,8 @@ int XmlParse(const char *s, size_t n, const char *filename) const char *start = s; const char *end = s + n; const char *next; - int tok = XmlSkipProlog(&s, end, &next); + const ENCODING *enc; + int tok = XmlSkipProlog(&s, end, &next, &enc); for (;;) { switch (tok) { case XML_TOK_NONE: @@ -31,12 +33,15 @@ int XmlParse(const char *s, size_t n, const char *filename) return 1; case XML_TOK_INVALID: fprintf(stderr, "%s: well-formedness error at byte %lu\n", - filename, (unsigned long)(s - start)); + filename, 
(unsigned long)(next - start)); return 0; case XML_TOK_PARTIAL: fprintf(stderr, "%s: unclosed token started at byte %lu\n", filename, (unsigned long)(s - start)); return 0; + case XML_TOK_PARTIAL_CHAR: + fprintf(stderr, "%s: malformed input\n", filename); + return 0; case XML_TOK_COMMENT: break; case XML_TOK_START_TAG: @@ -46,22 +51,25 @@ int XmlParse(const char *s, size_t n, const char *filename) break; } s = next; - tok = XmlContentTok(s, end, &next); + tok = XmlContentTok(enc, s, end, &next); } /* not reached */ } static -int XmlSkipProlog(const char **startp, const char *end, const char **nextTokP) +int XmlSkipProlog(const char **startp, const char *end, + const char **nextTokP, const ENCODING **enc) { const char *s = *startp; + INIT_ENCODING initEnc; + XmlInitEncoding(&initEnc, enc); for (;;) { - int tok = XmlPrologTok(s, end, nextTokP); + int tok = XmlPrologTok(*enc, s, end, nextTokP); switch (tok) { - case XML_TOK_NONE: - case XML_TOK_INVALID: - case XML_TOK_PARTIAL: case XML_TOK_START_TAG: + case XML_TOK_INVALID: + case XML_TOK_NONE: + case XML_TOK_PARTIAL: *startp = s; return tok; default: @@ -122,9 +130,11 @@ struct XmlTokBuffer { char *ptr; size_t size; int fd; - int doneProlog; + int state; int eof; unsigned long endOffset; + const ENCODING *enc; + INIT_ENCODING initEnc; }; #define XmlTokBufferOffset(tb) ((tb)->endOffset - ((tb)->end - (tb)->ptr)) @@ -145,9 +155,10 @@ void XmlTokBufferInit(struct XmlTokBuffer *tb, int fd) tb->ptr = tb->buf; tb->size = READSIZE; tb->fd = fd; - tb->doneProlog = 0; + tb->state = XML_PROLOG_STATE; tb->eof = 0; tb->endOffset = 0; + XmlInitEncoding(&(tb->initEnc), &(tb->enc)); } void XmlTokBufferFree(struct XmlTokBuffer *tb) @@ -161,14 +172,10 @@ int XmlGetToken(struct XmlTokBuffer *tb, const char **tokStart, size_t *tokLengt for (;;) { int nBytes; const char *start = tb->ptr; - if (!tb->doneProlog) { - tok = XmlPrologTok(start, tb->end, &tb->ptr); - if (tok == XML_TOK_START_TAG) - tb->doneProlog = 1; - } - else - tok = 
XmlContentTok(start, tb->end, &tb->ptr); + tok = XmlTok(tb->enc, tb->state, start, tb->end, &tb->ptr); if (tok >= 0) { + if (tok == XML_TOK_START_TAG) + tb->state = XML_CONTENT_STATE; *tokStart = start; *tokLength = tb->ptr - start; break; @@ -275,6 +282,7 @@ int main(int argc, char **argv) fprintf(stderr, "usage: %s filename ...\n", argv[0]); return 1; } + fprintf(stderr, "version 0.1\n"); for (i = 1; i < argc; i++) if (!doFile(argv[i])) ret = 1; diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index cedfbeda..f1a0d351 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -1,389 +1,221 @@ +/* TODO + +Provide methods to convert to any of UTF-8, UTF-16, UCS-4. + +Better prolog tokenization + +> 5)] & (1 << ((lo) & 0x1F))) -#define DIGIT_CASES \ - case '0': case '1': case '2': case '3': case '4': \ - case '5': case '6': case '7': case '8': case '9': +/* A 2 byte UTF-8 representation splits the character's 11 bits +between the bottom 5 and 6 bits of the bytes. +We need 8 bits to index into pages, 3 bits to add to that index and +5 bits to generate the mask. */ +#define UTF8_GET_NAMING2(pages, byte) \ + (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + + ((((byte)[0]) & 3) << 1) \ + + ((((byte)[1]) >> 5) & 1)] \ + & (1 << (((byte)[1]) & 0x1F))) -#define HEX_DIGIT_CASES DIGIT_CASES \ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': \ - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': +/* A 3 byte UTF-8 representation splits the character's 16 bits +between the bottom 4, 6 and 6 bits of the bytes. +We need 8 bits to index into pages, 3 bits to add to that index and +5 bits to generate the mask. 
*/ +#define UTF8_GET_NAMING3(pages, byte) \ + (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ + + ((((byte)[1]) >> 2) & 0xF)] \ + << 3) \ + + ((((byte)[1]) & 3) << 1) \ + + ((((byte)[2]) >> 5) & 1)] \ + & (1 << (((byte)[2]) & 0x1F))) -#define S_CASES case ' ': case '\t': case '\r': case '\n': +#define UTF8_GET_NAMING(pages, p, n) \ + ((n) == 2 \ + ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ + : ((n) == 3 \ + ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ + : 0)) -/* ptr points to character following "type[(unsigned char)*(p)]) +#define IS_NAME_CHAR(enc, p, n) UTF8_GET_NAMING(namePages, p, n) +#define IS_NMSTRT_CHAR(enc, p, n) UTF8_GET_NAMING(nmstrtPages, p, n) + +/* c is an ASCII character */ +#define CHAR_MATCHES(enc, p, c) (*(p) == c) + +#define PREFIX(ident) normal_ ## ident +#include "xmltok_impl.c" + +#undef MINBPC +#undef BYTE_TYPE +#undef CHAR_MATCHES +#undef IS_NAME_CHAR +#undef IS_NMSTRT_CHAR + +const struct normal_encoding utf8_encoding = { + { { PREFIX(prologTok), PREFIX(contentTok) }, 1 }, +#include "asciitab.h" +#include "utf8tab.h" +}; + +#undef PREFIX + +static unsigned char latin1tab[256] = { +#include "asciitab.h" +#include "latin1tab.h" +}; + +static int unicode_byte_type(char hi, char lo) { - if (ptr != end) { - if (*ptr != '-') { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - for (++ptr; ptr != end; ptr++) { - if (*ptr == '-') { - if (++ptr == end) - return XML_TOK_PARTIAL; - if (*ptr == '-') { - if (++ptr == end) - return XML_TOK_PARTIAL; - if (*ptr != '>') { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + 1; - return XML_TOK_COMMENT; - } - } + switch ((unsigned char)hi) { + case 0xD8: case 0xD9: case 0xDA: case 0xDB: + return BT_LEAD4; + case 0xDC: case 0xDD: case 0xDE: case 0xDF: + return BT_TRAIL; + case 0xFF: + switch ((unsigned char)lo) { + case 0xFF: + case 0xFE: + return BT_NONXML; } + break; } - return XML_TOK_PARTIAL; + return BT_NONASCII; } -/* ptr points to character following 
"') { - *nextTokPtr = ptr + 2; - return XML_TOK_PI; - } + if (ptr == end) + return XML_TOK_NONE; + encPtr = ((const INIT_ENCODING *)enc)->encPtr; + if (ptr + 1 == end) { + switch ((unsigned char)*ptr) { + case 0xFE: + case 0xFF: + case 0x00: + case 0x3C: + return XML_TOK_PARTIAL; } } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "<" */ - -static -int scanStartTag(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) -{ - for (; ptr != end; ++ptr) { - switch (*ptr) { - case '<': - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case '>': - *nextTokPtr = ptr + 1; - return XML_TOK_START_TAG; - case '"': - for (++ptr;; ++ptr) { - if (ptr == end) - return XML_TOK_PARTIAL; - if (*ptr == '"') - break; - } - break; - case '\'': - for (++ptr;; ++ptr) { - if (ptr == end) - return XML_TOK_PARTIAL; - if (*ptr == '\'') - break; - } - break; - case '/': - if (++ptr == end) - return XML_TOK_PARTIAL; - if (*ptr != '>') { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + 1; - return XML_TOK_EMPTY_ELEMENT; + else { + switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { + case 0x003C: + *encPtr = &big2_encoding; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + case 0xFEFF: + *nextTokPtr = ptr + 2; + *encPtr = &big2_encoding; + return XML_TOK_BOM; + case 0x3C00: + *encPtr = &little2_encoding; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + case 0xFFFE: + *nextTokPtr = ptr + 2; + *encPtr = &little2_encoding; + return XML_TOK_BOM; } } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "': - *nextTokPtr = ptr + 1; - return XML_TOK_END_TAG; - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "&#X" */ - -static -int scanHexCharRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) -{ - if (ptr != end) { - switch (*ptr) { - HEX_DIGIT_CASES - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - for (++ptr; ptr != end; ++ptr) { - switch 
(*ptr) { - HEX_DIGIT_CASES - break; - case ';': - *nextTokPtr = ptr + 1; - return XML_TOK_CHAR_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "&#" */ - -static -int scanCharRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) -{ - if (ptr != end) { - switch (*ptr) { - case 'x': - case 'X': - return scanHexCharRef(ptr + 1, end, nextTokPtr); - DIGIT_CASES - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - for (++ptr; ptr != end; ++ptr) { - switch (*ptr) { - DIGIT_CASES - break; - case ';': - *nextTokPtr = ptr + 1; - return XML_TOK_CHAR_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - } - return XML_TOK_PARTIAL; + *encPtr = &utf8_encoding.enc; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } static -int scanEntityRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) +int initScanProlog(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { - for (; ptr != end; ++ptr) { - switch (*ptr) { - case '<': - case '>': - case '&': - S_CASES - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case ';': - *nextTokPtr = ptr + 1; - return XML_TOK_ENTITY_REF; - } - } - return XML_TOK_PARTIAL; + return initScan(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); } -/* ptr points to character following " */ - if (end - ptr < 9) - return XML_TOK_PARTIAL; - for (i = 0; i < 6; i++, ptr++) { - if (*ptr != "CDATA["[i]) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - end -= 2; - for (; ptr != end; ++ptr) { - if (*ptr == ']') { - if (ptr[1] == ']' && ptr[2] == '>') { - *nextTokPtr = ptr + 3; - return XML_TOK_CDATA_SECTION; - } - } - } - return XML_TOK_PARTIAL; - + return initScan(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); } -int XmlContentTok(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) +void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr) { - if (ptr != 
end) { - switch (*ptr) { - case '<': - { - ++ptr; - if (ptr == end) - return XML_TOK_PARTIAL; - switch (*ptr) { - case '!': - if (++ptr == end) - return XML_TOK_PARTIAL; - switch (*ptr) { - case '-': - return scanComment(ptr + 1, end, nextTokPtr); - case '[': - return scanCdataSection(ptr + 1, end, nextTokPtr); - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case '?': - return scanPi(ptr + 1, end, nextTokPtr); - case '/': - return scanEndTag(ptr + 1, end, nextTokPtr); - case '>': - S_CASES - *nextTokPtr = ptr; - return XML_TOK_INVALID; - default: - return scanStartTag(ptr, end, nextTokPtr); - } - } - case '&': - { - ++ptr; - if (ptr == end) - return XML_TOK_PARTIAL; - switch (*ptr) { - case '#': - return scanCharRef(ptr + 1, end, nextTokPtr); - S_CASES - case ';': - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - return scanEntityRef(ptr + 1, end, nextTokPtr); - } - default: - { - for (++ptr; ptr != end; ++ptr) { - switch (*ptr) { - case '&': - case '<': - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - } - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - } - } - } - return XML_TOK_NONE; -} - -int XmlPrologTok(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr) -{ - if (ptr != end) { - switch (*ptr) { - case '"': - { - for (++ptr; ptr != end; ++ptr) { - if (*ptr == '"') { - *nextTokPtr = ptr + 1; - return XML_TOK_LITERAL; - } - } - return XML_TOK_PARTIAL; - } - case '\'': - { - for (++ptr; ptr != end; ++ptr) { - if (*ptr == '\'') { - *nextTokPtr = ptr + 1; - return XML_TOK_LITERAL; - } - } - return XML_TOK_PARTIAL; - } - case '<': - { - ++ptr; - if (ptr == end) - return XML_TOK_PARTIAL; - switch (*ptr) { - case '!': - return scanDecl(ptr + 1, end, nextTokPtr); - case '?': - return scanPi(ptr + 1, end, nextTokPtr); - case '/': - *nextTokPtr = ptr; - return XML_TOK_INVALID; - default: - return XmlContentTok(ptr - 1, end, nextTokPtr); - } - } - default: - { - for (++ptr; ptr != end; ++ptr) { - switch (*ptr) { - case '<': - case '"': - case 
'\'': - *nextTokPtr = ptr; - return XML_TOK_PROLOG_CHARS; - } - } - *nextTokPtr = ptr; - return XML_TOK_PROLOG_CHARS; - } - } - } - return XML_TOK_NONE; + p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog; + p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent; + p->initEnc.minBytesPerChar = 1; + p->encPtr = encPtr; + *encPtr = &(p->initEnc); } diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 457979d2..511251e7 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -1,40 +1,53 @@ #ifndef XmlTok_INCLUDED #define XmlTok_INCLUDED 1 +#ifdef __cplusplus +extern "C" { +#endif + #ifndef XMLTOKAPI #define XMLTOKAPI /* as nothing */ #endif -#include - /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_NONE -2 /* The string to be scanned is empty */ -#define XML_TOK_PARTIAL -1 +#define XML_TOK_NONE -3 /* The string to be scanned is empty */ +#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 -#define XML_TOK_COMMENT 1 -#define XML_TOK_PI 2 /* processing instruction */ +#define XML_TOK_BOM 1 /* Byte order mark */ +#define XML_TOK_COMMENT 2 +#define XML_TOK_PI 3 /* processing instruction */ /* The following tokens are returned only by XmlPrologTok */ -#define XML_TOK_LITERAL 3 -#define XML_TOK_PROLOG_CHARS 4 +#define XML_TOK_LITERAL 4 +#define XML_TOK_PROLOG_CHARS 5 +#define XML_TOK_PROLOG_S 6 /* The following token is returned by XmlPrologTok when it detects the end of the prolog and is also returned by XmlContentTok */ -#define XML_TOK_START_TAG 5 +#define XML_TOK_START_TAG 7 /* The following tokens are returned only by XmlContentTok */ -#define XML_TOK_END_TAG 6 -#define XML_TOK_EMPTY_ELEMENT 7 /* empty element tag */ -#define XML_TOK_DATA_CHARS 8 -#define XML_TOK_CDATA_SECTION 9 -#define XML_TOK_ENTITY_REF 10 -#define XML_TOK_CHAR_REF 11 /* numeric character reference */ +#define 
XML_TOK_END_TAG 8 +#define XML_TOK_EMPTY_ELEMENT 9 /* empty element tag */ +#define XML_TOK_DATA_CHARS 10 +#define XML_TOK_CDATA_SECTION 11 +#define XML_TOK_ENTITY_REF 12 +#define XML_TOK_CHAR_REF 13 /* numeric character reference */ -#ifdef __cplusplus -extern "C" { -#endif +#define XML_NSTATES 2 +#define XML_PROLOG_STATE 0 +#define XML_CONTENT_STATE 1 + +typedef struct encoding { + int (*scanners[XML_NSTATES])(const struct encoding *, + const char *, + const char *, + const char **); + int minBytesPerChar; +} ENCODING; /* Scan the string starting at ptr until the end of the next complete token, @@ -56,30 +69,25 @@ may be returned together. Similarly for characters in the prolog outside literals, comments and processing instructions. */ -int XMLTOKAPI XmlPrologTokA(const char *ptr, - const char *eptr, - const char **nextTokPtr); -int XMLTOKAPI XmlContentTokA(const char *ptr, - const char *eptr, - const char **nextTokPtr); -int XMLTOKAPI XmlPrologTokW(const wchar_t *ptr, - const wchar_t *eptr, - const wchar_t **nextTokPtr); -int XMLTOKAPI XmlContentTokW(const wchar_t *ptr, - const wchar_t *eptr, - const wchar_t **nextTokPtr); +#define XmlTok(enc, state, ptr, end, nextTokPtr) \ + (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) + +#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) + +#define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) + +typedef struct { + ENCODING initEnc; + const ENCODING **encPtr; +} INIT_ENCODING; + +void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **); #ifdef __cplusplus } #endif -#ifdef UNICODE -#define XmlPrologTok XmlPrologTokW -#define XmlContentTok XmlContentTokW -#else -#define XmlPrologTok XmlPrologTokA -#define XmlContentTok XmlContentTokA -#endif - #endif /* not XmlTok_INCLUDED */