From 9f521719014f96bc22ba720a3fcc82dc7c6f0fb4 Mon Sep 17 00:00:00 2001 From: James Clark Date: Sat, 22 Aug 1998 22:40:45 +0000 Subject: [PATCH] Lexical support for namespaces --- expat/xmlparse/xmlparse.c | 78 +++++++-- expat/xmlparse/xmlparse.h | 3 + expat/xmltok/asciitab.h | 2 +- expat/xmltok/iasciitab.h | 2 +- expat/xmltok/xmlrole.c | 8 + expat/xmltok/xmltok.c | 339 +++++++++++++++++++++++++++++++++++-- expat/xmltok/xmltok.h | 21 +++ expat/xmltok/xmltok_impl.c | 99 ++++++++++- expat/xmltok/xmltok_impl.h | 1 + expat/xmlwf/xmlwf.c | 15 ++ 10 files changed, 530 insertions(+), 38 deletions(-) diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index 4709579a..8d2643d4 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -28,6 +28,7 @@ Contributor(s): #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XmlConvert XmlUtf16Convert #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding +#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS #define XmlEncode XmlUtf16Encode #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) typedef unsigned short ICHAR; @@ -35,11 +36,23 @@ typedef unsigned short ICHAR; #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX #define XmlConvert XmlUtf8Convert #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding +#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS #define XmlEncode XmlUtf8Encode #define MUST_CONVERT(enc, s) (!(enc)->isUtf8) typedef char ICHAR; #endif + +#ifndef XMLNS + +#define XmlInitEncodingNS XmlInitEncoding +#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding +#undef XmlGetInternalEncodingNS +#define XmlGetInternalEncodingNS XmlGetInternalEncoding + +#endif + + #ifdef XML_UNICODE_WCHAR_T #define XML_T(x) L ## x #else @@ -225,6 +238,9 @@ typedef struct { const ENCODING *encoding; INIT_ENCODING initEncoding; const XML_Char *protocolEncodingName; +#ifdef XMLNS + int ns; +#endif void *unknownEncodingMem; void *unknownEncodingData; void *unknownEncodingHandlerData; @@ -274,6 +290,11 @@ typedef struct { (((Parser *)parser)->unknownEncodingHandlerData) #define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease) #define protocolEncodingName (((Parser *)parser)->protocolEncodingName) +#ifdef XMLNS +#define ns (((Parser *)parser)->ns) +#else +#define ns (0) +#endif #define prologState (((Parser *)parser)->prologState) #define processor (((Parser *)parser)->processor) #define errorCode (((Parser *)parser)->errorCode) @@ -354,6 +375,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) unknownEncodingRelease = 0; unknownEncodingData = 0; unknownEncodingHandlerData = 0; +#ifdef XMLNS + ns = 0; +#endif poolInit(&tempPool); poolInit(&temp2Pool); protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; @@ -367,6 +391,20 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) return parser; } +#ifdef XMLNS + +XML_Parser XML_ParserCreateNS(const XML_Char *encodingName) +{ + XML_Parser parser = XML_ParserCreate(encodingName); + if (parser) { + XmlInitEncodingNS(&initEncoding, &encoding, 0); + ns = 1; + } + return parser; +} + +#endif + XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *openEntityNames, const XML_Char *encodingName) @@ -383,7 +421,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, void *oldUserData = userData; void *oldHandlerArg = handlerArg; - parser = XML_ParserCreate(encodingName); + parser = (ns ? XML_ParserCreateNS : XML_ParserCreate)(encodingName); if (!parser) return 0; startElementHandler = oldStartElementHandler; @@ -798,7 +836,7 @@ doContent(XML_Parser parser, const char *end, const char **nextPtr) { - const ENCODING *internalEnc = XmlGetInternalEncoding(); + const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); const char *dummy; const char **eventPP; const char **eventEndPP; @@ -1395,7 +1433,7 @@ initializeEncoding(XML_Parser parser) #else s = protocolEncodingName; #endif - if (XmlInitEncoding(&initEncoding, &encoding, s)) + if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s)) return XML_ERROR_NONE; return handleUnknownEncoding(parser, protocolEncodingName); } @@ -1408,15 +1446,17 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const ENCODING *newEncoding = 0; const char *version; int standalone = -1; - if (!XmlParseXmlDecl(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &encodingName, - &newEncoding, - &standalone)) + if (!(ns + ? XmlParseXmlDeclNS + : XmlParseXmlDecl)(isGeneralTextEntity, + encoding, + s, + next, + &eventPtr, + &version, + &encodingName, + &newEncoding, + &standalone)) return XML_ERROR_SYNTAX; if (defaultHandler) reportDefault(parser, encoding, s, next); @@ -1468,10 +1508,12 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) info.release(info.data); return XML_ERROR_NO_MEMORY; } - enc = XmlInitUnknownEncoding(unknownEncodingMem, - info.map, - info.convert, - info.data); + enc = (ns + ? XmlInitUnknownEncodingNS + : XmlInitUnknownEncoding)(unknownEncodingMem, + info.map, + info.convert, + info.data); if (enc) { unknownEncodingData = info.data; unknownEncodingRelease = info.release; @@ -1876,7 +1918,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, const char *ptr, const char *end, STRING_POOL *pool) { - const ENCODING *internalEnc = XmlGetInternalEncoding(); + const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); for (;;) { const char *next; int tok = XmlAttributeValueTok(enc, ptr, end, &next); @@ -1997,7 +2039,7 @@ enum XML_Error storeEntityValue(XML_Parser parser, const char *entityTextPtr, const char *entityTextEnd) { - const ENCODING *internalEnc = XmlGetInternalEncoding(); + const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); STRING_POOL *pool = &(dtd.pool); entityTextPtr += encoding->minBytesPerChar; entityTextEnd -= encoding->minBytesPerChar; diff --git a/expat/xmlparse/xmlparse.h b/expat/xmlparse/xmlparse.h index 13d5885c..6ffcc744 100755 --- a/expat/xmlparse/xmlparse.h +++ b/expat/xmlparse/xmlparse.h @@ -70,6 +70,9 @@ protocol or null if there is none specified. */ XML_Parser XMLPARSEAPI XML_ParserCreate(const XML_Char *encoding); +XML_Parser XMLPARSEAPI +XML_ParserCreateNS(const XML_Char *encoding); + /* atts is array of name/value pairs, terminated by 0; names and values are 0 terminated. */ diff --git a/expat/xmltok/asciitab.h b/expat/xmltok/asciitab.h index f7d78da4..500ff66c 100755 --- a/expat/xmltok/asciitab.h +++ b/expat/xmltok/asciitab.h @@ -32,7 +32,7 @@ Contributor(s): /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI, +/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, diff --git a/expat/xmltok/iasciitab.h b/expat/xmltok/iasciitab.h index d8940747..c9ca02b7 100755 --- a/expat/xmltok/iasciitab.h +++ b/expat/xmltok/iasciitab.h @@ -33,7 +33,7 @@ Contributor(s): /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI, +/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, diff --git a/expat/xmltok/xmlrole.c b/expat/xmltok/xmlrole.c index 72be89bf..56c6d89f 100755 --- a/expat/xmltok/xmlrole.c +++ b/expat/xmltok/xmlrole.c @@ -149,6 +149,7 @@ int doctype0(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = doctype1; return XML_ROLE_DOCTYPE_NAME; } @@ -610,6 +611,7 @@ int attlist0(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = attlist1; return XML_ROLE_ATTLIST_ELEMENT_NAME; } @@ -630,6 +632,7 @@ int attlist1(PROLOG_STATE *state, state->handler = internalSubset; return XML_ROLE_NONE; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = attlist2; return XML_ROLE_ATTRIBUTE_NAME; } @@ -689,6 +692,7 @@ int attlist3(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_NMTOKEN: case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = attlist4; return XML_ROLE_ATTRIBUTE_ENUM_VALUE; } @@ -836,6 +840,7 @@ int element0(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = element1; return XML_ROLE_ELEMENT_NAME; } @@ -893,6 +898,7 @@ int element2(PROLOG_STATE *state, state->handler = element6; return XML_ROLE_GROUP_OPEN; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = element7; return XML_ROLE_CONTENT_ELEMENT; case XML_TOK_NAME_QUESTION: @@ -940,6 +946,7 @@ int element4(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = element5; return XML_ROLE_CONTENT_ELEMENT; } @@ -980,6 +987,7 @@ int element6(PROLOG_STATE *state, state->level += 1; return XML_ROLE_GROUP_OPEN; case XML_TOK_NAME: + case XML_TOK_PREFIXED_NAME: state->handler = element7; return XML_ROLE_CONTENT_ELEMENT; case XML_TOK_NAME_QUESTION: diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index bcd06eb9..f0b12996 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -252,7 +252,8 @@ void utf8_toUtf16(const ENCODING *enc, *toP = to; } -static const struct normal_encoding utf8_encoding = { +#ifdef XMLNS +static const struct normal_encoding utf8_encoding_ns = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #include "asciitab.h" @@ -260,11 +261,38 @@ static const struct normal_encoding utf8_encoding = { }, NORMAL_VTABLE(utf8_) }; +#endif + +static const struct normal_encoding utf8_encoding = { + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, + { +#define BT_COLON BT_NMSTRT +#include "asciitab.h" +#undef BT_COLON +#include "utf8tab.h" + }, + NORMAL_VTABLE(utf8_) +}; + +#ifdef XMLNS + +static const struct normal_encoding internal_utf8_encoding_ns = { + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, + { +#include "iasciitab.h" +#include "utf8tab.h" + }, + NORMAL_VTABLE(utf8_) +}; + +#endif static const struct normal_encoding internal_utf8_encoding = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { +#define BT_COLON BT_NMSTRT #include "iasciitab.h" +#undef BT_COLON #include "utf8tab.h" }, NORMAL_VTABLE(utf8_) @@ -304,7 +332,9 @@ void latin1_toUtf16(const ENCODING *enc, *(*toP)++ = (unsigned char)*(*fromP)++; } -static const struct normal_encoding latin1_encoding = { +#ifdef XMLNS + +static const struct normal_encoding latin1_encoding_ns = { { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, { #include "asciitab.h" @@ -312,6 +342,18 @@ static const struct normal_encoding latin1_encoding = { } }; +#endif + +static const struct normal_encoding latin1_encoding = { + { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, + { +#define BT_COLON BT_NMSTRT +#include "asciitab.h" +#undef BT_COLON +#include "latin1tab.h" + } +}; + static void ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, @@ -321,7 +363,9 @@ void ascii_toUtf8(const ENCODING *enc, *(*toP)++ = *(*fromP)++; } -static const struct normal_encoding ascii_encoding = { +#ifdef XMLNS + +static const struct normal_encoding ascii_encoding_ns = { { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, { #include "asciitab.h" @@ -329,6 +373,18 @@ static const struct normal_encoding ascii_encoding = { } }; +#endif + +static const struct normal_encoding ascii_encoding = { + { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, + { +#define BT_COLON BT_NMSTRT +#include "asciitab.h" +#undef BT_COLON +/* BT_NONXML == 0 */ + } +}; + #undef PREFIX static int unicode_byte_type(char hi, char lo) @@ -464,6 +520,24 @@ DEFINE_UTF16_TO_UTF16 #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR +#ifdef XMLNS + +static const struct normal_encoding little2_encoding_ns = { + { VTABLE, 2, 0, +#if BYTE_ORDER == 12 + 1 +#else + 0 +#endif + }, + { +#include "asciitab.h" +#include "latin1tab.h" + } +}; + +#endif + static const struct normal_encoding little2_encoding = { { VTABLE, 2, 0, #if BYTE_ORDER == 12 @@ -472,16 +546,36 @@ static const struct normal_encoding little2_encoding = { 0 #endif }, + { +#define BT_COLON BT_NMSTRT #include "asciitab.h" +#undef BT_COLON #include "latin1tab.h" + } }; #if BYTE_ORDER != 21 -static const struct normal_encoding internal_little2_encoding = { +#ifdef XMLNS + +static const struct normal_encoding internal_little2_encoding_ns = { { VTABLE, 2, 0, 1 }, + { #include "iasciitab.h" #include "latin1tab.h" + } +}; + +#endif + +static const struct normal_encoding internal_little2_encoding = { + { VTABLE, 2, 0, 1 }, + { +#define BT_COLON BT_NMSTRT +#include "iasciitab.h" +#undef BT_COLON +#include "latin1tab.h" + } }; #endif @@ -527,6 +621,24 @@ DEFINE_UTF16_TO_UTF16 #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR +#ifdef XMLNS + +static const struct normal_encoding big2_encoding_ns = { + { VTABLE, 2, 0, +#if BYTE_ORDER == 21 + 1 +#else + 0 +#endif + }, + { +#include "asciitab.h" +#include "latin1tab.h" + } +}; + +#endif + static const struct normal_encoding big2_encoding = { { VTABLE, 2, 0, #if BYTE_ORDER == 21 @@ -535,16 +647,36 @@ static const struct normal_encoding big2_encoding = { 0 #endif }, + { +#define BT_COLON BT_NMSTRT #include "asciitab.h" +#undef BT_COLON #include "latin1tab.h" + } }; #if BYTE_ORDER != 12 -static const struct normal_encoding internal_big2_encoding = { +#ifdef XMLNS + +static const struct normal_encoding internal_big2_encoding_ns = { { VTABLE, 2, 0, 1 }, + { #include "iasciitab.h" #include "latin1tab.h" + } +}; + +#endif + +static const struct normal_encoding internal_big2_encoding = { + { VTABLE, 2, 0, 1 }, + { +#define BT_COLON BT_NMSTRT +#include "iasciitab.h" +#undef BT_COLON +#include "latin1tab.h" + } }; #endif @@ -812,15 +944,19 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e return 0; } -int XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) +static +int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, + const char *, + const char *), + int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) { const char *val = 0; const char *name = 0; @@ -855,7 +991,7 @@ int XmlParseXmlDecl(int isGeneralTextEntity, if (encodingName) *encodingName = val; if (encoding) - *encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar); + *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { *badPtr = ptr; return 0; @@ -888,6 +1024,19 @@ int XmlParseXmlDecl(int isGeneralTextEntity, return 1; } +int XmlParseXmlDecl(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) +{ + return doParseXmlDecl(findEncoding, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone); +} + static int checkCharRefNumber(int result) { @@ -1133,3 +1282,163 @@ XmlInitUnknownEncoding(void *mem, e->normal.enc.utf16Convert = unknown_toUtf16; return &(e->normal.enc); } + +#ifdef XMLNS + +const ENCODING *XmlGetUtf8InternalEncodingNS() +{ + return &internal_utf8_encoding_ns.enc; +} + +const ENCODING *XmlGetUtf16InternalEncodingNS() +{ +#if BYTE_ORDER == 12 + return &internal_little2_encoding_ns.enc; +#elif BYTE_ORDER == 21 + return &internal_big2_encoding_ns.enc; +#else + const short n = 1; + return *(const char *)&n ? &internal_little2_encoding_ns.enc : &internal_big2_encoding_ns.enc; +#endif +} + +static +int initScanNS(const ENCODING *enc, int state, const char *ptr, const char *end, + const char **nextTokPtr) +{ + const ENCODING **encPtr; + + if (ptr == end) + return XML_TOK_NONE; + encPtr = ((const INIT_ENCODING *)enc)->encPtr; + if (ptr + 1 == end) { + switch ((unsigned char)*ptr) { + case 0xFE: + case 0xFF: + case 0x00: + case 0x3C: + return XML_TOK_PARTIAL; + } + } + else { + switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { + case 0x003C: + *encPtr = &big2_encoding_ns.enc; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + case 0xFEFF: + *nextTokPtr = ptr + 2; + *encPtr = &big2_encoding_ns.enc; + return XML_TOK_BOM; + case 0x3C00: + *encPtr = &little2_encoding_ns.enc; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + case 0xFFFE: + *nextTokPtr = ptr + 2; + *encPtr = &little2_encoding_ns.enc; + return XML_TOK_BOM; + } + } + *encPtr = &utf8_encoding_ns.enc; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); +} + + +static +int initScanPrologNS(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + return initScanNS(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); +} + +static +int initScanContentNS(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + return initScanNS(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); +} + +int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) +{ + if (name) { + if (streqci(name, "ISO-8859-1")) { + *encPtr = &latin1_encoding_ns.enc; + return 1; + } + if (streqci(name, "UTF-8")) { + *encPtr = &utf8_encoding_ns.enc; + return 1; + } + if (streqci(name, "US-ASCII")) { + *encPtr = &ascii_encoding_ns.enc; + return 1; + } + if (!streqci(name, "UTF-16")) + return 0; + } + p->initEnc.scanners[XML_PROLOG_STATE] = initScanPrologNS; + p->initEnc.scanners[XML_CONTENT_STATE] = initScanContentNS; + p->initEnc.updatePosition = initUpdatePosition; + p->initEnc.minBytesPerChar = 1; + p->encPtr = encPtr; + *encPtr = &(p->initEnc); + return 1; +} + + +static +const ENCODING *findEncodingNS(const ENCODING *enc, const char *ptr, const char *end) +{ +#define ENCODING_MAX 128 + char buf[ENCODING_MAX]; + char *p = buf; + int i; + XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); + if (ptr != end) + return 0; + *p = 0; + for (i = 0; buf[i]; i++) { + if ('a' <= buf[i] && buf[i] <= 'z') + buf[i] += 'A' - 'a'; + } + if (streqci(buf, "UTF-8")) + return &utf8_encoding_ns.enc; + if (streqci(buf, "ISO-8859-1")) + return &latin1_encoding_ns.enc; + if (streqci(buf, "US-ASCII")) + return &ascii_encoding_ns.enc; + if (streqci(buf, "UTF-16")) { + static const unsigned short n = 1; + if (enc->minBytesPerChar == 2) + return enc; + return &big2_encoding_ns.enc; + } + return 0; +} + + +int XmlParseXmlDeclNS(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) +{ + return doParseXmlDecl(findEncodingNS, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone); +} + +ENCODING * +XmlInitUnknownEncodingNS(void *mem, + int *table, + int (*convert)(void *userData, const char *p), + void *userData) +{ + ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); + if (enc) + ((struct normal_encoding *)enc)->type[':'] = BT_COLON; + return enc; +} + +#endif /* XMLNS */ diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 6d0b91df..dc0364a5 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -94,6 +94,10 @@ extern "C" { /* The following token is returned only by XmlCdataSectionTok */ #define XML_TOK_CDATA_SECT_CLOSE 40 +/* With namespace processing this is returned by XmlPrologTok + for a name with a colon. */ +#define XML_TOK_PREFIXED_NAME 41 + #define XML_N_STATES 3 #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 @@ -269,6 +273,23 @@ XmlInitUnknownEncoding(void *mem, int (*convert)(void *userData, const char *p), void *userData); +int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); +int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS(); +const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS(); +ENCODING XMLTOKAPI * +XmlInitUnknownEncodingNS(void *mem, + int *table, + int (*convert)(void *userData, const char *p), + void *userData); #ifdef __cplusplus } #endif diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index 513935ae..17aec86f 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -421,6 +421,12 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, } } return XML_TOK_PARTIAL; +#ifdef XMLNS + case BT_COLON: + /* no need to check qname syntax here, since end-tag must match exactly */ + ptr += MINBPC; + break; +#endif case BT_GT: *nextTokPtr = ptr + MINBPC; return XML_TOK_END_TAG; @@ -532,9 +538,30 @@ static int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { +#ifdef XMLNS + int hadColon = 0; +#endif while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) +#ifdef XMLNS + case BT_COLON: + if (hadColon) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + hadColon = 1; + ptr += MINBPC; + if (ptr == end) + return XML_TOK_PARTIAL; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + break; +#endif case BT_S: case BT_CR: case BT_LF: for (;;) { int t; @@ -559,6 +586,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, case BT_EQUALS: { int open; +#ifdef XMLNS + hadColon = 0; +#endif for (;;) { ptr += MINBPC; @@ -668,6 +698,9 @@ static int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { +#ifdef XMLNS + int hadColon; +#endif if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { @@ -691,10 +724,31 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } +#ifdef XMLNS + hadColon = 0; +#endif /* we have a start-tag */ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) +#ifdef XMLNS + case BT_COLON: + if (hadColon) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + hadColon = 1; + ptr += MINBPC; + if (ptr == end) + return XML_TOK_PARTIAL; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + break; +#endif case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC; @@ -1078,6 +1132,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_DIGIT: case BT_NAME: case BT_MINUS: +#ifdef XMLNS + case BT_COLON: +#endif tok = XML_TOK_NMTOKEN; ptr += MINBPC; break; @@ -1105,22 +1162,43 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return tok; +#ifdef XMLNS + case BT_COLON: + ptr += MINBPC; + switch (tok) { + case XML_TOK_NAME: + if (ptr == end) + return XML_TOK_PARTIAL; + tok = XML_TOK_PREFIXED_NAME; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + default: + tok = XML_TOK_NMTOKEN; + break; + } + break; + case XML_TOK_PREFIXED_NAME: + tok = XML_TOK_NMTOKEN; + break; + } + break; +#endif case BT_PLUS: - if (tok != XML_TOK_NAME) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_NAME_PLUS; case BT_AST: - if (tok != XML_TOK_NAME) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_NAME_ASTERISK; case BT_QUEST: - if (tok != XML_TOK_NAME) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -1270,6 +1348,9 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_AST: case BT_PERCNT: case BT_NUM: +#ifdef XMLNS + case BT_COLON: +#endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, '\t')) { @@ -1494,6 +1575,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) break; case BT_NONASCII: case BT_NMSTRT: +#ifdef XMLNS + case BT_COLON: +#endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1524,6 +1608,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: +#ifdef XMLNS + case BT_COLON: +#endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1550,6 +1637,9 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char * case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: +#ifdef XMLNS + case BT_COLON: +#endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1572,6 +1662,9 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: +#ifdef XMLNS + case BT_COLON: +#endif case BT_HEX: case BT_DIGIT: case BT_NAME: diff --git a/expat/xmltok/xmltok_impl.h b/expat/xmltok/xmltok_impl.h index 3b0444ad..4c652545 100755 --- a/expat/xmltok/xmltok_impl.h +++ b/expat/xmltok/xmltok_impl.h @@ -42,6 +42,7 @@ enum { BT_LSQB, BT_S, BT_NMSTRT, + BT_COLON, BT_HEX, BT_DIGIT, BT_NAME, diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index 9903e399..b43af715 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -509,6 +509,9 @@ int tmain(int argc, XML_Char **argv) int processExternalEntities = 0; int windowsCodePages = 0; int outputType = 0; +#ifdef XMLNS + int enforceNamespaceSyntax = 0; +#endif #ifdef _MSC_VER _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); @@ -526,6 +529,12 @@ int tmain(int argc, XML_Char **argv) useFilemap = 0; j++; } +#ifdef XMLNS + if (argv[i][j] == T('n')) { + enforceNamespaceSyntax = 1; + j++; + } +#endif if (argv[i][j] == T('x')) { processExternalEntities = 1; j++; @@ -573,7 +582,13 @@ int tmain(int argc, XML_Char **argv) FILE *fp = 0; XML_Char *outName = 0; int result; +#ifdef XMLNS + XML_Parser parser = (enforceNamespaceSyntax + ? XML_ParserCreateNS + : XML_ParserCreate)(encoding); +#else XML_Parser parser = XML_ParserCreate(encoding); +#endif if (outputDir) { const XML_Char *file = argv[i]; if (tcsrchr(file, T('/')))