Lexical support for namespaces

This commit is contained in:
James Clark 1998-08-22 22:40:45 +00:00
parent dac8f8295d
commit 9f52171901
10 changed files with 530 additions and 38 deletions

View file

@ -28,6 +28,7 @@ Contributor(s):
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
typedef unsigned short ICHAR;
@ -35,11 +36,23 @@ typedef unsigned short ICHAR;
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
#ifndef XMLNS
/* Built without namespace support: alias every *NS tokenizer entry point
   that this file names to its plain counterpart.  The `ns ? FooNS : Foo`
   selections in this file mention the NS name even though `ns` is the
   constant 0 in this configuration, so each such name must still resolve
   to a function that actually exists. */
#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl
#endif
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
@ -225,6 +238,9 @@ typedef struct {
const ENCODING *encoding;
INIT_ENCODING initEncoding;
const XML_Char *protocolEncodingName;
#ifdef XMLNS
int ns;
#endif
void *unknownEncodingMem;
void *unknownEncodingData;
void *unknownEncodingHandlerData;
@ -274,6 +290,11 @@ typedef struct {
(((Parser *)parser)->unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->protocolEncodingName)
/* Namespace-processing flag.  With XMLNS compiled in it reads the `ns`
   field of the Parser reached through a variable named `parser`, in the
   same way as the neighbouring accessor macros; without XMLNS it is the
   constant 0. */
#ifdef XMLNS
#define ns (((Parser *)parser)->ns)
#else
#define ns (0)
#endif
#define prologState (((Parser *)parser)->prologState)
#define processor (((Parser *)parser)->processor)
#define errorCode (((Parser *)parser)->errorCode)
@ -354,6 +375,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName)
unknownEncodingRelease = 0;
unknownEncodingData = 0;
unknownEncodingHandlerData = 0;
#ifdef XMLNS
ns = 0;
#endif
poolInit(&tempPool);
poolInit(&temp2Pool);
protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
@ -367,6 +391,20 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName)
return parser;
}
#ifdef XMLNS
/* Namespace-aware constructor.  Builds a parser with XML_ParserCreate,
   then re-initialises its initial encoding through XmlInitEncodingNS and
   raises the parser's `ns` flag.  Returns whatever XML_ParserCreate
   returned, i.e. a null parser when creation failed. */
XML_Parser XML_ParserCreateNS(const XML_Char *encodingName)
{
  /* The local has to be called `parser`: the `ns` accessor (and presumably
     `initEncoding` / `encoding` as well) expands to an expression on that
     name. */
  XML_Parser parser = XML_ParserCreate(encodingName);

  if (!parser)
    return parser;

  XmlInitEncodingNS(&initEncoding, &encoding, 0);
  ns = 1;
  return parser;
}
#endif
XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
const XML_Char *openEntityNames,
const XML_Char *encodingName)
@ -383,7 +421,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
void *oldUserData = userData;
void *oldHandlerArg = handlerArg;
parser = XML_ParserCreate(encodingName);
parser = (ns ? XML_ParserCreateNS : XML_ParserCreate)(encodingName);
if (!parser)
return 0;
startElementHandler = oldStartElementHandler;
@ -798,7 +836,7 @@ doContent(XML_Parser parser,
const char *end,
const char **nextPtr)
{
const ENCODING *internalEnc = XmlGetInternalEncoding();
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
const char *dummy;
const char **eventPP;
const char **eventEndPP;
@ -1395,7 +1433,7 @@ initializeEncoding(XML_Parser parser)
#else
s = protocolEncodingName;
#endif
if (XmlInitEncoding(&initEncoding, &encoding, s))
if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
return XML_ERROR_NONE;
return handleUnknownEncoding(parser, protocolEncodingName);
}
@ -1408,15 +1446,17 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
const ENCODING *newEncoding = 0;
const char *version;
int standalone = -1;
if (!XmlParseXmlDecl(isGeneralTextEntity,
encoding,
s,
next,
&eventPtr,
&version,
&encodingName,
&newEncoding,
&standalone))
if (!(ns
? XmlParseXmlDeclNS
: XmlParseXmlDecl)(isGeneralTextEntity,
encoding,
s,
next,
&eventPtr,
&version,
&encodingName,
&newEncoding,
&standalone))
return XML_ERROR_SYNTAX;
if (defaultHandler)
reportDefault(parser, encoding, s, next);
@ -1468,10 +1508,12 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
info.release(info.data);
return XML_ERROR_NO_MEMORY;
}
enc = XmlInitUnknownEncoding(unknownEncodingMem,
info.map,
info.convert,
info.data);
enc = (ns
? XmlInitUnknownEncodingNS
: XmlInitUnknownEncoding)(unknownEncodingMem,
info.map,
info.convert,
info.data);
if (enc) {
unknownEncodingData = info.data;
unknownEncodingRelease = info.release;
@ -1876,7 +1918,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
const char *ptr, const char *end,
STRING_POOL *pool)
{
const ENCODING *internalEnc = XmlGetInternalEncoding();
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
for (;;) {
const char *next;
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
@ -1997,7 +2039,7 @@ enum XML_Error storeEntityValue(XML_Parser parser,
const char *entityTextPtr,
const char *entityTextEnd)
{
const ENCODING *internalEnc = XmlGetInternalEncoding();
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
STRING_POOL *pool = &(dtd.pool);
entityTextPtr += encoding->minBytesPerChar;
entityTextEnd -= encoding->minBytesPerChar;

View file

@ -70,6 +70,9 @@ protocol or null if there is none specified. */
XML_Parser XMLPARSEAPI
XML_ParserCreate(const XML_Char *encoding);
/* Same as XML_ParserCreate, except that the returned parser has
namespace processing switched on.  The definition is compiled only
when XMLNS is defined. */
XML_Parser XMLPARSEAPI
XML_ParserCreateNS(const XML_Char *encoding);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated. */

View file

@ -32,7 +32,7 @@ Contributor(s):
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,

View file

@ -33,7 +33,7 @@ Contributor(s):
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,

View file

@ -149,6 +149,7 @@ int doctype0(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = doctype1;
return XML_ROLE_DOCTYPE_NAME;
}
@ -610,6 +611,7 @@ int attlist0(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist1;
return XML_ROLE_ATTLIST_ELEMENT_NAME;
}
@ -630,6 +632,7 @@ int attlist1(PROLOG_STATE *state,
state->handler = internalSubset;
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist2;
return XML_ROLE_ATTRIBUTE_NAME;
}
@ -689,6 +692,7 @@ int attlist3(PROLOG_STATE *state,
return XML_ROLE_NONE;
case XML_TOK_NMTOKEN:
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist4;
return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
}
@ -836,6 +840,7 @@ int element0(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element1;
return XML_ROLE_ELEMENT_NAME;
}
@ -893,6 +898,7 @@ int element2(PROLOG_STATE *state,
state->handler = element6;
return XML_ROLE_GROUP_OPEN;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT;
case XML_TOK_NAME_QUESTION:
@ -940,6 +946,7 @@ int element4(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element5;
return XML_ROLE_CONTENT_ELEMENT;
}
@ -980,6 +987,7 @@ int element6(PROLOG_STATE *state,
state->level += 1;
return XML_ROLE_GROUP_OPEN;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT;
case XML_TOK_NAME_QUESTION:

View file

@ -252,7 +252,8 @@ void utf8_toUtf16(const ENCODING *enc,
*toP = to;
}
static const struct normal_encoding utf8_encoding = {
#ifdef XMLNS
static const struct normal_encoding utf8_encoding_ns = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#include "asciitab.h"
@ -260,11 +261,38 @@ static const struct normal_encoding utf8_encoding = {
},
NORMAL_VTABLE(utf8_)
};
#endif
/* UTF-8 encoding whose byte-type table treats the colon as an ordinary
   name-start character: BT_COLON is aliased to BT_NMSTRT only while
   asciitab.h is being included, then restored. */
static const struct normal_encoding utf8_encoding = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
},
NORMAL_VTABLE(utf8_)
};
#ifdef XMLNS
/* Namespace-aware internal UTF-8 encoding: iasciitab.h is included with
   BT_COLON left as a distinct byte type.  This is the object returned by
   XmlGetUtf8InternalEncodingNS. */
static const struct normal_encoding internal_utf8_encoding_ns = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#include "iasciitab.h"
#include "utf8tab.h"
},
NORMAL_VTABLE(utf8_)
};
#endif
static const struct normal_encoding internal_utf8_encoding = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
},
NORMAL_VTABLE(utf8_)
@ -304,7 +332,9 @@ void latin1_toUtf16(const ENCODING *enc,
*(*toP)++ = (unsigned char)*(*fromP)++;
}
static const struct normal_encoding latin1_encoding = {
#ifdef XMLNS
static const struct normal_encoding latin1_encoding_ns = {
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
{
#include "asciitab.h"
@ -312,6 +342,18 @@ static const struct normal_encoding latin1_encoding = {
}
};
#endif
/* Latin-1 encoding whose byte-type table treats the colon as an ordinary
   name-start character: BT_COLON is aliased to BT_NMSTRT only while
   asciitab.h is being included, then restored. */
static const struct normal_encoding latin1_encoding = {
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
}
};
static
void ascii_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
@ -321,7 +363,9 @@ void ascii_toUtf8(const ENCODING *enc,
*(*toP)++ = *(*fromP)++;
}
static const struct normal_encoding ascii_encoding = {
#ifdef XMLNS
static const struct normal_encoding ascii_encoding_ns = {
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
{
#include "asciitab.h"
@ -329,6 +373,18 @@ static const struct normal_encoding ascii_encoding = {
}
};
#endif
/* US-ASCII encoding whose byte-type table treats the colon as an ordinary
   name-start character: BT_COLON is aliased to BT_NMSTRT only while
   asciitab.h is being included, then restored.  Only the asciitab.h
   entries are listed; the remaining entries are left to default
   initialisation. */
static const struct normal_encoding ascii_encoding = {
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0 */
}
};
#undef PREFIX
static int unicode_byte_type(char hi, char lo)
@ -464,6 +520,24 @@ DEFINE_UTF16_TO_UTF16
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
#ifdef XMLNS
/* Namespace-aware "little2" encoding (BT_COLON kept distinct).  initScanNS
   selects it when the input starts with the byte pair FF FE or with 3C 00.
   The last header field is 1 only when BYTE_ORDER == 12.
   NOTE(review): presumably that field marks "same as the host's native
   2-byte order" -- confirm against the ENCODING struct definition. */
static const struct normal_encoding little2_encoding_ns = {
{ VTABLE, 2, 0,
#if BYTE_ORDER == 12
1
#else
0
#endif
},
{
#include "asciitab.h"
#include "latin1tab.h"
}
};
#endif
static const struct normal_encoding little2_encoding = {
{ VTABLE, 2, 0,
#if BYTE_ORDER == 12
@ -472,16 +546,36 @@ static const struct normal_encoding little2_encoding = {
0
#endif
},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
}
};
#if BYTE_ORDER != 21
static const struct normal_encoding internal_little2_encoding = {
#ifdef XMLNS
/* Namespace-aware internal "little2" encoding (iasciitab.h included with
   BT_COLON kept distinct).  XmlGetUtf16InternalEncodingNS returns it when
   BYTE_ORDER == 12, or when BYTE_ORDER is neither 12 nor 21 and its
   run-time probe finds the low-order byte stored first. */
static const struct normal_encoding internal_little2_encoding_ns = {
{ VTABLE, 2, 0, 1 },
{
#include "iasciitab.h"
#include "latin1tab.h"
}
};
#endif
/* Internal "little2" encoding whose byte-type table treats the colon as an
   ordinary name-start character: BT_COLON is aliased to BT_NMSTRT only
   while iasciitab.h is being included, then restored. */
static const struct normal_encoding internal_little2_encoding = {
{ VTABLE, 2, 0, 1 },
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
}
};
#endif
@ -527,6 +621,24 @@ DEFINE_UTF16_TO_UTF16
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
#ifdef XMLNS
/* Namespace-aware "big2" encoding (BT_COLON kept distinct).  initScanNS
   selects it when the input starts with the byte pair FE FF or with 00 3C,
   and findEncodingNS returns it for a declared "UTF-16" when the current
   encoding is not already a 2-byte one.
   The last header field is 1 only when BYTE_ORDER == 21.
   NOTE(review): presumably that field marks "same as the host's native
   2-byte order" -- confirm against the ENCODING struct definition. */
static const struct normal_encoding big2_encoding_ns = {
{ VTABLE, 2, 0,
#if BYTE_ORDER == 21
1
#else
0
#endif
},
{
#include "asciitab.h"
#include "latin1tab.h"
}
};
#endif
static const struct normal_encoding big2_encoding = {
{ VTABLE, 2, 0,
#if BYTE_ORDER == 21
@ -535,16 +647,36 @@ static const struct normal_encoding big2_encoding = {
0
#endif
},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
}
};
#if BYTE_ORDER != 12
static const struct normal_encoding internal_big2_encoding = {
#ifdef XMLNS
/* Namespace-aware internal "big2" encoding (iasciitab.h included with
   BT_COLON kept distinct).  XmlGetUtf16InternalEncodingNS returns it when
   BYTE_ORDER == 21, or when BYTE_ORDER is neither 12 nor 21 and its
   run-time probe finds the high-order byte stored first. */
static const struct normal_encoding internal_big2_encoding_ns = {
{ VTABLE, 2, 0, 1 },
{
#include "iasciitab.h"
#include "latin1tab.h"
}
};
#endif
/* Internal "big2" encoding whose byte-type table treats the colon as an
   ordinary name-start character: BT_COLON is aliased to BT_NMSTRT only
   while iasciitab.h is being included, then restored. */
static const struct normal_encoding internal_big2_encoding = {
{ VTABLE, 2, 0, 1 },
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
}
};
#endif
@ -812,15 +944,19 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e
return 0;
}
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
static
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
const char *),
int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
const char *val = 0;
const char *name = 0;
@ -855,7 +991,7 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
if (encodingName)
*encodingName = val;
if (encoding)
*encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar);
*encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
*badPtr = ptr;
return 0;
@ -888,6 +1024,19 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
return 1;
}
/* Plain (non-NS) entry point for parsing an XML or text declaration.
   Every argument is forwarded unchanged to doParseXmlDecl, with
   findEncoding supplied as the resolver for the declared encoding name.
   The return value is doParseXmlDecl's. */
int XmlParseXmlDecl(int isGeneralTextEntity,
                    const ENCODING *enc,
                    const char *ptr,
                    const char *end,
                    const char **badPtr,
                    const char **versionPtr,
                    const char **encodingName,
                    const ENCODING **encoding,
                    int *standalone)
{
  int parsed = doParseXmlDecl(findEncoding,
                              isGeneralTextEntity,
                              enc,
                              ptr,
                              end,
                              badPtr,
                              versionPtr,
                              encodingName,
                              encoding,
                              standalone);
  return parsed;
}
static
int checkCharRefNumber(int result)
{
@ -1133,3 +1282,163 @@ XmlInitUnknownEncoding(void *mem,
e->normal.enc.utf16Convert = unknown_toUtf16;
return &(e->normal.enc);
}
#ifdef XMLNS
/* Returns the namespace-aware internal UTF-8 encoding object.
   Declared with a (void) parameter list so that the definition is a real
   prototype rather than an old-style declarator; it remains compatible
   with callers and with declarations written with empty parentheses. */
const ENCODING *XmlGetUtf8InternalEncodingNS(void)
{
  return &internal_utf8_encoding_ns.enc;
}
/* Returns the namespace-aware internal 2-byte encoding object.
   BYTE_ORDER == 12 selects the little2 variant and BYTE_ORDER == 21 the
   big2 variant at compile time; for any other value the choice is made at
   run time by inspecting the first byte of a short holding 1.
   Declared with a (void) parameter list so that the definition is a real
   prototype rather than an old-style declarator. */
const ENCODING *XmlGetUtf16InternalEncodingNS(void)
{
#if BYTE_ORDER == 12
  return &internal_little2_encoding_ns.enc;
#elif BYTE_ORDER == 21
  return &internal_big2_encoding_ns.enc;
#else
  /* First byte non-zero means the low-order byte is stored first. */
  const short n = 1;
  return *(const char *)&n ? &internal_little2_encoding_ns.enc : &internal_big2_encoding_ns.enc;
#endif
}
/* Initial scanner for the namespace-aware encodings.  It looks at the
   first one or two bytes of input, stores the chosen encoding through the
   INIT_ENCODING's encPtr, and then either reports a byte-order mark or
   hands the input to that encoding's own scanner for `state`.

   Returns XML_TOK_NONE for empty input; XML_TOK_PARTIAL when only one
   byte is available and it could be the start of a 2-byte signature;
   XML_TOK_BOM (with *nextTokPtr = ptr + 2) for FE FF or FF FE; otherwise
   the result of XmlTok on the selected encoding. */
static
int initScanNS(const ENCODING *enc, int state, const char *ptr, const char *end,
               const char **nextTokPtr)
{
  const ENCODING **slot;
  const ENCODING *chosen = &utf8_encoding_ns.enc; /* default when nothing else matches */
  unsigned char first;

  if (ptr == end)
    return XML_TOK_NONE;

  slot = ((const INIT_ENCODING *)enc)->encPtr;
  first = (unsigned char)*ptr;

  if (ptr + 1 == end) {
    /* A lone byte that may begin a BOM or a 2-byte '<': wait for more. */
    if (first == 0xFE || first == 0xFF || first == 0x00 || first == 0x3C)
      return XML_TOK_PARTIAL;
  }
  else {
    int pair = (first << 8) | (unsigned char)ptr[1];

    if (pair == 0xFEFF || pair == 0xFFFE) {
      *nextTokPtr = ptr + 2;
      *slot = (pair == 0xFEFF) ? &big2_encoding_ns.enc : &little2_encoding_ns.enc;
      return XML_TOK_BOM;
    }
    if (pair == 0x003C)
      chosen = &big2_encoding_ns.enc;
    else if (pair == 0x3C00)
      chosen = &little2_encoding_ns.enc;
  }

  *slot = chosen;
  return XmlTok(*slot, state, ptr, end, nextTokPtr);
}
/* Prolog-state scanner installed by XmlInitEncodingNS: runs initScanNS
   with XML_PROLOG_STATE and returns its token. */
static
int initScanPrologNS(const ENCODING *enc, const char *ptr, const char *end,
                     const char **nextTokPtr)
{
  int tok = initScanNS(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
  return tok;
}
/* Content-state scanner installed by XmlInitEncodingNS: runs initScanNS
   with XML_CONTENT_STATE and returns its token. */
static
int initScanContentNS(const ENCODING *enc, const char *ptr, const char *end,
                      const char **nextTokPtr)
{
  int tok = initScanNS(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
  return tok;
}
/* Namespace-aware initial-encoding setup.

   name  - encoding name supplied by the caller, or null for "detect".
   Returns 1 on success and 0 when `name` is not one of the names handled
   here.

   "ISO-8859-1", "UTF-8" and "US-ASCII" (compared with streqci) make
   *encPtr point straight at the matching *_ns encoding.  A null name or
   "UTF-16" arms the detecting scanners instead: `p` is filled in, it
   remembers encPtr, and *encPtr is pointed at p->initEnc so that the
   first scan call can replace it with the detected encoding. */
int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{
  if (name) {
    const ENCODING *fixed = 0;

    if (streqci(name, "ISO-8859-1"))
      fixed = &latin1_encoding_ns.enc;
    else if (streqci(name, "UTF-8"))
      fixed = &utf8_encoding_ns.enc;
    else if (streqci(name, "US-ASCII"))
      fixed = &ascii_encoding_ns.enc;

    if (fixed) {
      *encPtr = fixed;
      return 1;
    }
    if (!streqci(name, "UTF-16"))
      return 0;
  }

  /* No name, or UTF-16: decide from the leading bytes of the input. */
  p->initEnc.scanners[XML_PROLOG_STATE] = initScanPrologNS;
  p->initEnc.scanners[XML_CONTENT_STATE] = initScanContentNS;
  p->initEnc.updatePosition = initUpdatePosition;
  p->initEnc.minBytesPerChar = 1;
  p->encPtr = encPtr;
  *encPtr = &(p->initEnc);
  return 1;
}
/* Maps the encoding name found in an XML declaration to one of the
   namespace-aware encodings.

   enc       - encoding the declaration itself is being read in.
   ptr, end  - the name, as bytes in `enc`.
   Returns the matching encoding, or 0 when the name could not be
   converted completely into the local buffer or is not recognised.

   For "UTF-16" the current encoding is kept when it already uses two
   bytes per character; otherwise big2_encoding_ns is returned. */
static
const ENCODING *findEncodingNS(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
  char buf[ENCODING_MAX];
  char *p = buf;
  int i;
  /* One byte of buf is held back for the terminating 0. */
  XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
  /* Input left unconsumed: the name was not fully converted, so give up. */
  if (ptr != end)
    return 0;
  *p = 0;
  /* Fold ASCII lower-case letters to upper case before comparing. */
  for (i = 0; buf[i]; i++) {
    if ('a' <= buf[i] && buf[i] <= 'z')
      buf[i] += 'A' - 'a';
  }
  if (streqci(buf, "UTF-8"))
    return &utf8_encoding_ns.enc;
  if (streqci(buf, "ISO-8859-1"))
    return &latin1_encoding_ns.enc;
  if (streqci(buf, "US-ASCII"))
    return &ascii_encoding_ns.enc;
  if (streqci(buf, "UTF-16")) {
    if (enc->minBytesPerChar == 2)
      return enc;
    return &big2_encoding_ns.enc;
  }
  return 0;
}
/* Namespace-aware entry point for parsing an XML or text declaration.
   Every argument is forwarded unchanged to doParseXmlDecl, with
   findEncodingNS supplied as the resolver for the declared encoding name.
   The return value is doParseXmlDecl's. */
int XmlParseXmlDeclNS(int isGeneralTextEntity,
                      const ENCODING *enc,
                      const char *ptr,
                      const char *end,
                      const char **badPtr,
                      const char **versionPtr,
                      const char **encodingName,
                      const ENCODING **encoding,
                      int *standalone)
{
  int parsed = doParseXmlDecl(findEncodingNS,
                              isGeneralTextEntity,
                              enc,
                              ptr,
                              end,
                              badPtr,
                              versionPtr,
                              encodingName,
                              encoding,
                              standalone);
  return parsed;
}
/* Namespace-aware wrapper around XmlInitUnknownEncoding.  The encoding is
   built by the plain function from the same arguments; when that succeeds
   the byte-type entry for ':' is overwritten with BT_COLON.  Returns the
   encoding, or the null result of XmlInitUnknownEncoding on failure. */
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
                         int *table,
                         int (*convert)(void *userData, const char *p),
                         void *userData)
{
  ENCODING *built = XmlInitUnknownEncoding(mem, table, convert, userData);

  if (!built)
    return built;

  ((struct normal_encoding *)built)->type[':'] = BT_COLON;
  return built;
}
#endif /* XMLNS */

View file

@ -94,6 +94,10 @@ extern "C" {
/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok
for a name with a colon. */
#define XML_TOK_PREFIXED_NAME 41
#define XML_N_STATES 3
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
@ -269,6 +273,23 @@ XmlInitUnknownEncoding(void *mem,
int (*convert)(void *userData, const char *p),
void *userData);
/* Namespace-aware counterparts of the tokenizer entry points.  Their
   definitions are compiled only when XMLNS is defined. */

/* Same parameters and result as XmlParseXmlDecl; the declared encoding
   name is resolved to a namespace-aware encoding. */
int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
/* Returns 1 on success, 0 if `name` is not an encoding it handles; a null
   name asks for detection from the start of the input. */
int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
/* Explicit (void) so these are real prototypes and a stray argument is
   diagnosed at the call site. */
const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS(void);
const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS(void);
/* Same parameters as XmlInitUnknownEncoding; on success the returned
   encoding classifies ':' as BT_COLON. */
ENCODING XMLTOKAPI *
XmlInitUnknownEncodingNS(void *mem,
int *table,
int (*convert)(void *userData, const char *p),
void *userData);
#ifdef __cplusplus
}
#endif

View file

@ -421,6 +421,12 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
}
}
return XML_TOK_PARTIAL;
#ifdef XMLNS
case BT_COLON:
/* no need to check qname syntax here, since end-tag must match exactly */
ptr += MINBPC;
break;
#endif
case BT_GT:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_END_TAG;
@ -532,9 +538,30 @@ static
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XMLNS
int hadColon = 0;
#endif
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XMLNS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
#endif
case BT_S: case BT_CR: case BT_LF:
for (;;) {
int t;
@ -559,6 +586,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_EQUALS:
{
int open;
#ifdef XMLNS
hadColon = 0;
#endif
for (;;) {
ptr += MINBPC;
@ -668,6 +698,9 @@ static
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XMLNS
int hadColon;
#endif
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
@ -691,10 +724,31 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
#ifdef XMLNS
hadColon = 0;
#endif
/* we have a start-tag */
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XMLNS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
#endif
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC;
@ -1078,6 +1132,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
#ifdef XMLNS
case BT_COLON:
#endif
tok = XML_TOK_NMTOKEN;
ptr += MINBPC;
break;
@ -1105,22 +1162,43 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF:
*nextTokPtr = ptr;
return tok;
#ifdef XMLNS
case BT_COLON:
ptr += MINBPC;
switch (tok) {
case XML_TOK_NAME:
if (ptr == end)
return XML_TOK_PARTIAL;
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
default:
tok = XML_TOK_NMTOKEN;
break;
}
break;
case XML_TOK_PREFIXED_NAME:
tok = XML_TOK_NMTOKEN;
break;
}
break;
#endif
case BT_PLUS:
if (tok != XML_TOK_NAME) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC;
return XML_TOK_NAME_PLUS;
case BT_AST:
if (tok != XML_TOK_NAME) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC;
return XML_TOK_NAME_ASTERISK;
case BT_QUEST:
if (tok != XML_TOK_NAME) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -1270,6 +1348,9 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
case BT_AST:
case BT_PERCNT:
case BT_NUM:
#ifdef XMLNS
case BT_COLON:
#endif
break;
case BT_S:
if (CHAR_MATCHES(enc, ptr, '\t')) {
@ -1494,6 +1575,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
break;
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XMLNS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
@ -1524,6 +1608,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
case BT_LEAD4:
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XMLNS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
@ -1550,6 +1637,9 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *
case BT_LEAD4:
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XMLNS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
@ -1572,6 +1662,9 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
#undef LEAD_CASE
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XMLNS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:

View file

@ -42,6 +42,7 @@ enum {
BT_LSQB,
BT_S,
BT_NMSTRT,
BT_COLON,
BT_HEX,
BT_DIGIT,
BT_NAME,

View file

@ -509,6 +509,9 @@ int tmain(int argc, XML_Char **argv)
int processExternalEntities = 0;
int windowsCodePages = 0;
int outputType = 0;
#ifdef XMLNS
int enforceNamespaceSyntax = 0;
#endif
#ifdef _MSC_VER
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
@ -526,6 +529,12 @@ int tmain(int argc, XML_Char **argv)
useFilemap = 0;
j++;
}
#ifdef XMLNS
if (argv[i][j] == T('n')) {
enforceNamespaceSyntax = 1;
j++;
}
#endif
if (argv[i][j] == T('x')) {
processExternalEntities = 1;
j++;
@ -573,7 +582,13 @@ int tmain(int argc, XML_Char **argv)
FILE *fp = 0;
XML_Char *outName = 0;
int result;
#ifdef XMLNS
XML_Parser parser = (enforceNamespaceSyntax
? XML_ParserCreateNS
: XML_ParserCreate)(encoding);
#else
XML_Parser parser = XML_ParserCreate(encoding);
#endif
if (outputDir) {
const XML_Char *file = argv[i];
if (tcsrchr(file, T('/')))