mirror of
https://github.com/libexpat/libexpat.git
synced 2025-04-05 13:14:59 +00:00
Lexical support for namespaces
This commit is contained in:
parent
dac8f8295d
commit
9f52171901
10 changed files with 530 additions and 38 deletions
|
@ -28,6 +28,7 @@ Contributor(s):
|
|||
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
|
||||
#define XmlConvert XmlUtf16Convert
|
||||
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
|
||||
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
|
||||
#define XmlEncode XmlUtf16Encode
|
||||
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
|
||||
typedef unsigned short ICHAR;
|
||||
|
@ -35,11 +36,23 @@ typedef unsigned short ICHAR;
|
|||
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
|
||||
#define XmlConvert XmlUtf8Convert
|
||||
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
|
||||
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
|
||||
#define XmlEncode XmlUtf8Encode
|
||||
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
|
||||
typedef char ICHAR;
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef XMLNS
|
||||
|
||||
#define XmlInitEncodingNS XmlInitEncoding
|
||||
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
|
||||
#undef XmlGetInternalEncodingNS
|
||||
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XML_UNICODE_WCHAR_T
|
||||
#define XML_T(x) L ## x
|
||||
#else
|
||||
|
@ -225,6 +238,9 @@ typedef struct {
|
|||
const ENCODING *encoding;
|
||||
INIT_ENCODING initEncoding;
|
||||
const XML_Char *protocolEncodingName;
|
||||
#ifdef XMLNS
|
||||
int ns;
|
||||
#endif
|
||||
void *unknownEncodingMem;
|
||||
void *unknownEncodingData;
|
||||
void *unknownEncodingHandlerData;
|
||||
|
@ -274,6 +290,11 @@ typedef struct {
|
|||
(((Parser *)parser)->unknownEncodingHandlerData)
|
||||
#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease)
|
||||
#define protocolEncodingName (((Parser *)parser)->protocolEncodingName)
|
||||
#ifdef XMLNS
|
||||
#define ns (((Parser *)parser)->ns)
|
||||
#else
|
||||
#define ns (0)
|
||||
#endif
|
||||
#define prologState (((Parser *)parser)->prologState)
|
||||
#define processor (((Parser *)parser)->processor)
|
||||
#define errorCode (((Parser *)parser)->errorCode)
|
||||
|
@ -354,6 +375,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName)
|
|||
unknownEncodingRelease = 0;
|
||||
unknownEncodingData = 0;
|
||||
unknownEncodingHandlerData = 0;
|
||||
#ifdef XMLNS
|
||||
ns = 0;
|
||||
#endif
|
||||
poolInit(&tempPool);
|
||||
poolInit(&temp2Pool);
|
||||
protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
|
||||
|
@ -367,6 +391,20 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName)
|
|||
return parser;
|
||||
}
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
XML_Parser XML_ParserCreateNS(const XML_Char *encodingName)
|
||||
{
|
||||
XML_Parser parser = XML_ParserCreate(encodingName);
|
||||
if (parser) {
|
||||
XmlInitEncodingNS(&initEncoding, &encoding, 0);
|
||||
ns = 1;
|
||||
}
|
||||
return parser;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
|
||||
const XML_Char *openEntityNames,
|
||||
const XML_Char *encodingName)
|
||||
|
@ -383,7 +421,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
|
|||
void *oldUserData = userData;
|
||||
void *oldHandlerArg = handlerArg;
|
||||
|
||||
parser = XML_ParserCreate(encodingName);
|
||||
parser = (ns ? XML_ParserCreateNS : XML_ParserCreate)(encodingName);
|
||||
if (!parser)
|
||||
return 0;
|
||||
startElementHandler = oldStartElementHandler;
|
||||
|
@ -798,7 +836,7 @@ doContent(XML_Parser parser,
|
|||
const char *end,
|
||||
const char **nextPtr)
|
||||
{
|
||||
const ENCODING *internalEnc = XmlGetInternalEncoding();
|
||||
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
|
||||
const char *dummy;
|
||||
const char **eventPP;
|
||||
const char **eventEndPP;
|
||||
|
@ -1395,7 +1433,7 @@ initializeEncoding(XML_Parser parser)
|
|||
#else
|
||||
s = protocolEncodingName;
|
||||
#endif
|
||||
if (XmlInitEncoding(&initEncoding, &encoding, s))
|
||||
if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
|
||||
return XML_ERROR_NONE;
|
||||
return handleUnknownEncoding(parser, protocolEncodingName);
|
||||
}
|
||||
|
@ -1408,15 +1446,17 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
|
|||
const ENCODING *newEncoding = 0;
|
||||
const char *version;
|
||||
int standalone = -1;
|
||||
if (!XmlParseXmlDecl(isGeneralTextEntity,
|
||||
encoding,
|
||||
s,
|
||||
next,
|
||||
&eventPtr,
|
||||
&version,
|
||||
&encodingName,
|
||||
&newEncoding,
|
||||
&standalone))
|
||||
if (!(ns
|
||||
? XmlParseXmlDeclNS
|
||||
: XmlParseXmlDecl)(isGeneralTextEntity,
|
||||
encoding,
|
||||
s,
|
||||
next,
|
||||
&eventPtr,
|
||||
&version,
|
||||
&encodingName,
|
||||
&newEncoding,
|
||||
&standalone))
|
||||
return XML_ERROR_SYNTAX;
|
||||
if (defaultHandler)
|
||||
reportDefault(parser, encoding, s, next);
|
||||
|
@ -1468,10 +1508,12 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
|
|||
info.release(info.data);
|
||||
return XML_ERROR_NO_MEMORY;
|
||||
}
|
||||
enc = XmlInitUnknownEncoding(unknownEncodingMem,
|
||||
info.map,
|
||||
info.convert,
|
||||
info.data);
|
||||
enc = (ns
|
||||
? XmlInitUnknownEncodingNS
|
||||
: XmlInitUnknownEncoding)(unknownEncodingMem,
|
||||
info.map,
|
||||
info.convert,
|
||||
info.data);
|
||||
if (enc) {
|
||||
unknownEncodingData = info.data;
|
||||
unknownEncodingRelease = info.release;
|
||||
|
@ -1876,7 +1918,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
|
|||
const char *ptr, const char *end,
|
||||
STRING_POOL *pool)
|
||||
{
|
||||
const ENCODING *internalEnc = XmlGetInternalEncoding();
|
||||
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
|
||||
for (;;) {
|
||||
const char *next;
|
||||
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
|
||||
|
@ -1997,7 +2039,7 @@ enum XML_Error storeEntityValue(XML_Parser parser,
|
|||
const char *entityTextPtr,
|
||||
const char *entityTextEnd)
|
||||
{
|
||||
const ENCODING *internalEnc = XmlGetInternalEncoding();
|
||||
const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
|
||||
STRING_POOL *pool = &(dtd.pool);
|
||||
entityTextPtr += encoding->minBytesPerChar;
|
||||
entityTextEnd -= encoding->minBytesPerChar;
|
||||
|
|
|
@ -70,6 +70,9 @@ protocol or null if there is none specified. */
|
|||
XML_Parser XMLPARSEAPI
|
||||
XML_ParserCreate(const XML_Char *encoding);
|
||||
|
||||
XML_Parser XMLPARSEAPI
|
||||
XML_ParserCreateNS(const XML_Char *encoding);
|
||||
|
||||
|
||||
/* atts is array of name/value pairs, terminated by 0;
|
||||
names and values are 0 terminated. */
|
||||
|
|
|
@ -32,7 +32,7 @@ Contributor(s):
|
|||
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
|
||||
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
|
||||
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
|
||||
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
|
||||
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
|
||||
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
|
||||
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
|
||||
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
|
||||
|
|
|
@ -33,7 +33,7 @@ Contributor(s):
|
|||
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
|
||||
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
|
||||
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
|
||||
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
|
||||
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
|
||||
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
|
||||
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
|
||||
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
|
||||
|
|
|
@ -149,6 +149,7 @@ int doctype0(PROLOG_STATE *state,
|
|||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = doctype1;
|
||||
return XML_ROLE_DOCTYPE_NAME;
|
||||
}
|
||||
|
@ -610,6 +611,7 @@ int attlist0(PROLOG_STATE *state,
|
|||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = attlist1;
|
||||
return XML_ROLE_ATTLIST_ELEMENT_NAME;
|
||||
}
|
||||
|
@ -630,6 +632,7 @@ int attlist1(PROLOG_STATE *state,
|
|||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = attlist2;
|
||||
return XML_ROLE_ATTRIBUTE_NAME;
|
||||
}
|
||||
|
@ -689,6 +692,7 @@ int attlist3(PROLOG_STATE *state,
|
|||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NMTOKEN:
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = attlist4;
|
||||
return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
|
||||
}
|
||||
|
@ -836,6 +840,7 @@ int element0(PROLOG_STATE *state,
|
|||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = element1;
|
||||
return XML_ROLE_ELEMENT_NAME;
|
||||
}
|
||||
|
@ -893,6 +898,7 @@ int element2(PROLOG_STATE *state,
|
|||
state->handler = element6;
|
||||
return XML_ROLE_GROUP_OPEN;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = element7;
|
||||
return XML_ROLE_CONTENT_ELEMENT;
|
||||
case XML_TOK_NAME_QUESTION:
|
||||
|
@ -940,6 +946,7 @@ int element4(PROLOG_STATE *state,
|
|||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = element5;
|
||||
return XML_ROLE_CONTENT_ELEMENT;
|
||||
}
|
||||
|
@ -980,6 +987,7 @@ int element6(PROLOG_STATE *state,
|
|||
state->level += 1;
|
||||
return XML_ROLE_GROUP_OPEN;
|
||||
case XML_TOK_NAME:
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
state->handler = element7;
|
||||
return XML_ROLE_CONTENT_ELEMENT;
|
||||
case XML_TOK_NAME_QUESTION:
|
||||
|
|
|
@ -252,7 +252,8 @@ void utf8_toUtf16(const ENCODING *enc,
|
|||
*toP = to;
|
||||
}
|
||||
|
||||
static const struct normal_encoding utf8_encoding = {
|
||||
#ifdef XMLNS
|
||||
static const struct normal_encoding utf8_encoding_ns = {
|
||||
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#include "asciitab.h"
|
||||
|
@ -260,11 +261,38 @@ static const struct normal_encoding utf8_encoding = {
|
|||
},
|
||||
NORMAL_VTABLE(utf8_)
|
||||
};
|
||||
#endif
|
||||
|
||||
/* UTF-8 encoding table used when namespace processing is off.
   BT_COLON is defined as BT_NMSTRT only while asciitab.h is included, so
   every entry written as BT_COLON in that table becomes BT_NMSTRT here. */
static const struct normal_encoding utf8_encoding = {
|
||||
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "asciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "utf8tab.h"
|
||||
},
|
||||
NORMAL_VTABLE(utf8_)
|
||||
};
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
/* Internal UTF-8 encoding table for namespace processing; returned by
   XmlGetUtf8InternalEncodingNS.  iasciitab.h is included without
   remapping BT_COLON, so entries written as BT_COLON keep that byte type. */
static const struct normal_encoding internal_utf8_encoding_ns = {
|
||||
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#include "iasciitab.h"
|
||||
#include "utf8tab.h"
|
||||
},
|
||||
NORMAL_VTABLE(utf8_)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static const struct normal_encoding internal_utf8_encoding = {
|
||||
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "iasciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "utf8tab.h"
|
||||
},
|
||||
NORMAL_VTABLE(utf8_)
|
||||
|
@ -304,7 +332,9 @@ void latin1_toUtf16(const ENCODING *enc,
|
|||
*(*toP)++ = (unsigned char)*(*fromP)++;
|
||||
}
|
||||
|
||||
static const struct normal_encoding latin1_encoding = {
|
||||
#ifdef XMLNS
|
||||
|
||||
static const struct normal_encoding latin1_encoding_ns = {
|
||||
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
|
||||
{
|
||||
#include "asciitab.h"
|
||||
|
@ -312,6 +342,18 @@ static const struct normal_encoding latin1_encoding = {
|
|||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* ISO-8859-1 encoding table used when namespace processing is off.
   BT_COLON is defined as BT_NMSTRT only while asciitab.h is included, so
   every entry written as BT_COLON in that table becomes BT_NMSTRT here. */
static const struct normal_encoding latin1_encoding = {
|
||||
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "asciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
static
|
||||
void ascii_toUtf8(const ENCODING *enc,
|
||||
const char **fromP, const char *fromLim,
|
||||
|
@ -321,7 +363,9 @@ void ascii_toUtf8(const ENCODING *enc,
|
|||
*(*toP)++ = *(*fromP)++;
|
||||
}
|
||||
|
||||
static const struct normal_encoding ascii_encoding = {
|
||||
#ifdef XMLNS
|
||||
|
||||
static const struct normal_encoding ascii_encoding_ns = {
|
||||
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#include "asciitab.h"
|
||||
|
@ -329,6 +373,18 @@ static const struct normal_encoding ascii_encoding = {
|
|||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* US-ASCII encoding table used when namespace processing is off.
   BT_COLON is defined as BT_NMSTRT only while asciitab.h is included, so
   every entry written as BT_COLON in that table becomes BT_NMSTRT here.
   Only the ASCII half of the byte-type array is listed; the remaining
   entries are left to default initialisation (see the BT_NONXML note). */
static const struct normal_encoding ascii_encoding = {
|
||||
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "asciitab.h"
|
||||
#undef BT_COLON
|
||||
/* BT_NONXML == 0 */
|
||||
}
|
||||
};
|
||||
|
||||
#undef PREFIX
|
||||
|
||||
static int unicode_byte_type(char hi, char lo)
|
||||
|
@ -464,6 +520,24 @@ DEFINE_UTF16_TO_UTF16
|
|||
#undef IS_NMSTRT_CHAR_MINBPC
|
||||
#undef IS_INVALID_CHAR
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
/* Little-endian two-byte encoding table for namespace processing.
   asciitab.h is included without remapping BT_COLON, so entries written
   as BT_COLON keep that byte type.
   NOTE(review): the three values after VTABLE are presumably
   minBytesPerChar, isUtf8 and isUtf16 -- confirm against the ENCODING
   struct.  The last one is 1 only when BYTE_ORDER == 12. */
static const struct normal_encoding little2_encoding_ns = {
|
||||
{ VTABLE, 2, 0,
|
||||
#if BYTE_ORDER == 12
|
||||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
},
|
||||
{
|
||||
#include "asciitab.h"
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static const struct normal_encoding little2_encoding = {
|
||||
{ VTABLE, 2, 0,
|
||||
#if BYTE_ORDER == 12
|
||||
|
@ -472,16 +546,36 @@ static const struct normal_encoding little2_encoding = {
|
|||
0
|
||||
#endif
|
||||
},
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "asciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#if BYTE_ORDER != 21
|
||||
|
||||
static const struct normal_encoding internal_little2_encoding = {
|
||||
#ifdef XMLNS
|
||||
|
||||
/* Internal little-endian two-byte encoding table for namespace
   processing; returned by XmlGetUtf16InternalEncodingNS.  iasciitab.h is
   included without remapping BT_COLON, so entries written as BT_COLON
   keep that byte type. */
static const struct normal_encoding internal_little2_encoding_ns = {
|
||||
{ VTABLE, 2, 0, 1 },
|
||||
{
|
||||
#include "iasciitab.h"
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* Internal little-endian two-byte encoding table used when namespace
   processing is off.  BT_COLON is defined as BT_NMSTRT only while
   iasciitab.h is included, so every entry written as BT_COLON in that
   table becomes BT_NMSTRT here. */
static const struct normal_encoding internal_little2_encoding = {
|
||||
{ VTABLE, 2, 0, 1 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "iasciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -527,6 +621,24 @@ DEFINE_UTF16_TO_UTF16
|
|||
#undef IS_NMSTRT_CHAR_MINBPC
|
||||
#undef IS_INVALID_CHAR
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
/* Big-endian two-byte encoding table for namespace processing.
   asciitab.h is included without remapping BT_COLON, so entries written
   as BT_COLON keep that byte type.
   NOTE(review): the three values after VTABLE are presumably
   minBytesPerChar, isUtf8 and isUtf16 -- confirm against the ENCODING
   struct.  The last one is 1 only when BYTE_ORDER == 21. */
static const struct normal_encoding big2_encoding_ns = {
|
||||
{ VTABLE, 2, 0,
|
||||
#if BYTE_ORDER == 21
|
||||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
},
|
||||
{
|
||||
#include "asciitab.h"
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static const struct normal_encoding big2_encoding = {
|
||||
{ VTABLE, 2, 0,
|
||||
#if BYTE_ORDER == 21
|
||||
|
@ -535,16 +647,36 @@ static const struct normal_encoding big2_encoding = {
|
|||
0
|
||||
#endif
|
||||
},
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "asciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#if BYTE_ORDER != 12
|
||||
|
||||
static const struct normal_encoding internal_big2_encoding = {
|
||||
#ifdef XMLNS
|
||||
|
||||
/* Internal big-endian two-byte encoding table for namespace processing;
   returned by XmlGetUtf16InternalEncodingNS.  iasciitab.h is included
   without remapping BT_COLON, so entries written as BT_COLON keep that
   byte type. */
static const struct normal_encoding internal_big2_encoding_ns = {
|
||||
{ VTABLE, 2, 0, 1 },
|
||||
{
|
||||
#include "iasciitab.h"
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* Internal big-endian two-byte encoding table used when namespace
   processing is off.  BT_COLON is defined as BT_NMSTRT only while
   iasciitab.h is included, so every entry written as BT_COLON in that
   table becomes BT_NMSTRT here. */
static const struct normal_encoding internal_big2_encoding = {
|
||||
{ VTABLE, 2, 0, 1 },
|
||||
{
|
||||
#define BT_COLON BT_NMSTRT
|
||||
#include "iasciitab.h"
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -812,15 +944,19 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e
|
|||
return 0;
|
||||
}
|
||||
|
||||
int XmlParseXmlDecl(int isGeneralTextEntity,
|
||||
const ENCODING *enc,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr,
|
||||
const char **versionPtr,
|
||||
const char **encodingName,
|
||||
const ENCODING **encoding,
|
||||
int *standalone)
|
||||
static
|
||||
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
|
||||
const char *,
|
||||
const char *),
|
||||
int isGeneralTextEntity,
|
||||
const ENCODING *enc,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr,
|
||||
const char **versionPtr,
|
||||
const char **encodingName,
|
||||
const ENCODING **encoding,
|
||||
int *standalone)
|
||||
{
|
||||
const char *val = 0;
|
||||
const char *name = 0;
|
||||
|
@ -855,7 +991,7 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
|
|||
if (encodingName)
|
||||
*encodingName = val;
|
||||
if (encoding)
|
||||
*encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar);
|
||||
*encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
|
||||
if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
|
||||
*badPtr = ptr;
|
||||
return 0;
|
||||
|
@ -888,6 +1024,19 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
|
|||
return 1;
|
||||
}
|
||||
|
||||
int XmlParseXmlDecl(int isGeneralTextEntity,
|
||||
const ENCODING *enc,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr,
|
||||
const char **versionPtr,
|
||||
const char **encodingName,
|
||||
const ENCODING **encoding,
|
||||
int *standalone)
|
||||
{
|
||||
return doParseXmlDecl(findEncoding, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone);
|
||||
}
|
||||
|
||||
static
|
||||
int checkCharRefNumber(int result)
|
||||
{
|
||||
|
@ -1133,3 +1282,163 @@ XmlInitUnknownEncoding(void *mem,
|
|||
e->normal.enc.utf16Convert = unknown_toUtf16;
|
||||
return &(e->normal.enc);
|
||||
}
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
const ENCODING *XmlGetUtf8InternalEncodingNS()
|
||||
{
|
||||
return &internal_utf8_encoding_ns.enc;
|
||||
}
|
||||
|
||||
const ENCODING *XmlGetUtf16InternalEncodingNS()
|
||||
{
|
||||
#if BYTE_ORDER == 12
|
||||
return &internal_little2_encoding_ns.enc;
|
||||
#elif BYTE_ORDER == 21
|
||||
return &internal_big2_encoding_ns.enc;
|
||||
#else
|
||||
const short n = 1;
|
||||
return *(const char *)&n ? &internal_little2_encoding_ns.enc : &internal_big2_encoding_ns.enc;
|
||||
#endif
|
||||
}
|
||||
|
||||
static
|
||||
int initScanNS(const ENCODING *enc, int state, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
const ENCODING **encPtr;
|
||||
|
||||
if (ptr == end)
|
||||
return XML_TOK_NONE;
|
||||
encPtr = ((const INIT_ENCODING *)enc)->encPtr;
|
||||
if (ptr + 1 == end) {
|
||||
switch ((unsigned char)*ptr) {
|
||||
case 0xFE:
|
||||
case 0xFF:
|
||||
case 0x00:
|
||||
case 0x3C:
|
||||
return XML_TOK_PARTIAL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
|
||||
case 0x003C:
|
||||
*encPtr = &big2_encoding_ns.enc;
|
||||
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
|
||||
case 0xFEFF:
|
||||
*nextTokPtr = ptr + 2;
|
||||
*encPtr = &big2_encoding_ns.enc;
|
||||
return XML_TOK_BOM;
|
||||
case 0x3C00:
|
||||
*encPtr = &little2_encoding_ns.enc;
|
||||
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
|
||||
case 0xFFFE:
|
||||
*nextTokPtr = ptr + 2;
|
||||
*encPtr = &little2_encoding_ns.enc;
|
||||
return XML_TOK_BOM;
|
||||
}
|
||||
}
|
||||
*encPtr = &utf8_encoding_ns.enc;
|
||||
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int initScanPrologNS(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
return initScanNS(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
|
||||
}
|
||||
|
||||
static
|
||||
int initScanContentNS(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
return initScanNS(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
|
||||
}
|
||||
|
||||
/* Namespace-aware counterpart of XmlInitEncoding.
   p       - INIT_ENCODING to set up when the encoding must be detected
   encPtr  - receives the encoding to start tokenizing with
   name    - encoding name, or null if none was given

   For "ISO-8859-1", "UTF-8" and "US-ASCII" (compared with streqci) the
   matching namespace table is stored in *encPtr directly.  For a null
   name or "UTF-16", *p is filled in with the detecting scanners
   initScanPrologNS / initScanContentNS and *encPtr is pointed at it.
   Returns 1 on success and 0 for any other name. */
int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{
  if (name != 0) {
    const ENCODING *fixed = 0;

    if (streqci(name, "ISO-8859-1"))
      fixed = &latin1_encoding_ns.enc;
    else if (streqci(name, "UTF-8"))
      fixed = &utf8_encoding_ns.enc;
    else if (streqci(name, "US-ASCII"))
      fixed = &ascii_encoding_ns.enc;

    if (fixed != 0) {
      *encPtr = fixed;
      return 1;
    }
    /* Only UTF-16 is left as an acceptable name; its byte order is
       worked out by the initial scanners below. */
    if (!streqci(name, "UTF-16"))
      return 0;
  }

  p->initEnc.scanners[XML_PROLOG_STATE] = initScanPrologNS;
  p->initEnc.scanners[XML_CONTENT_STATE] = initScanContentNS;
  p->initEnc.updatePosition = initUpdatePosition;
  p->initEnc.minBytesPerChar = 1;
  p->encPtr = encPtr;
  *encPtr = &(p->initEnc);
  return 1;
}
|
||||
|
||||
|
||||
static
|
||||
const ENCODING *findEncodingNS(const ENCODING *enc, const char *ptr, const char *end)
|
||||
{
|
||||
#define ENCODING_MAX 128
|
||||
char buf[ENCODING_MAX];
|
||||
char *p = buf;
|
||||
int i;
|
||||
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
|
||||
if (ptr != end)
|
||||
return 0;
|
||||
*p = 0;
|
||||
for (i = 0; buf[i]; i++) {
|
||||
if ('a' <= buf[i] && buf[i] <= 'z')
|
||||
buf[i] += 'A' - 'a';
|
||||
}
|
||||
if (streqci(buf, "UTF-8"))
|
||||
return &utf8_encoding_ns.enc;
|
||||
if (streqci(buf, "ISO-8859-1"))
|
||||
return &latin1_encoding_ns.enc;
|
||||
if (streqci(buf, "US-ASCII"))
|
||||
return &ascii_encoding_ns.enc;
|
||||
if (streqci(buf, "UTF-16")) {
|
||||
static const unsigned short n = 1;
|
||||
if (enc->minBytesPerChar == 2)
|
||||
return enc;
|
||||
return &big2_encoding_ns.enc;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int XmlParseXmlDeclNS(int isGeneralTextEntity,
|
||||
const ENCODING *enc,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr,
|
||||
const char **versionPtr,
|
||||
const char **encodingName,
|
||||
const ENCODING **encoding,
|
||||
int *standalone)
|
||||
{
|
||||
return doParseXmlDecl(findEncodingNS, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone);
|
||||
}
|
||||
|
||||
ENCODING *
|
||||
XmlInitUnknownEncodingNS(void *mem,
|
||||
int *table,
|
||||
int (*convert)(void *userData, const char *p),
|
||||
void *userData)
|
||||
{
|
||||
ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
|
||||
if (enc)
|
||||
((struct normal_encoding *)enc)->type[':'] = BT_COLON;
|
||||
return enc;
|
||||
}
|
||||
|
||||
#endif /* XMLNS */
|
||||
|
|
|
@ -94,6 +94,10 @@ extern "C" {
|
|||
/* The following token is returned only by XmlCdataSectionTok */
|
||||
#define XML_TOK_CDATA_SECT_CLOSE 40
|
||||
|
||||
/* With namespace processing this is returned by XmlPrologTok
|
||||
for a name with a colon. */
|
||||
#define XML_TOK_PREFIXED_NAME 41
|
||||
|
||||
#define XML_N_STATES 3
|
||||
#define XML_PROLOG_STATE 0
|
||||
#define XML_CONTENT_STATE 1
|
||||
|
@ -269,6 +273,23 @@ XmlInitUnknownEncoding(void *mem,
|
|||
int (*convert)(void *userData, const char *p),
|
||||
void *userData);
|
||||
|
||||
int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity,
|
||||
const ENCODING *enc,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr,
|
||||
const char **versionPtr,
|
||||
const char **encodingNamePtr,
|
||||
const ENCODING **namedEncodingPtr,
|
||||
int *standalonePtr);
|
||||
int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
|
||||
const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS();
|
||||
const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS();
|
||||
ENCODING XMLTOKAPI *
|
||||
XmlInitUnknownEncodingNS(void *mem,
|
||||
int *table,
|
||||
int (*convert)(void *userData, const char *p),
|
||||
void *userData);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -421,6 +421,12 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
}
|
||||
}
|
||||
return XML_TOK_PARTIAL;
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
/* no need to check qname syntax here, since end-tag must match exactly */
|
||||
ptr += MINBPC;
|
||||
break;
|
||||
#endif
|
||||
case BT_GT:
|
||||
*nextTokPtr = ptr + MINBPC;
|
||||
return XML_TOK_END_TAG;
|
||||
|
@ -532,9 +538,30 @@ static
|
|||
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
#ifdef XMLNS
|
||||
int hadColon = 0;
|
||||
#endif
|
||||
while (ptr != end) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
if (hadColon) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
hadColon = 1;
|
||||
ptr += MINBPC;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
for (;;) {
|
||||
int t;
|
||||
|
@ -559,6 +586,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
case BT_EQUALS:
|
||||
{
|
||||
int open;
|
||||
#ifdef XMLNS
|
||||
hadColon = 0;
|
||||
#endif
|
||||
for (;;) {
|
||||
|
||||
ptr += MINBPC;
|
||||
|
@ -668,6 +698,9 @@ static
|
|||
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
#ifdef XMLNS
|
||||
int hadColon;
|
||||
#endif
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
|
@ -691,10 +724,31 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
#ifdef XMLNS
|
||||
hadColon = 0;
|
||||
#endif
|
||||
/* we have a start-tag */
|
||||
while (ptr != end) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
if (hadColon) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
hadColon = 1;
|
||||
ptr += MINBPC;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
{
|
||||
ptr += MINBPC;
|
||||
|
@ -1078,6 +1132,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
case BT_DIGIT:
|
||||
case BT_NAME:
|
||||
case BT_MINUS:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
tok = XML_TOK_NMTOKEN;
|
||||
ptr += MINBPC;
|
||||
break;
|
||||
|
@ -1105,22 +1162,43 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
case BT_S: case BT_CR: case BT_LF:
|
||||
*nextTokPtr = ptr;
|
||||
return tok;
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
ptr += MINBPC;
|
||||
switch (tok) {
|
||||
case XML_TOK_NAME:
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
tok = XML_TOK_PREFIXED_NAME;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
tok = XML_TOK_NMTOKEN;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case XML_TOK_PREFIXED_NAME:
|
||||
tok = XML_TOK_NMTOKEN;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case BT_PLUS:
|
||||
if (tok != XML_TOK_NAME) {
|
||||
if (tok == XML_TOK_NMTOKEN) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
*nextTokPtr = ptr + MINBPC;
|
||||
return XML_TOK_NAME_PLUS;
|
||||
case BT_AST:
|
||||
if (tok != XML_TOK_NAME) {
|
||||
if (tok == XML_TOK_NMTOKEN) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
*nextTokPtr = ptr + MINBPC;
|
||||
return XML_TOK_NAME_ASTERISK;
|
||||
case BT_QUEST:
|
||||
if (tok != XML_TOK_NAME) {
|
||||
if (tok == XML_TOK_NMTOKEN) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
|
@ -1270,6 +1348,9 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
case BT_AST:
|
||||
case BT_PERCNT:
|
||||
case BT_NUM:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
break;
|
||||
case BT_S:
|
||||
if (CHAR_MATCHES(enc, ptr, '\t')) {
|
||||
|
@ -1494,6 +1575,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|||
break;
|
||||
case BT_NONASCII:
|
||||
case BT_NMSTRT:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
case BT_HEX:
|
||||
case BT_DIGIT:
|
||||
case BT_NAME:
|
||||
|
@ -1524,6 +1608,9 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|||
case BT_LEAD4:
|
||||
case BT_NONASCII:
|
||||
case BT_NMSTRT:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
case BT_HEX:
|
||||
case BT_DIGIT:
|
||||
case BT_NAME:
|
||||
|
@ -1550,6 +1637,9 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *
|
|||
case BT_LEAD4:
|
||||
case BT_NONASCII:
|
||||
case BT_NMSTRT:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
case BT_HEX:
|
||||
case BT_DIGIT:
|
||||
case BT_NAME:
|
||||
|
@ -1572,6 +1662,9 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
|
|||
#undef LEAD_CASE
|
||||
case BT_NONASCII:
|
||||
case BT_NMSTRT:
|
||||
#ifdef XMLNS
|
||||
case BT_COLON:
|
||||
#endif
|
||||
case BT_HEX:
|
||||
case BT_DIGIT:
|
||||
case BT_NAME:
|
||||
|
|
|
@ -42,6 +42,7 @@ enum {
|
|||
BT_LSQB,
|
||||
BT_S,
|
||||
BT_NMSTRT,
|
||||
BT_COLON,
|
||||
BT_HEX,
|
||||
BT_DIGIT,
|
||||
BT_NAME,
|
||||
|
|
|
@ -509,6 +509,9 @@ int tmain(int argc, XML_Char **argv)
|
|||
int processExternalEntities = 0;
|
||||
int windowsCodePages = 0;
|
||||
int outputType = 0;
|
||||
#ifdef XMLNS
|
||||
int enforceNamespaceSyntax = 0;
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
|
||||
|
@ -526,6 +529,12 @@ int tmain(int argc, XML_Char **argv)
|
|||
useFilemap = 0;
|
||||
j++;
|
||||
}
|
||||
#ifdef XMLNS
|
||||
if (argv[i][j] == T('n')) {
|
||||
enforceNamespaceSyntax = 1;
|
||||
j++;
|
||||
}
|
||||
#endif
|
||||
if (argv[i][j] == T('x')) {
|
||||
processExternalEntities = 1;
|
||||
j++;
|
||||
|
@ -573,7 +582,13 @@ int tmain(int argc, XML_Char **argv)
|
|||
FILE *fp = 0;
|
||||
XML_Char *outName = 0;
|
||||
int result;
|
||||
#ifdef XMLNS
|
||||
XML_Parser parser = (enforceNamespaceSyntax
|
||||
? XML_ParserCreateNS
|
||||
: XML_ParserCreate)(encoding);
|
||||
#else
|
||||
XML_Parser parser = XML_ParserCreate(encoding);
|
||||
#endif
|
||||
if (outputDir) {
|
||||
const XML_Char *file = argv[i];
|
||||
if (tcsrchr(file, T('/')))
|
||||
|
|
Loading…
Add table
Reference in a new issue