mirror of
https://github.com/libexpat/libexpat.git
synced 2025-04-05 13:14:59 +00:00
New token XML_TOK_XML_DECL
This commit is contained in:
parent
db6a73f853
commit
b2eeefe571
4 changed files with 305 additions and 177 deletions
|
@ -38,10 +38,14 @@ int prolog0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
state->handler = prolog1;
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_XML_DECL:
|
||||
state->handler = prolog1;
|
||||
return XML_ROLE_XML_DECL;
|
||||
case XML_TOK_PI:
|
||||
state->handler = prolog1;
|
||||
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
|
||||
return XML_ROLE_XML_DECL;
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_COMMENT:
|
||||
state->handler = prolog1;
|
||||
|
@ -69,9 +73,9 @@ int prolog1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_PI:
|
||||
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
|
||||
return syntaxError(state);
|
||||
case XML_TOK_COMMENT:
|
||||
case XML_TOK_BOM:
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -97,9 +101,9 @@ int prolog2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_PI:
|
||||
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
|
||||
return syntaxError(state);
|
||||
case XML_TOK_COMMENT:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_INSTANCE_START:
|
||||
|
@ -117,6 +121,8 @@ int doctype0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = doctype1;
|
||||
return XML_ROLE_DOCTYPE_NAME;
|
||||
|
@ -132,6 +138,8 @@ int doctype1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_OPEN_BRACKET:
|
||||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -160,6 +168,8 @@ int doctype2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = doctype3;
|
||||
return XML_ROLE_DOCTYPE_PUBLIC_ID;
|
||||
|
@ -175,6 +185,8 @@ int doctype3(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = doctype4;
|
||||
return XML_ROLE_DOCTYPE_SYSTEM_ID;
|
||||
|
@ -190,6 +202,8 @@ int doctype4(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_OPEN_BRACKET:
|
||||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -208,6 +222,8 @@ int doctype5(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_DECL_CLOSE:
|
||||
state->handler = prolog2;
|
||||
return XML_ROLE_DOCTYPE_CLOSE;
|
||||
|
@ -223,6 +239,8 @@ int internalSubset(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_DECL_OPEN:
|
||||
if (XmlNameMatchesAscii(enc,
|
||||
ptr + 2 * enc->minBytesPerChar,
|
||||
|
@ -250,8 +268,6 @@ int internalSubset(PROLOG_STATE *state,
|
|||
}
|
||||
break;
|
||||
case XML_TOK_PI:
|
||||
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
|
||||
return syntaxError(state);
|
||||
case XML_TOK_COMMENT:
|
||||
case XML_TOK_PARAM_ENTITY_REF:
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -270,6 +286,8 @@ int entity0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_PERCENT:
|
||||
state->handler = entity1;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -288,6 +306,8 @@ int entity1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = entity7;
|
||||
return XML_ROLE_PARAM_ENTITY_NAME;
|
||||
|
@ -303,6 +323,8 @@ int entity2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
||||
state->handler = entity4;
|
||||
|
@ -328,6 +350,8 @@ int entity3(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = entity4;
|
||||
return XML_ROLE_ENTITY_PUBLIC_ID;
|
||||
|
@ -344,6 +368,8 @@ int entity4(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = entity5;
|
||||
return XML_ROLE_ENTITY_SYSTEM_ID;
|
||||
|
@ -359,6 +385,8 @@ int entity5(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_DECL_CLOSE:
|
||||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -380,6 +408,8 @@ int entity6(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = declClose;
|
||||
return XML_ROLE_ENTITY_NOTATION_NAME;
|
||||
|
@ -395,6 +425,8 @@ int entity7(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
||||
state->handler = entity9;
|
||||
|
@ -420,6 +452,8 @@ int entity8(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = entity9;
|
||||
return XML_ROLE_ENTITY_PUBLIC_ID;
|
||||
|
@ -435,6 +469,8 @@ int entity9(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = declClose;
|
||||
return XML_ROLE_ENTITY_SYSTEM_ID;
|
||||
|
@ -450,6 +486,8 @@ int notation0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = notation1;
|
||||
return XML_ROLE_NOTATION_NAME;
|
||||
|
@ -465,6 +503,8 @@ int notation1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
||||
state->handler = notation3;
|
||||
|
@ -487,6 +527,8 @@ int notation2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = notation4;
|
||||
return XML_ROLE_NOTATION_PUBLIC_ID;
|
||||
|
@ -502,6 +544,8 @@ int notation3(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = declClose;
|
||||
return XML_ROLE_NOTATION_SYSTEM_ID;
|
||||
|
@ -517,6 +561,8 @@ int notation4(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = declClose;
|
||||
return XML_ROLE_NOTATION_SYSTEM_ID;
|
||||
|
@ -535,6 +581,8 @@ int attlist0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = attlist1;
|
||||
return XML_ROLE_ATTLIST_ELEMENT_NAME;
|
||||
|
@ -550,6 +598,8 @@ int attlist1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_DECL_CLOSE:
|
||||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -568,6 +618,8 @@ int attlist2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
{
|
||||
static const char *types[] = {
|
||||
|
@ -607,6 +659,8 @@ int attlist3(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NMTOKEN:
|
||||
case XML_TOK_NAME:
|
||||
state->handler = attlist4;
|
||||
|
@ -623,6 +677,8 @@ int attlist4(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_CLOSE_PAREN:
|
||||
state->handler = attlist8;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -641,6 +697,8 @@ int attlist5(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_OPEN_PAREN:
|
||||
state->handler = attlist6;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -657,6 +715,8 @@ int attlist6(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = attlist7;
|
||||
return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
|
||||
|
@ -672,6 +732,8 @@ int attlist7(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_CLOSE_PAREN:
|
||||
state->handler = attlist8;
|
||||
return XML_ROLE_NONE;
|
||||
|
@ -691,6 +753,8 @@ int attlist8(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_POUND_NAME:
|
||||
if (XmlNameMatchesAscii(enc,
|
||||
ptr + enc->minBytesPerChar,
|
||||
|
@ -726,6 +790,8 @@ int attlist9(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_LITERAL:
|
||||
state->handler = attlist1;
|
||||
return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
|
||||
|
@ -741,6 +807,8 @@ int element0(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = element1;
|
||||
return XML_ROLE_ELEMENT_NAME;
|
||||
|
@ -756,6 +824,8 @@ int element1(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) {
|
||||
state->handler = declClose;
|
||||
|
@ -782,6 +852,8 @@ int element2(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_POUND_NAME:
|
||||
if (XmlNameMatchesAscii(enc,
|
||||
ptr + enc->minBytesPerChar,
|
||||
|
@ -818,6 +890,8 @@ int element3(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_CLOSE_PAREN:
|
||||
case XML_TOK_CLOSE_PAREN_ASTERISK:
|
||||
state->handler = declClose;
|
||||
|
@ -837,6 +911,8 @@ int element4(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_NAME:
|
||||
state->handler = element5;
|
||||
return XML_ROLE_CONTENT_ELEMENT;
|
||||
|
@ -852,6 +928,8 @@ int element5(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_CLOSE_PAREN_ASTERISK:
|
||||
state->handler = declClose;
|
||||
return XML_ROLE_GROUP_CLOSE_REP;
|
||||
|
@ -870,6 +948,8 @@ int element6(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_OPEN_PAREN:
|
||||
state->level += 1;
|
||||
return XML_ROLE_GROUP_OPEN;
|
||||
|
@ -897,6 +977,8 @@ int element7(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_CLOSE_PAREN:
|
||||
state->level -= 1;
|
||||
if (state->level == 0)
|
||||
|
@ -935,6 +1017,8 @@ int declClose(PROLOG_STATE *state,
|
|||
const ENCODING *enc)
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
return XML_ROLE_NONE;
|
||||
case XML_TOK_DECL_CLOSE:
|
||||
state->handler = internalSubset;
|
||||
return XML_ROLE_NONE;
|
||||
|
|
|
@ -33,11 +33,11 @@ extern "C" {
|
|||
|
||||
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
|
||||
#define XML_TOK_PI 11 /* processing instruction */
|
||||
#define XML_TOK_COMMENT 12
|
||||
#define XML_TOK_BOM 13 /* Byte order mark */
|
||||
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
|
||||
#define XML_TOK_COMMENT 13
|
||||
#define XML_TOK_BOM 14 /* Byte order mark */
|
||||
|
||||
/* The following tokens are returned only by XmlPrologTok */
|
||||
#define XML_TOK_INSTANCE_START 14
|
||||
#define XML_TOK_PROLOG_S 15
|
||||
#define XML_TOK_DECL_OPEN 16 /* <!foo */
|
||||
#define XML_TOK_DECL_CLOSE 17 /* > */
|
||||
|
@ -52,9 +52,9 @@ extern "C" {
|
|||
#define XML_TOK_CLOSE_BRACKET 26
|
||||
#define XML_TOK_LITERAL 27
|
||||
#define XML_TOK_PARAM_ENTITY_REF 28
|
||||
#define XML_TOK_INSTANCE_START 29
|
||||
|
||||
/* The following occur only in element type declarations */
|
||||
#define XML_TOK_COMMA 29
|
||||
#define XML_TOK_NAME_QUESTION 30 /* name? */
|
||||
#define XML_TOK_NAME_ASTERISK 31 /* name* */
|
||||
#define XML_TOK_NAME_PLUS 32 /* name+ */
|
||||
|
@ -63,7 +63,7 @@ extern "C" {
|
|||
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
|
||||
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
|
||||
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
|
||||
|
||||
#define XML_TOK_COMMA 38
|
||||
|
||||
#define XML_N_STATES 2
|
||||
#define XML_PROLOG_STATE 0
|
||||
|
|
|
@ -162,12 +162,55 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
return XML_TOK_PARTIAL;
|
||||
}
|
||||
|
||||
static
|
||||
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
|
||||
{
|
||||
int upper = 0;
|
||||
if (end - ptr != MINBPC*3)
|
||||
return 1;
|
||||
switch (BYTE_TO_ASCII(enc, ptr)) {
|
||||
case 'x':
|
||||
break;
|
||||
case 'X':
|
||||
upper = 1;
|
||||
break;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
ptr += MINBPC;
|
||||
switch (BYTE_TO_ASCII(enc, ptr)) {
|
||||
case 'm':
|
||||
break;
|
||||
case 'M':
|
||||
upper = 1;
|
||||
break;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
ptr += MINBPC;
|
||||
switch (BYTE_TO_ASCII(enc, ptr)) {
|
||||
case 'l':
|
||||
break;
|
||||
case 'L':
|
||||
upper = 1;
|
||||
break;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
if (upper)
|
||||
return 0;
|
||||
*tokPtr = XML_TOK_XML_DECL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ptr points to character following "<?" */
|
||||
|
||||
static
|
||||
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
int tok;
|
||||
const char *target = ptr;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
|
@ -180,6 +223,10 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
ptr += MINBPC;
|
||||
while (ptr != end) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
|
@ -191,7 +238,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
return XML_TOK_PARTIAL;
|
||||
if (CHAR_MATCHES(enc, ptr, '>')) {
|
||||
*nextTokPtr = ptr + MINBPC;
|
||||
return XML_TOK_PI;
|
||||
return tok;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -201,12 +248,16 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
|||
}
|
||||
return XML_TOK_PARTIAL;
|
||||
case BT_QUEST:
|
||||
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
ptr += MINBPC;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
if (CHAR_MATCHES(enc, ptr, '>')) {
|
||||
*nextTokPtr = ptr + MINBPC;
|
||||
return XML_TOK_PI;
|
||||
return tok;
|
||||
}
|
||||
/* fall through */
|
||||
default:
|
||||
|
|
|
@ -293,12 +293,9 @@ checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
|
|||
return badCharRef;
|
||||
}
|
||||
break;
|
||||
case XML_TOK_PI:
|
||||
if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) {
|
||||
*badPtr = s;
|
||||
return misplacedXmlPi;
|
||||
}
|
||||
break;
|
||||
case XML_TOK_XML_DECL:
|
||||
*badPtr = s;
|
||||
return misplacedXmlPi;
|
||||
}
|
||||
s = next;
|
||||
if (level == 0) {
|
||||
|
@ -411,166 +408,163 @@ checkProlog(DTD *dtd, const char *s, const char *end,
|
|||
for (;;) {
|
||||
const char *next;
|
||||
int tok = XmlPrologTok(*enc, s, end, &next);
|
||||
if (tok != XML_TOK_PROLOG_S) {
|
||||
switch (XmlTokenRole(&state, tok, s, next, *enc)) {
|
||||
case XML_ROLE_XML_DECL:
|
||||
{
|
||||
const char *encodingName = 0;
|
||||
const ENCODING *encoding = 0;
|
||||
const char *version;
|
||||
int standalone = -1;
|
||||
if (!XmlParseXmlDecl(0,
|
||||
*enc,
|
||||
s,
|
||||
next,
|
||||
nextPtr,
|
||||
&version,
|
||||
&encodingName,
|
||||
&encoding,
|
||||
&standalone))
|
||||
return syntaxError;
|
||||
if (encoding) {
|
||||
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
|
||||
*nextPtr = encodingName;
|
||||
return incorrectEncoding;
|
||||
}
|
||||
*enc = encoding;
|
||||
}
|
||||
else if (encodingName) {
|
||||
switch (XmlTokenRole(&state, tok, s, next, *enc)) {
|
||||
case XML_ROLE_XML_DECL:
|
||||
{
|
||||
const char *encodingName = 0;
|
||||
const ENCODING *encoding = 0;
|
||||
const char *version;
|
||||
int standalone = -1;
|
||||
if (!XmlParseXmlDecl(0,
|
||||
*enc,
|
||||
s,
|
||||
next,
|
||||
nextPtr,
|
||||
&version,
|
||||
&encodingName,
|
||||
&encoding,
|
||||
&standalone))
|
||||
return syntaxError;
|
||||
if (encoding) {
|
||||
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
|
||||
*nextPtr = encodingName;
|
||||
return unknownEncoding;
|
||||
return incorrectEncoding;
|
||||
}
|
||||
if (standalone == 1)
|
||||
dtd->standalone = 1;
|
||||
break;
|
||||
*enc = encoding;
|
||||
}
|
||||
case XML_ROLE_DOCTYPE_SYSTEM_ID:
|
||||
dtd->containsRef = 1;
|
||||
break;
|
||||
case XML_ROLE_DOCTYPE_PUBLIC_ID:
|
||||
case XML_ROLE_ENTITY_PUBLIC_ID:
|
||||
case XML_ROLE_NOTATION_PUBLIC_ID:
|
||||
if (!XmlIsPublicId(*enc, s, next, nextPtr))
|
||||
return syntaxError;
|
||||
break;
|
||||
case XML_ROLE_INSTANCE_START:
|
||||
*nextPtr = s;
|
||||
return wellFormed;
|
||||
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
|
||||
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
|
||||
{
|
||||
const char *tem = 0;
|
||||
enum WfCheckResult result
|
||||
= checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
|
||||
next - (*enc)->minBytesPerChar,
|
||||
&tem);
|
||||
if (result) {
|
||||
if (tem)
|
||||
*nextPtr = tem;
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
else if (encodingName) {
|
||||
*nextPtr = encodingName;
|
||||
return unknownEncoding;
|
||||
}
|
||||
case XML_ROLE_ENTITY_VALUE:
|
||||
{
|
||||
enum WfCheckResult result
|
||||
= storeEntity(dtd,
|
||||
*enc,
|
||||
entityNamePtr,
|
||||
entityNameEnd,
|
||||
s,
|
||||
next,
|
||||
nextPtr);
|
||||
if (result != wellFormed)
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_ENTITY_SYSTEM_ID:
|
||||
if (entityNamePtr) {
|
||||
const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
|
||||
entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
|
||||
if (entity->name != name) {
|
||||
poolDiscard(&dtd->pool);
|
||||
entity = 0;
|
||||
}
|
||||
else {
|
||||
poolFinish(&dtd->pool);
|
||||
entity->systemId = poolStoreString(&dtd->pool, *enc,
|
||||
s + (*enc)->minBytesPerChar,
|
||||
next - (*enc)->minBytesPerChar);
|
||||
poolFinish(&dtd->pool);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_ENTITY_NOTATION_NAME:
|
||||
if (entity) {
|
||||
entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
|
||||
poolFinish(&dtd->pool);
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_GENERAL_ENTITY_NAME:
|
||||
entityNamePtr = s;
|
||||
entityNameEnd = next;
|
||||
break;
|
||||
case XML_ROLE_PARAM_ENTITY_NAME:
|
||||
entityNamePtr = 0;
|
||||
entityNameEnd = 0;
|
||||
break;
|
||||
case XML_ROLE_ERROR:
|
||||
*nextPtr = s;
|
||||
switch (tok) {
|
||||
case XML_TOK_COND_SECT_OPEN:
|
||||
return condSect;
|
||||
case XML_TOK_PARAM_ENTITY_REF:
|
||||
return paramEntityRef;
|
||||
case XML_TOK_INVALID:
|
||||
*nextPtr = next;
|
||||
return invalidToken;
|
||||
case XML_TOK_NONE:
|
||||
return noElements;
|
||||
case XML_TOK_PARTIAL:
|
||||
return unclosedToken;
|
||||
case XML_TOK_PARTIAL_CHAR:
|
||||
return partialChar;
|
||||
case XML_TOK_TRAILING_CR:
|
||||
*nextPtr = s + (*enc)->minBytesPerChar;
|
||||
return noElements;
|
||||
case XML_TOK_PI:
|
||||
if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml"))
|
||||
return misplacedXmlPi;
|
||||
default:
|
||||
return syntaxError;
|
||||
}
|
||||
case XML_ROLE_GROUP_OPEN:
|
||||
if (state.level >= dtd->groupSize) {
|
||||
if (dtd->groupSize)
|
||||
dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
|
||||
else
|
||||
dtd->groupConnector = malloc(dtd->groupSize = 32);
|
||||
if (!dtd->groupConnector)
|
||||
return noMemory;
|
||||
}
|
||||
dtd->groupConnector[state.level] = 0;
|
||||
break;
|
||||
case XML_ROLE_GROUP_SEQUENCE:
|
||||
if (dtd->groupConnector[state.level] == '|') {
|
||||
*nextPtr = s;
|
||||
return syntaxError;
|
||||
}
|
||||
dtd->groupConnector[state.level] = ',';
|
||||
break;
|
||||
case XML_ROLE_GROUP_CHOICE:
|
||||
if (dtd->groupConnector[state.level] == ',') {
|
||||
*nextPtr = s;
|
||||
return syntaxError;
|
||||
}
|
||||
dtd->groupConnector[state.level] = '|';
|
||||
break;
|
||||
case XML_ROLE_NONE:
|
||||
if (tok == XML_TOK_PARAM_ENTITY_REF)
|
||||
dtd->containsRef = 1;
|
||||
if (standalone == 1)
|
||||
dtd->standalone = 1;
|
||||
break;
|
||||
}
|
||||
case XML_ROLE_DOCTYPE_SYSTEM_ID:
|
||||
dtd->containsRef = 1;
|
||||
break;
|
||||
case XML_ROLE_DOCTYPE_PUBLIC_ID:
|
||||
case XML_ROLE_ENTITY_PUBLIC_ID:
|
||||
case XML_ROLE_NOTATION_PUBLIC_ID:
|
||||
if (!XmlIsPublicId(*enc, s, next, nextPtr))
|
||||
return syntaxError;
|
||||
break;
|
||||
case XML_ROLE_INSTANCE_START:
|
||||
*nextPtr = s;
|
||||
return wellFormed;
|
||||
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
|
||||
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
|
||||
{
|
||||
const char *tem = 0;
|
||||
enum WfCheckResult result
|
||||
= checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
|
||||
next - (*enc)->minBytesPerChar,
|
||||
&tem);
|
||||
if (result) {
|
||||
if (tem)
|
||||
*nextPtr = tem;
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case XML_ROLE_ENTITY_VALUE:
|
||||
{
|
||||
enum WfCheckResult result
|
||||
= storeEntity(dtd,
|
||||
*enc,
|
||||
entityNamePtr,
|
||||
entityNameEnd,
|
||||
s,
|
||||
next,
|
||||
nextPtr);
|
||||
if (result != wellFormed)
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_ENTITY_SYSTEM_ID:
|
||||
if (entityNamePtr) {
|
||||
const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
|
||||
entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
|
||||
if (entity->name != name) {
|
||||
poolDiscard(&dtd->pool);
|
||||
entity = 0;
|
||||
}
|
||||
else {
|
||||
poolFinish(&dtd->pool);
|
||||
entity->systemId = poolStoreString(&dtd->pool, *enc,
|
||||
s + (*enc)->minBytesPerChar,
|
||||
next - (*enc)->minBytesPerChar);
|
||||
poolFinish(&dtd->pool);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_ENTITY_NOTATION_NAME:
|
||||
if (entity) {
|
||||
entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
|
||||
poolFinish(&dtd->pool);
|
||||
}
|
||||
break;
|
||||
case XML_ROLE_GENERAL_ENTITY_NAME:
|
||||
entityNamePtr = s;
|
||||
entityNameEnd = next;
|
||||
break;
|
||||
case XML_ROLE_PARAM_ENTITY_NAME:
|
||||
entityNamePtr = 0;
|
||||
entityNameEnd = 0;
|
||||
break;
|
||||
case XML_ROLE_ERROR:
|
||||
*nextPtr = s;
|
||||
switch (tok) {
|
||||
case XML_TOK_COND_SECT_OPEN:
|
||||
return condSect;
|
||||
case XML_TOK_PARAM_ENTITY_REF:
|
||||
return paramEntityRef;
|
||||
case XML_TOK_INVALID:
|
||||
*nextPtr = next;
|
||||
return invalidToken;
|
||||
case XML_TOK_NONE:
|
||||
return noElements;
|
||||
case XML_TOK_PARTIAL:
|
||||
return unclosedToken;
|
||||
case XML_TOK_PARTIAL_CHAR:
|
||||
return partialChar;
|
||||
case XML_TOK_TRAILING_CR:
|
||||
*nextPtr = s + (*enc)->minBytesPerChar;
|
||||
return noElements;
|
||||
case XML_TOK_XML_DECL:
|
||||
return misplacedXmlPi;
|
||||
default:
|
||||
return syntaxError;
|
||||
}
|
||||
case XML_ROLE_GROUP_OPEN:
|
||||
if (state.level >= dtd->groupSize) {
|
||||
if (dtd->groupSize)
|
||||
dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
|
||||
else
|
||||
dtd->groupConnector = malloc(dtd->groupSize = 32);
|
||||
if (!dtd->groupConnector)
|
||||
return noMemory;
|
||||
}
|
||||
dtd->groupConnector[state.level] = 0;
|
||||
break;
|
||||
case XML_ROLE_GROUP_SEQUENCE:
|
||||
if (dtd->groupConnector[state.level] == '|') {
|
||||
*nextPtr = s;
|
||||
return syntaxError;
|
||||
}
|
||||
dtd->groupConnector[state.level] = ',';
|
||||
break;
|
||||
case XML_ROLE_GROUP_CHOICE:
|
||||
if (dtd->groupConnector[state.level] == ',') {
|
||||
*nextPtr = s;
|
||||
return syntaxError;
|
||||
}
|
||||
dtd->groupConnector[state.level] = '|';
|
||||
break;
|
||||
case XML_ROLE_NONE:
|
||||
if (tok == XML_TOK_PARAM_ENTITY_REF)
|
||||
dtd->containsRef = 1;
|
||||
break;
|
||||
}
|
||||
s = next;
|
||||
}
|
||||
|
@ -621,8 +615,7 @@ checkGeneralTextEntity(CONTEXT *context,
|
|||
s = next;
|
||||
tok = XmlContentTok(*enc, s, end, &next);
|
||||
}
|
||||
if (tok == XML_TOK_PI
|
||||
&& XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) {
|
||||
if (tok == XML_TOK_XML_DECL) {
|
||||
const char *encodingName = 0;
|
||||
const ENCODING *encoding = 0;
|
||||
const char *version;
|
||||
|
|
Loading…
Add table
Reference in a new issue