diff --git a/expat/xmltok/xmlrole.c b/expat/xmltok/xmlrole.c index 715fef29..9c34ad53 100755 --- a/expat/xmltok/xmlrole.c +++ b/expat/xmltok/xmlrole.c @@ -38,10 +38,14 @@ int prolog0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + state->handler = prolog1; + return XML_ROLE_NONE; + case XML_TOK_XML_DECL: + state->handler = prolog1; + return XML_ROLE_XML_DECL; case XML_TOK_PI: state->handler = prolog1; - if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) - return XML_ROLE_XML_DECL; return XML_ROLE_NONE; case XML_TOK_COMMENT: state->handler = prolog1; @@ -69,9 +73,9 @@ int prolog1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_PI: - if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) - return syntaxError(state); case XML_TOK_COMMENT: case XML_TOK_BOM: return XML_ROLE_NONE; @@ -97,9 +101,9 @@ int prolog2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_PI: - if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) - return syntaxError(state); case XML_TOK_COMMENT: return XML_ROLE_NONE; case XML_TOK_INSTANCE_START: @@ -117,6 +121,8 @@ int doctype0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = doctype1; return XML_ROLE_DOCTYPE_NAME; @@ -132,6 +138,8 @@ int doctype1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_NONE; @@ -160,6 +168,8 @@ int doctype2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; @@ -175,6 +185,8 @@ int doctype3(PROLOG_STATE *state, const ENCODING *enc) { 
switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; @@ -190,6 +202,8 @@ int doctype4(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_NONE; @@ -208,6 +222,8 @@ int doctype5(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; @@ -223,6 +239,8 @@ int internalSubset(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, @@ -250,8 +268,6 @@ int internalSubset(PROLOG_STATE *state, } break; case XML_TOK_PI: - if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) - return syntaxError(state); case XML_TOK_COMMENT: case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_NONE; @@ -270,6 +286,8 @@ int entity0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_PERCENT: state->handler = entity1; return XML_ROLE_NONE; @@ -288,6 +306,8 @@ int entity1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; @@ -303,6 +323,8 @@ int entity2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { state->handler = entity4; @@ -328,6 +350,8 @@ int entity3(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -344,6 +368,8 @@ int 
entity4(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; @@ -359,6 +385,8 @@ int entity5(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = internalSubset; return XML_ROLE_NONE; @@ -380,6 +408,8 @@ int entity6(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = declClose; return XML_ROLE_ENTITY_NOTATION_NAME; @@ -395,6 +425,8 @@ int entity7(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { state->handler = entity9; @@ -420,6 +452,8 @@ int entity8(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -435,6 +469,8 @@ int entity9(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = declClose; return XML_ROLE_ENTITY_SYSTEM_ID; @@ -450,6 +486,8 @@ int notation0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = notation1; return XML_ROLE_NOTATION_NAME; @@ -465,6 +503,8 @@ int notation1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { state->handler = notation3; @@ -487,6 +527,8 @@ int notation2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = notation4; return 
XML_ROLE_NOTATION_PUBLIC_ID; @@ -502,6 +544,8 @@ int notation3(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; @@ -517,6 +561,8 @@ int notation4(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; @@ -535,6 +581,8 @@ int attlist0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = attlist1; return XML_ROLE_ATTLIST_ELEMENT_NAME; @@ -550,6 +598,8 @@ int attlist1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = internalSubset; return XML_ROLE_NONE; @@ -568,6 +618,8 @@ int attlist2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: { static const char *types[] = { @@ -607,6 +659,8 @@ int attlist3(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NMTOKEN: case XML_TOK_NAME: state->handler = attlist4; @@ -623,6 +677,8 @@ int attlist4(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; return XML_ROLE_NONE; @@ -641,6 +697,8 @@ int attlist5(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_OPEN_PAREN: state->handler = attlist6; return XML_ROLE_NONE; @@ -657,6 +715,8 @@ int attlist6(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; @@ 
-672,6 +732,8 @@ int attlist7(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; return XML_ROLE_NONE; @@ -691,6 +753,8 @@ int attlist8(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, ptr + enc->minBytesPerChar, @@ -726,6 +790,8 @@ int attlist9(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; @@ -741,6 +807,8 @@ int element0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = element1; return XML_ROLE_ELEMENT_NAME; @@ -756,6 +824,8 @@ int element1(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) { state->handler = declClose; @@ -782,6 +852,8 @@ int element2(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, ptr + enc->minBytesPerChar, @@ -818,6 +890,8 @@ int element3(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; @@ -837,6 +911,8 @@ int element4(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = element5; return XML_ROLE_CONTENT_ELEMENT; @@ -852,6 +928,8 @@ int element5(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; return 
XML_ROLE_GROUP_CLOSE_REP; @@ -870,6 +948,8 @@ int element6(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_OPEN_PAREN: state->level += 1; return XML_ROLE_GROUP_OPEN; @@ -897,6 +977,8 @@ int element7(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: state->level -= 1; if (state->level == 0) @@ -935,6 +1017,8 @@ int declClose(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = internalSubset; return XML_ROLE_NONE; diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 0634b4dd..2d22275b 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -33,11 +33,11 @@ extern "C" { /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ #define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_COMMENT 12 -#define XML_TOK_BOM 13 /* Byte order mark */ +#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +#define XML_TOK_COMMENT 13 +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ -#define XML_TOK_INSTANCE_START 14 #define XML_TOK_PROLOG_S 15 #define XML_TOK_DECL_OPEN 16 /* */ @@ -52,9 +52,9 @@ extern "C" { #define XML_TOK_CLOSE_BRACKET 26 #define XML_TOK_LITERAL 27 #define XML_TOK_PARAM_ENTITY_REF 28 +#define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_COMMA 29 #define XML_TOK_NAME_QUESTION 30 /* name? */ #define XML_TOK_NAME_ASTERISK 31 /* name* */ #define XML_TOK_NAME_PLUS 32 /* name+ */ @@ -63,7 +63,7 @@ extern "C" { #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? 
*/ #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ - +#define XML_TOK_COMMA 38 #define XML_N_STATES 2 #define XML_PROLOG_STATE 0 diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index 854fe613..ebbbc5c0 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -162,12 +162,55 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } +static +int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) /* Classifies the processing-instruction target spanning [ptr, end). Returns 0 only when the target spells "xml" with at least one upper-case letter; the call sites shown in this patch then report XML_TOK_INVALID. Returns 1 in every other case, storing XML_TOK_XML_DECL in *tokPtr when the target is exactly lower-case "xml". NOTE(review): *tokPtr is not written on the other return paths, so the caller presumably initializes it beforehand; confirm against scanPi. */ +{ + int upper = 0; + if (end - ptr != MINBPC*3) /* length differs from three MINBPC-sized units, so the target cannot be any casing of "xml" */ + return 1; + switch (BYTE_TO_ASCII(enc, ptr)) { + case 'x': + break; + case 'X': + upper = 1; /* remember that an upper-case letter was seen; the same flag is set for 'M' and 'L' below */ + break; + default: + return 1; + } + ptr += MINBPC; + switch (BYTE_TO_ASCII(enc, ptr)) { + case 'm': + break; + case 'M': + upper = 1; + break; + default: + return 1; + } + ptr += MINBPC; + switch (BYTE_TO_ASCII(enc, ptr)) { + case 'l': + break; + case 'L': + upper = 1; + break; + default: + return 1; + } + if (upper) /* matched x, m, l ignoring case but not all lower-case: signal rejection to the caller */ + return 0; + *tokPtr = XML_TOK_XML_DECL; /* exactly "xml": report the token as an XML declaration instead of a plain PI */ + return 1; +} + /* ptr points to character following "')) { *nextTokPtr = ptr + MINBPC; - return XML_TOK_PI; + return tok; } break; default: @@ -201,12 +248,16 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, } return XML_TOK_PARTIAL; case BT_QUEST: + if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr + MINBPC; - return XML_TOK_PI; + return tok; } /* fall through */ default: diff --git a/expat/xmlwf/wfcheck.c b/expat/xmlwf/wfcheck.c index 0e808f5e..b527869e 100755 --- a/expat/xmlwf/wfcheck.c +++ b/expat/xmlwf/wfcheck.c @@ -293,12 +293,9 @@ checkContent(size_t level, CONTEXT *context, const ENCODING *enc, return badCharRef; } break; - case XML_TOK_PI: - if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) { - 
*badPtr = s; - return misplacedXmlPi; - } - break; + case XML_TOK_XML_DECL: + *badPtr = s; + return misplacedXmlPi; } s = next; if (level == 0) { @@ -411,166 +408,163 @@ checkProlog(DTD *dtd, const char *s, const char *end, for (;;) { const char *next; int tok = XmlPrologTok(*enc, s, end, &next); - if (tok != XML_TOK_PROLOG_S) { - switch (XmlTokenRole(&state, tok, s, next, *enc)) { - case XML_ROLE_XML_DECL: - { - const char *encodingName = 0; - const ENCODING *encoding = 0; - const char *version; - int standalone = -1; - if (!XmlParseXmlDecl(0, - *enc, - s, - next, - nextPtr, - &version, - &encodingName, - &encoding, - &standalone)) - return syntaxError; - if (encoding) { - if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { - *nextPtr = encodingName; - return incorrectEncoding; - } - *enc = encoding; - } - else if (encodingName) { + switch (XmlTokenRole(&state, tok, s, next, *enc)) { + case XML_ROLE_XML_DECL: + { + const char *encodingName = 0; + const ENCODING *encoding = 0; + const char *version; + int standalone = -1; + if (!XmlParseXmlDecl(0, + *enc, + s, + next, + nextPtr, + &version, + &encodingName, + &encoding, + &standalone)) + return syntaxError; + if (encoding) { + if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { *nextPtr = encodingName; - return unknownEncoding; + return incorrectEncoding; } - if (standalone == 1) - dtd->standalone = 1; - break; + *enc = encoding; } - case XML_ROLE_DOCTYPE_SYSTEM_ID: - dtd->containsRef = 1; - break; - case XML_ROLE_DOCTYPE_PUBLIC_ID: - case XML_ROLE_ENTITY_PUBLIC_ID: - case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(*enc, s, next, nextPtr)) - return syntaxError; - break; - case XML_ROLE_INSTANCE_START: - *nextPtr = s; - return wellFormed; - case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: - case XML_ROLE_FIXED_ATTRIBUTE_VALUE: - { - const char *tem = 0; - enum WfCheckResult result - = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar, - next - (*enc)->minBytesPerChar, - &tem); - if (result) { - 
if (tem) - *nextPtr = tem; - return result; - } - break; + else if (encodingName) { + *nextPtr = encodingName; + return unknownEncoding; } - case XML_ROLE_ENTITY_VALUE: - { - enum WfCheckResult result - = storeEntity(dtd, - *enc, - entityNamePtr, - entityNameEnd, - s, - next, - nextPtr); - if (result != wellFormed) - return result; - } - break; - case XML_ROLE_ENTITY_SYSTEM_ID: - if (entityNamePtr) { - const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd); - entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY)); - if (entity->name != name) { - poolDiscard(&dtd->pool); - entity = 0; - } - else { - poolFinish(&dtd->pool); - entity->systemId = poolStoreString(&dtd->pool, *enc, - s + (*enc)->minBytesPerChar, - next - (*enc)->minBytesPerChar); - poolFinish(&dtd->pool); - } - } - break; - case XML_ROLE_ENTITY_NOTATION_NAME: - if (entity) { - entity->notation = poolStoreString(&dtd->pool, *enc, s, next); - poolFinish(&dtd->pool); - } - break; - case XML_ROLE_GENERAL_ENTITY_NAME: - entityNamePtr = s; - entityNameEnd = next; - break; - case XML_ROLE_PARAM_ENTITY_NAME: - entityNamePtr = 0; - entityNameEnd = 0; - break; - case XML_ROLE_ERROR: - *nextPtr = s; - switch (tok) { - case XML_TOK_COND_SECT_OPEN: - return condSect; - case XML_TOK_PARAM_ENTITY_REF: - return paramEntityRef; - case XML_TOK_INVALID: - *nextPtr = next; - return invalidToken; - case XML_TOK_NONE: - return noElements; - case XML_TOK_PARTIAL: - return unclosedToken; - case XML_TOK_PARTIAL_CHAR: - return partialChar; - case XML_TOK_TRAILING_CR: - *nextPtr = s + (*enc)->minBytesPerChar; - return noElements; - case XML_TOK_PI: - if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) - return misplacedXmlPi; - default: - return syntaxError; - } - case XML_ROLE_GROUP_OPEN: - if (state.level >= dtd->groupSize) { - if (dtd->groupSize) - dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2); - else - dtd->groupConnector = 
malloc(dtd->groupSize = 32); - if (!dtd->groupConnector) - return noMemory; - } - dtd->groupConnector[state.level] = 0; - break; - case XML_ROLE_GROUP_SEQUENCE: - if (dtd->groupConnector[state.level] == '|') { - *nextPtr = s; - return syntaxError; - } - dtd->groupConnector[state.level] = ','; - break; - case XML_ROLE_GROUP_CHOICE: - if (dtd->groupConnector[state.level] == ',') { - *nextPtr = s; - return syntaxError; - } - dtd->groupConnector[state.level] = '|'; - break; - case XML_ROLE_NONE: - if (tok == XML_TOK_PARAM_ENTITY_REF) - dtd->containsRef = 1; + if (standalone == 1) + dtd->standalone = 1; break; } + case XML_ROLE_DOCTYPE_SYSTEM_ID: + dtd->containsRef = 1; + break; + case XML_ROLE_DOCTYPE_PUBLIC_ID: + case XML_ROLE_ENTITY_PUBLIC_ID: + case XML_ROLE_NOTATION_PUBLIC_ID: + if (!XmlIsPublicId(*enc, s, next, nextPtr)) + return syntaxError; + break; + case XML_ROLE_INSTANCE_START: + *nextPtr = s; + return wellFormed; + case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: + case XML_ROLE_FIXED_ATTRIBUTE_VALUE: + { + const char *tem = 0; + enum WfCheckResult result + = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar, + &tem); + if (result) { + if (tem) + *nextPtr = tem; + return result; + } + break; + } + case XML_ROLE_ENTITY_VALUE: + { + enum WfCheckResult result + = storeEntity(dtd, + *enc, + entityNamePtr, + entityNameEnd, + s, + next, + nextPtr); + if (result != wellFormed) + return result; + } + break; + case XML_ROLE_ENTITY_SYSTEM_ID: + if (entityNamePtr) { + const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd); + entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY)); + if (entity->name != name) { + poolDiscard(&dtd->pool); + entity = 0; + } + else { + poolFinish(&dtd->pool); + entity->systemId = poolStoreString(&dtd->pool, *enc, + s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar); + poolFinish(&dtd->pool); + } + } + break; + case XML_ROLE_ENTITY_NOTATION_NAME: + if 
(entity) { + entity->notation = poolStoreString(&dtd->pool, *enc, s, next); + poolFinish(&dtd->pool); + } + break; + case XML_ROLE_GENERAL_ENTITY_NAME: + entityNamePtr = s; + entityNameEnd = next; + break; + case XML_ROLE_PARAM_ENTITY_NAME: + entityNamePtr = 0; + entityNameEnd = 0; + break; + case XML_ROLE_ERROR: + *nextPtr = s; + switch (tok) { + case XML_TOK_COND_SECT_OPEN: + return condSect; + case XML_TOK_PARAM_ENTITY_REF: + return paramEntityRef; + case XML_TOK_INVALID: + *nextPtr = next; + return invalidToken; + case XML_TOK_NONE: + return noElements; + case XML_TOK_PARTIAL: + return unclosedToken; + case XML_TOK_PARTIAL_CHAR: + return partialChar; + case XML_TOK_TRAILING_CR: + *nextPtr = s + (*enc)->minBytesPerChar; + return noElements; + case XML_TOK_XML_DECL: + return misplacedXmlPi; + default: + return syntaxError; + } + case XML_ROLE_GROUP_OPEN: + if (state.level >= dtd->groupSize) { + if (dtd->groupSize) + dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2); + else + dtd->groupConnector = malloc(dtd->groupSize = 32); + if (!dtd->groupConnector) + return noMemory; + } + dtd->groupConnector[state.level] = 0; + break; + case XML_ROLE_GROUP_SEQUENCE: + if (dtd->groupConnector[state.level] == '|') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = ','; + break; + case XML_ROLE_GROUP_CHOICE: + if (dtd->groupConnector[state.level] == ',') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = '|'; + break; + case XML_ROLE_NONE: + if (tok == XML_TOK_PARAM_ENTITY_REF) + dtd->containsRef = 1; + break; } s = next; } @@ -621,8 +615,7 @@ checkGeneralTextEntity(CONTEXT *context, s = next; tok = XmlContentTok(*enc, s, end, &next); } - if (tok == XML_TOK_PI - && XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) { + if (tok == XML_TOK_XML_DECL) { const char *encodingName = 0; const ENCODING *encoding = 0; const char *version;