New token XML_TOK_XML_DECL

This commit is contained in:
James Clark 1997-12-12 00:48:27 +00:00
parent db6a73f853
commit b2eeefe571
4 changed files with 305 additions and 177 deletions

View file

@ -38,10 +38,14 @@ int prolog0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
state->handler = prolog1;
return XML_ROLE_NONE;
case XML_TOK_XML_DECL:
state->handler = prolog1;
return XML_ROLE_XML_DECL;
case XML_TOK_PI:
state->handler = prolog1;
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
return XML_ROLE_XML_DECL;
return XML_ROLE_NONE;
case XML_TOK_COMMENT:
state->handler = prolog1;
@ -69,9 +73,9 @@ int prolog1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_PI:
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
return syntaxError(state);
case XML_TOK_COMMENT:
case XML_TOK_BOM:
return XML_ROLE_NONE;
@ -97,9 +101,9 @@ int prolog2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_PI:
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
return syntaxError(state);
case XML_TOK_COMMENT:
return XML_ROLE_NONE;
case XML_TOK_INSTANCE_START:
@ -117,6 +121,8 @@ int doctype0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = doctype1;
return XML_ROLE_DOCTYPE_NAME;
@ -132,6 +138,8 @@ int doctype1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset;
return XML_ROLE_NONE;
@ -160,6 +168,8 @@ int doctype2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = doctype3;
return XML_ROLE_DOCTYPE_PUBLIC_ID;
@ -175,6 +185,8 @@ int doctype3(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = doctype4;
return XML_ROLE_DOCTYPE_SYSTEM_ID;
@ -190,6 +202,8 @@ int doctype4(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset;
return XML_ROLE_NONE;
@ -208,6 +222,8 @@ int doctype5(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
@ -223,6 +239,8 @@ int internalSubset(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (XmlNameMatchesAscii(enc,
ptr + 2 * enc->minBytesPerChar,
@ -250,8 +268,6 @@ int internalSubset(PROLOG_STATE *state,
}
break;
case XML_TOK_PI:
if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
return syntaxError(state);
case XML_TOK_COMMENT:
case XML_TOK_PARAM_ENTITY_REF:
return XML_ROLE_NONE;
@ -270,6 +286,8 @@ int entity0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_PERCENT:
state->handler = entity1;
return XML_ROLE_NONE;
@ -288,6 +306,8 @@ int entity1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = entity7;
return XML_ROLE_PARAM_ENTITY_NAME;
@ -303,6 +323,8 @@ int entity2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
state->handler = entity4;
@ -328,6 +350,8 @@ int entity3(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = entity4;
return XML_ROLE_ENTITY_PUBLIC_ID;
@ -344,6 +368,8 @@ int entity4(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = entity5;
return XML_ROLE_ENTITY_SYSTEM_ID;
@ -359,6 +385,8 @@ int entity5(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return XML_ROLE_NONE;
@ -380,6 +408,8 @@ int entity6(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = declClose;
return XML_ROLE_ENTITY_NOTATION_NAME;
@ -395,6 +425,8 @@ int entity7(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
state->handler = entity9;
@ -420,6 +452,8 @@ int entity8(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = entity9;
return XML_ROLE_ENTITY_PUBLIC_ID;
@ -435,6 +469,8 @@ int entity9(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
return XML_ROLE_ENTITY_SYSTEM_ID;
@ -450,6 +486,8 @@ int notation0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = notation1;
return XML_ROLE_NOTATION_NAME;
@ -465,6 +503,8 @@ int notation1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
state->handler = notation3;
@ -487,6 +527,8 @@ int notation2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = notation4;
return XML_ROLE_NOTATION_PUBLIC_ID;
@ -502,6 +544,8 @@ int notation3(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
return XML_ROLE_NOTATION_SYSTEM_ID;
@ -517,6 +561,8 @@ int notation4(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
return XML_ROLE_NOTATION_SYSTEM_ID;
@ -535,6 +581,8 @@ int attlist0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = attlist1;
return XML_ROLE_ATTLIST_ELEMENT_NAME;
@ -550,6 +598,8 @@ int attlist1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return XML_ROLE_NONE;
@ -568,6 +618,8 @@ int attlist2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
{
static const char *types[] = {
@ -607,6 +659,8 @@ int attlist3(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NMTOKEN:
case XML_TOK_NAME:
state->handler = attlist4;
@ -623,6 +677,8 @@ int attlist4(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_CLOSE_PAREN:
state->handler = attlist8;
return XML_ROLE_NONE;
@ -641,6 +697,8 @@ int attlist5(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_OPEN_PAREN:
state->handler = attlist6;
return XML_ROLE_NONE;
@ -657,6 +715,8 @@ int attlist6(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = attlist7;
return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
@ -672,6 +732,8 @@ int attlist7(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_CLOSE_PAREN:
state->handler = attlist8;
return XML_ROLE_NONE;
@ -691,6 +753,8 @@ int attlist8(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + enc->minBytesPerChar,
@ -726,6 +790,8 @@ int attlist9(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_LITERAL:
state->handler = attlist1;
return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
@ -741,6 +807,8 @@ int element0(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = element1;
return XML_ROLE_ELEMENT_NAME;
@ -756,6 +824,8 @@ int element1(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) {
state->handler = declClose;
@ -782,6 +852,8 @@ int element2(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + enc->minBytesPerChar,
@ -818,6 +890,8 @@ int element3(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_CLOSE_PAREN:
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
@ -837,6 +911,8 @@ int element4(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
state->handler = element5;
return XML_ROLE_CONTENT_ELEMENT;
@ -852,6 +928,8 @@ int element5(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
return XML_ROLE_GROUP_CLOSE_REP;
@ -870,6 +948,8 @@ int element6(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_OPEN_PAREN:
state->level += 1;
return XML_ROLE_GROUP_OPEN;
@ -897,6 +977,8 @@ int element7(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_CLOSE_PAREN:
state->level -= 1;
if (state->level == 0)
@ -935,6 +1017,8 @@ int declClose(PROLOG_STATE *state,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return XML_ROLE_NONE;

View file

@ -33,11 +33,11 @@ extern "C" {
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_COMMENT 12
#define XML_TOK_BOM 13 /* Byte order mark */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14 /* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_INSTANCE_START 14
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
@ -52,9 +52,9 @@ extern "C" {
#define XML_TOK_CLOSE_BRACKET 26
#define XML_TOK_LITERAL 27
#define XML_TOK_PARAM_ENTITY_REF 28
#define XML_TOK_INSTANCE_START 29
/* The following occur only in element type declarations */
#define XML_TOK_COMMA 29
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
@ -63,7 +63,7 @@ extern "C" {
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_COMMA 38
#define XML_N_STATES 2
#define XML_PROLOG_STATE 0

View file

@ -162,12 +162,55 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
}
static
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
{
int upper = 0;
if (end - ptr != MINBPC*3)
return 1;
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'x':
break;
case 'X':
upper = 1;
break;
default:
return 1;
}
ptr += MINBPC;
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'm':
break;
case 'M':
upper = 1;
break;
default:
return 1;
}
ptr += MINBPC;
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'l':
break;
case 'L':
upper = 1;
break;
default:
return 1;
}
if (upper)
return 0;
*tokPtr = XML_TOK_XML_DECL;
return 1;
}
/* ptr points to character following "<?" */
static
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
int tok;
const char *target = ptr;
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
@ -180,6 +223,10 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
@ -191,7 +238,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_PI;
return tok;
}
break;
default:
@ -201,12 +248,16 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
}
return XML_TOK_PARTIAL;
case BT_QUEST:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_PI;
return tok;
}
/* fall through */
default:

View file

@ -293,12 +293,9 @@ checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
return badCharRef;
}
break;
case XML_TOK_PI:
if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) {
*badPtr = s;
return misplacedXmlPi;
}
break;
case XML_TOK_XML_DECL:
*badPtr = s;
return misplacedXmlPi;
}
s = next;
if (level == 0) {
@ -411,166 +408,163 @@ checkProlog(DTD *dtd, const char *s, const char *end,
for (;;) {
const char *next;
int tok = XmlPrologTok(*enc, s, end, &next);
if (tok != XML_TOK_PROLOG_S) {
switch (XmlTokenRole(&state, tok, s, next, *enc)) {
case XML_ROLE_XML_DECL:
{
const char *encodingName = 0;
const ENCODING *encoding = 0;
const char *version;
int standalone = -1;
if (!XmlParseXmlDecl(0,
*enc,
s,
next,
nextPtr,
&version,
&encodingName,
&encoding,
&standalone))
return syntaxError;
if (encoding) {
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
*nextPtr = encodingName;
return incorrectEncoding;
}
*enc = encoding;
}
else if (encodingName) {
switch (XmlTokenRole(&state, tok, s, next, *enc)) {
case XML_ROLE_XML_DECL:
{
const char *encodingName = 0;
const ENCODING *encoding = 0;
const char *version;
int standalone = -1;
if (!XmlParseXmlDecl(0,
*enc,
s,
next,
nextPtr,
&version,
&encodingName,
&encoding,
&standalone))
return syntaxError;
if (encoding) {
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
*nextPtr = encodingName;
return unknownEncoding;
return incorrectEncoding;
}
if (standalone == 1)
dtd->standalone = 1;
break;
*enc = encoding;
}
case XML_ROLE_DOCTYPE_SYSTEM_ID:
dtd->containsRef = 1;
break;
case XML_ROLE_DOCTYPE_PUBLIC_ID:
case XML_ROLE_ENTITY_PUBLIC_ID:
case XML_ROLE_NOTATION_PUBLIC_ID:
if (!XmlIsPublicId(*enc, s, next, nextPtr))
return syntaxError;
break;
case XML_ROLE_INSTANCE_START:
*nextPtr = s;
return wellFormed;
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
{
const char *tem = 0;
enum WfCheckResult result
= checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
next - (*enc)->minBytesPerChar,
&tem);
if (result) {
if (tem)
*nextPtr = tem;
return result;
}
break;
else if (encodingName) {
*nextPtr = encodingName;
return unknownEncoding;
}
case XML_ROLE_ENTITY_VALUE:
{
enum WfCheckResult result
= storeEntity(dtd,
*enc,
entityNamePtr,
entityNameEnd,
s,
next,
nextPtr);
if (result != wellFormed)
return result;
}
break;
case XML_ROLE_ENTITY_SYSTEM_ID:
if (entityNamePtr) {
const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
if (entity->name != name) {
poolDiscard(&dtd->pool);
entity = 0;
}
else {
poolFinish(&dtd->pool);
entity->systemId = poolStoreString(&dtd->pool, *enc,
s + (*enc)->minBytesPerChar,
next - (*enc)->minBytesPerChar);
poolFinish(&dtd->pool);
}
}
break;
case XML_ROLE_ENTITY_NOTATION_NAME:
if (entity) {
entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
poolFinish(&dtd->pool);
}
break;
case XML_ROLE_GENERAL_ENTITY_NAME:
entityNamePtr = s;
entityNameEnd = next;
break;
case XML_ROLE_PARAM_ENTITY_NAME:
entityNamePtr = 0;
entityNameEnd = 0;
break;
case XML_ROLE_ERROR:
*nextPtr = s;
switch (tok) {
case XML_TOK_COND_SECT_OPEN:
return condSect;
case XML_TOK_PARAM_ENTITY_REF:
return paramEntityRef;
case XML_TOK_INVALID:
*nextPtr = next;
return invalidToken;
case XML_TOK_NONE:
return noElements;
case XML_TOK_PARTIAL:
return unclosedToken;
case XML_TOK_PARTIAL_CHAR:
return partialChar;
case XML_TOK_TRAILING_CR:
*nextPtr = s + (*enc)->minBytesPerChar;
return noElements;
case XML_TOK_PI:
if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml"))
return misplacedXmlPi;
default:
return syntaxError;
}
case XML_ROLE_GROUP_OPEN:
if (state.level >= dtd->groupSize) {
if (dtd->groupSize)
dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
else
dtd->groupConnector = malloc(dtd->groupSize = 32);
if (!dtd->groupConnector)
return noMemory;
}
dtd->groupConnector[state.level] = 0;
break;
case XML_ROLE_GROUP_SEQUENCE:
if (dtd->groupConnector[state.level] == '|') {
*nextPtr = s;
return syntaxError;
}
dtd->groupConnector[state.level] = ',';
break;
case XML_ROLE_GROUP_CHOICE:
if (dtd->groupConnector[state.level] == ',') {
*nextPtr = s;
return syntaxError;
}
dtd->groupConnector[state.level] = '|';
break;
case XML_ROLE_NONE:
if (tok == XML_TOK_PARAM_ENTITY_REF)
dtd->containsRef = 1;
if (standalone == 1)
dtd->standalone = 1;
break;
}
case XML_ROLE_DOCTYPE_SYSTEM_ID:
dtd->containsRef = 1;
break;
case XML_ROLE_DOCTYPE_PUBLIC_ID:
case XML_ROLE_ENTITY_PUBLIC_ID:
case XML_ROLE_NOTATION_PUBLIC_ID:
if (!XmlIsPublicId(*enc, s, next, nextPtr))
return syntaxError;
break;
case XML_ROLE_INSTANCE_START:
*nextPtr = s;
return wellFormed;
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
{
const char *tem = 0;
enum WfCheckResult result
= checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
next - (*enc)->minBytesPerChar,
&tem);
if (result) {
if (tem)
*nextPtr = tem;
return result;
}
break;
}
case XML_ROLE_ENTITY_VALUE:
{
enum WfCheckResult result
= storeEntity(dtd,
*enc,
entityNamePtr,
entityNameEnd,
s,
next,
nextPtr);
if (result != wellFormed)
return result;
}
break;
case XML_ROLE_ENTITY_SYSTEM_ID:
if (entityNamePtr) {
const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
if (entity->name != name) {
poolDiscard(&dtd->pool);
entity = 0;
}
else {
poolFinish(&dtd->pool);
entity->systemId = poolStoreString(&dtd->pool, *enc,
s + (*enc)->minBytesPerChar,
next - (*enc)->minBytesPerChar);
poolFinish(&dtd->pool);
}
}
break;
case XML_ROLE_ENTITY_NOTATION_NAME:
if (entity) {
entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
poolFinish(&dtd->pool);
}
break;
case XML_ROLE_GENERAL_ENTITY_NAME:
entityNamePtr = s;
entityNameEnd = next;
break;
case XML_ROLE_PARAM_ENTITY_NAME:
entityNamePtr = 0;
entityNameEnd = 0;
break;
case XML_ROLE_ERROR:
*nextPtr = s;
switch (tok) {
case XML_TOK_COND_SECT_OPEN:
return condSect;
case XML_TOK_PARAM_ENTITY_REF:
return paramEntityRef;
case XML_TOK_INVALID:
*nextPtr = next;
return invalidToken;
case XML_TOK_NONE:
return noElements;
case XML_TOK_PARTIAL:
return unclosedToken;
case XML_TOK_PARTIAL_CHAR:
return partialChar;
case XML_TOK_TRAILING_CR:
*nextPtr = s + (*enc)->minBytesPerChar;
return noElements;
case XML_TOK_XML_DECL:
return misplacedXmlPi;
default:
return syntaxError;
}
case XML_ROLE_GROUP_OPEN:
if (state.level >= dtd->groupSize) {
if (dtd->groupSize)
dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
else
dtd->groupConnector = malloc(dtd->groupSize = 32);
if (!dtd->groupConnector)
return noMemory;
}
dtd->groupConnector[state.level] = 0;
break;
case XML_ROLE_GROUP_SEQUENCE:
if (dtd->groupConnector[state.level] == '|') {
*nextPtr = s;
return syntaxError;
}
dtd->groupConnector[state.level] = ',';
break;
case XML_ROLE_GROUP_CHOICE:
if (dtd->groupConnector[state.level] == ',') {
*nextPtr = s;
return syntaxError;
}
dtd->groupConnector[state.level] = '|';
break;
case XML_ROLE_NONE:
if (tok == XML_TOK_PARAM_ENTITY_REF)
dtd->containsRef = 1;
break;
}
s = next;
}
@ -621,8 +615,7 @@ checkGeneralTextEntity(CONTEXT *context,
s = next;
tok = XmlContentTok(*enc, s, end, &next);
}
if (tok == XML_TOK_PI
&& XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) {
if (tok == XML_TOK_XML_DECL) {
const char *encodingName = 0;
const ENCODING *encoding = 0;
const char *version;