From 7488411c110d247ed88a278b4aa199b4ad91a512 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Jun 1999 10:58:20 +0000 Subject: [PATCH] First part of changes for DTD support. --- expat/xmlparse/xmlparse.c | 351 ++++++++++++++++++++++++++++++++++--- expat/xmltok/xmlrole.c | 257 +++++++++++++++++++++------ expat/xmltok/xmlrole.h | 13 ++ expat/xmltok/xmltok.c | 11 +- expat/xmltok/xmltok.h | 19 ++ expat/xmltok/xmltok_impl.c | 55 ++++++ 6 files changed, 628 insertions(+), 78 deletions(-) diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index 40dad39b..4fa59266 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -28,6 +28,10 @@ GPL. If you do not delete the provisions above, a recipient may use your version of this file under either the MPL or the GPL. */ +/* FIXME +need current base rather than dtd.base +check parse state at end of external param entity + */ #include "xmldef.h" #include "xmlparse.h" @@ -61,6 +65,10 @@ typedef char ICHAR; #endif +#ifdef XML_DTD +#define XML_ERROR_NOT_IMPLEMENTED (XML_ERROR_NOT_STANDALONE + 1) +#endif + #ifdef XML_UNICODE_WCHAR_T #define XML_T(x) L ## x #else @@ -170,6 +178,10 @@ typedef struct { int complete; int standalone; const XML_Char *base; +#ifdef XML_DTD + HASH_TABLE paramEntities; + const XML_Char *systemId; +#endif /* XML_DTD */ PREFIX defaultPrefix; } DTD; @@ -189,6 +201,9 @@ static Processor prologProcessor; static Processor prologInitProcessor; static Processor contentProcessor; static Processor cdataSectionProcessor; +#ifdef XML_DTD +static Processor ignoreSectionProcessor; +#endif /* XML_DTD */ static Processor epilogProcessor; static Processor errorProcessor; static Processor externalEntityInitProcessor; @@ -207,6 +222,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char 
**nextPtr); +#ifdef XML_DTD +static enum XML_Error +doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +#endif /* XML_DTD */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr); static @@ -237,6 +256,10 @@ static void normalizePublicId(XML_Char *s); static int dtdInit(DTD *); static void dtdDestroy(DTD *); static int dtdCopy(DTD *newDtd, const DTD *oldDtd); +static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); +#ifdef XML_DTD +static void dtdSwap(DTD *, DTD *); +#endif /* XML_DTD */ static void poolInit(STRING_POOL *); static void poolClear(STRING_POOL *); static void poolDestroy(STRING_POOL *); @@ -329,6 +352,9 @@ typedef struct { unsigned m_groupSize; int m_hadExternalDoctype; XML_Char m_namespaceSeparator; +#ifdef XML_DTD + XML_Parser m_parentParser; +#endif } Parser; #define userData (((Parser *)parser)->m_userData) @@ -396,6 +422,9 @@ typedef struct { #define groupSize (((Parser *)parser)->m_groupSize) #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) +#ifdef XML_DTD +#define parentParser (((Parser *)parser)->m_parentParser) +#endif #ifdef _MSC_VER #ifdef _DEBUG @@ -465,6 +494,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) unknownEncodingData = 0; unknownEncodingHandlerData = 0; namespaceSeparator = '!'; +#ifdef XML_DTD + parentParser = 0; +#endif ns = 0; poolInit(&tempPool); poolInit(&temp2Pool); @@ -543,7 +575,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, void *oldHandlerArg = handlerArg; int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; - + parser = (ns ? 
XML_ParserCreateNS(encodingName, namespaceSeparator) : XML_ParserCreate(encodingName)); @@ -570,11 +602,24 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, if (oldExternalEntityRefHandlerArg != oldParser) externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) { - XML_ParserFree(parser); - return 0; +#ifdef XML_DTD + if (context) { +#endif /* XML_DTD */ + if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) { + XML_ParserFree(parser); + return 0; + } + processor = externalEntityInitProcessor; +#ifdef XML_DTD } - processor = externalEntityInitProcessor; + else { + dtdSwap(&dtd, oldDtd); + parentParser = oldParser; + XmlPrologStateInitExternalEntity(&prologState); + dtd.complete = 1; + hadExternalDoctype = 1; + } +#endif /* XML_DTD */ return parser; } @@ -611,6 +656,13 @@ void XML_ParserFree(XML_Parser parser) destroyBindings(inheritedBindings); poolDestroy(&tempPool); poolDestroy(&temp2Pool); +#ifdef XML_DTD + if (parentParser) { + if (hadExternalDoctype) + dtd.complete = 0; + dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd); + } +#endif /* XML_DTD */ dtdDestroy(&dtd); free((void *)atts); free(groupConnector); @@ -943,7 +995,10 @@ const XML_LChar *XML_ErrorString(int code) XML_T("encoding specified in XML declaration is incorrect"), XML_T("unclosed CDATA section"), XML_T("error in processing external entity reference"), - XML_T("document is not standalone") + XML_T("document is not standalone"), +#ifdef XML_DTD + XML_T("sorry, DTD not yet fully implemented"), +#endif }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; @@ -1837,6 +1892,83 @@ enum XML_Error doCdataSection(XML_Parser parser, /* not reached */ } +#ifdef XML_DTD + +/* The idea here is to avoid using stack for each IGNORE section when +the whole file is parsed with one call. 
*/ + +static +enum XML_Error ignoreSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr); + if (start) { + processor = prologProcessor; + return prologProcessor(parser, start, end, endPtr); + } + return result; +} + +/* startPtr gets set to non-null if the section is closed, and to null if +the section is not yet closed. */ + +static +enum XML_Error doIgnoreSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr) +{ + const char *next; + int tok; + const char *s = *startPtr; + const char **eventPP; + const char **eventEndPP; + if (enc == encoding) { + eventPP = &eventPtr; + *eventPP = s; + eventEndPP = &eventEndPtr; + } + else { + eventPP = &(openInternalEntities->internalEventPtr); + eventEndPP = &(openInternalEntities->internalEventEndPtr); + } + *eventPP = s; + *startPtr = 0; + tok = XmlIgnoreSectionTok(enc, s, end, &next); + *eventEndPP = next; + switch (tok) { + case XML_TOK_IGNORE_SECT: + if (defaultHandler) + reportDefault(parser, enc, s, next); + *startPtr = next; + return XML_ERROR_NONE; + case XML_TOK_INVALID: + *eventPP = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL_CHAR: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_PARTIAL: + case XML_TOK_NONE: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ + default: + abort(); + } + /* not reached */ +} + +#endif /* XML_DTD */ + static enum XML_Error initializeEncoding(XML_Parser parser) { @@ -1987,14 +2119,20 @@ prologProcessor(XML_Parser parser, case XML_TOK_INVALID: eventPtr = next; return XML_ERROR_INVALID_TOKEN; - case XML_TOK_NONE: - return XML_ERROR_NO_ELEMENTS; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return
XML_ERROR_PARTIAL_CHAR; case XML_TOK_TRAILING_CR: eventPtr = s + encoding->minBytesPerChar; + /* fall through */ + case XML_TOK_NONE: +#ifdef XML_DTD + if (parentParser) { + hadExternalDoctype = 0; + return XML_ERROR_NONE; + } +#endif /* XML_DTD */ return XML_ERROR_NO_ELEMENTS; default: abort(); @@ -2008,11 +2146,30 @@ prologProcessor(XML_Parser parser, return result; } break; +#ifdef XML_DTD + case XML_ROLE_TEXT_DECL: + { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + } + break; +#endif /* XML_DTD */ case XML_ROLE_DOCTYPE_SYSTEM_ID: +#ifndef XML_DTD /* FIXME */ if (!dtd.standalone && notStandaloneHandler && !notStandaloneHandler(handlerArg)) return XML_ERROR_NOT_STANDALONE; +#endif /* not XML_DTD */ +#ifdef XML_DTD + dtd.systemId = poolStoreString(&dtd.pool, encoding, + s + encoding->minBytesPerChar, + next - encoding->minBytesPerChar); + if (!dtd.systemId) + return XML_ERROR_NO_MEMORY; + poolFinish(&dtd.pool); +#endif /* XML_DTD */ hadExternalDoctype = 1; break; case XML_ROLE_DOCTYPE_PUBLIC_ID: @@ -2031,10 +2188,33 @@ prologProcessor(XML_Parser parser, poolFinish(&dtd.pool); } break; +#ifdef XML_DTD + case XML_ROLE_DOCTYPE_CLOSE: + if (dtd.complete && hadExternalDoctype) { + dtd.complete = 0; + if (dtd.systemId /* && paramEntities*/ && externalEntityRefHandler) { + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + dtd.base, + dtd.systemId, + 0)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (!dtd.complete + && !dtd.standalone + && notStandaloneHandler + && !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + errorCode = XML_ERROR_NONE; + } + } + break; +#endif /* XML_DTD */ case XML_ROLE_INSTANCE_START: processor = contentProcessor; +#ifndef XML_DTD if (hadExternalDoctype) dtd.complete = 0; +#endif /* not XML_DTD */ return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: { @@ -2150,7 +2330,30 @@ prologProcessor(XML_Parser parser, 
} break; case XML_ROLE_PARAM_ENTITY_NAME: +#ifdef XML_DTD + { + const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + if (dtd.complete) { + declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd.pool); + declEntity = 0; + } + else + poolFinish(&dtd.pool); + } + else { + poolDiscard(&dtd.pool); + declEntity = 0; + } + } +#else /* not XML_DTD */ declEntity = 0; +#endif /* not XML_DTD */ break; case XML_ROLE_NOTATION_NAME: declNotationPublicId = 0; @@ -2215,6 +2418,22 @@ prologProcessor(XML_Parser parser, default: return XML_ERROR_SYNTAX; } +#ifdef XML_DTD + case XML_ROLE_INNER_PARAM_ENTITY_REF: + return XML_ERROR_NOT_IMPLEMENTED; + case XML_ROLE_IGNORE_SECT: + { + enum XML_Error result; + if (defaultHandler) + reportDefault(parser, encoding, s, next); + result = doIgnoreSection(parser, encoding, &next, end, nextPtr); + if (!next) { + processor = ignoreSectionProcessor; + return result; + } + } + break; +#endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { if (groupSize) @@ -2241,11 +2460,44 @@ prologProcessor(XML_Parser parser, groupConnector[prologState.level] = '|'; break; case XML_ROLE_PARAM_ENTITY_REF: + if (!dtd.complete) + break; + dtd.complete = 0; +#ifdef XML_DTD + { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&dtd.pool, encoding, + s + encoding->minBytesPerChar, + next - encoding->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); + poolDiscard(&dtd.pool); + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->systemId && externalEntityRefHandler) { + entity->open = 1; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + dtd.base, + entity->systemId, + entity->publicId)) { + 
entity->open = 0; + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } + entity->open = 0; + if (dtd.complete) + break; + } + } +#endif /* XML_DTD */ if (!dtd.standalone && notStandaloneHandler && !notStandaloneHandler(handlerArg)) return XML_ERROR_NOT_STANDALONE; - dtd.complete = 0; break; case XML_ROLE_NONE: switch (tok) { @@ -2498,6 +2750,15 @@ enum XML_Error storeEntityValue(XML_Parser parser, switch (tok) { case XML_TOK_PARAM_ENTITY_REF: eventPtr = entityTextPtr; +#ifdef XML_DTD + if (parentParser) { + if (declEntity) { + declEntity->textPtr = poolStart(&dtd.pool); + declEntity->textLen = 0; + } + return XML_ERROR_NOT_IMPLEMENTED; + } +#endif /* XML_DTD */ return XML_ERROR_SYNTAX; case XML_TOK_NONE: if (declEntity) { @@ -2922,11 +3183,27 @@ static int dtdInit(DTD *p) p->complete = 1; p->standalone = 0; p->base = 0; +#ifdef XML_DTD + hashTableInit(&(p->paramEntities)); + p->systemId = 0; +#endif /* XML_DTD */ p->defaultPrefix.name = 0; p->defaultPrefix.binding = 0; return 1; } +#ifdef XML_DTD + +static void dtdSwap(DTD *p1, DTD *p2) +{ + DTD tem; + memcpy(&tem, p1, sizeof(DTD)); + memcpy(p1, p2, sizeof(DTD)); + memcpy(p2, &tem, sizeof(DTD)); +} + +#endif /* XML_DTD */ + static void dtdDestroy(DTD *p) { HASH_TABLE_ITER iter; @@ -2939,6 +3216,9 @@ static void dtdDestroy(DTD *p) free(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); +#ifdef XML_DTD + hashTableDestroy(&(p->paramEntities)); +#endif /* XML_DTD */ hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); @@ -2959,6 +3239,15 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) newDtd->base = tem; } +#ifdef XML_DTD + if (oldDtd->systemId) { + const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->systemId); + if (!tem) + return 0; + newDtd->systemId = tem; + } +#endif /* XML_DTD */ + /* Copy the prefix table. 
*/ hashTableIterInit(&iter, &(oldDtd->prefixes)); @@ -3043,9 +3332,31 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) } } - /* Copy the entity table. */ + /* Copy the entity tables. */ + if (!copyEntityTable(&(newDtd->generalEntities), + &(newDtd->pool), + &(oldDtd->generalEntities))) + return 0; - hashTableIterInit(&iter, &(oldDtd->generalEntities)); +#ifdef XML_DTD + if (!copyEntityTable(&(newDtd->paramEntities), + &(newDtd->pool), + &(oldDtd->paramEntities))) + return 0; +#endif /* XML_DTD */ + + newDtd->complete = oldDtd->complete; + newDtd->standalone = oldDtd->standalone; + return 1; +} + +static int copyEntityTable(HASH_TABLE *newTable, + STRING_POOL *newPool, + const HASH_TABLE *oldTable) +{ + HASH_TABLE_ITER iter; + + hashTableIterInit(&iter, oldTable); for (;;) { ENTITY *newE; @@ -3053,44 +3364,42 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); if (!oldE) break; - name = poolCopyString(&(newDtd->pool), oldE->name); + name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId); + const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (!tem) return 0; newE->systemId = tem; if (oldE->base) { +#if 0 /* FIXME */ if (oldE->base == oldDtd->base) newE->base = newDtd->base; - tem = poolCopyString(&(newDtd->pool), oldE->base); +#endif + tem = poolCopyString(newPool, oldE->base); if (!tem) return 0; newE->base = tem; } } else { - const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen); + const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); if (!tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { - const XML_Char *tem = 
poolCopyString(&(newDtd->pool), oldE->notation); + const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (!tem) return 0; newE->notation = tem; } } - - newDtd->complete = oldDtd->complete; - newDtd->standalone = oldDtd->standalone; - return 1; } static diff --git a/expat/xmltok/xmlrole.c b/expat/xmltok/xmlrole.c index b18e35eb..a01d364b 100755 --- a/expat/xmltok/xmlrole.c +++ b/expat/xmltok/xmlrole.c @@ -42,7 +42,16 @@ your version of this file under either the MPL or the GPL. #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif -typedef int PROLOG_HANDLER(struct prolog_state *state, +#ifdef XML_DTD +#define setTopLevel(state) \ + ((state)->handler = ((state)->documentEntity \ + ? internalSubset \ + : externalSubset1)) +#else /* not XML_DTD */ +#define setTopLevel(state) ((state)->handler = internalSubset) +#endif /* not XML_DTD */ + +typedef int PROLOG_HANDLER(PROLOG_STATE *state, int tok, const char *ptr, const char *end, @@ -59,11 +68,15 @@ static PROLOG_HANDLER attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, element7, +#ifdef XML_DTD + externalSubset0, externalSubset1, + condSect0, condSect1, condSect2, +#endif /* XML_DTD */ declClose, error; static -int syntaxError(PROLOG_STATE *); +int common(PROLOG_STATE *state, int tok); static int prolog0(PROLOG_STATE *state, @@ -97,7 +110,7 @@ int prolog0(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } static @@ -125,7 +138,7 @@ int prolog1(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } static @@ -145,7 +158,7 @@ int prolog2(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } static @@ -163,7 +176,7 @@ int doctype0(PROLOG_STATE *state, state->handler = doctype1; return 
XML_ROLE_DOCTYPE_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -193,7 +206,7 @@ int doctype1(PROLOG_STATE *state, } break; } - return syntaxError(state); + return common(state, tok); } static @@ -210,7 +223,7 @@ int doctype2(PROLOG_STATE *state, state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -227,7 +240,7 @@ int doctype3(PROLOG_STATE *state, state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -247,7 +260,7 @@ int doctype4(PROLOG_STATE *state, state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } static @@ -264,7 +277,7 @@ int doctype5(PROLOG_STATE *state, state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } static @@ -312,9 +325,52 @@ int internalSubset(PROLOG_STATE *state, state->handler = doctype5; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } +#ifdef XML_DTD + +static +int externalSubset0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + state->handler = externalSubset1; + if (tok == XML_TOK_XML_DECL) + return XML_ROLE_TEXT_DECL; + return externalSubset1(state, tok, ptr, end, enc); +} + +static +int externalSubset1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_COND_SECT_OPEN: + state->handler = condSect0; + return XML_ROLE_NONE; + case XML_TOK_COND_SECT_CLOSE: + if (state->includeLevel == 0) + break; + state->includeLevel -= 1; + return XML_ROLE_NONE; + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_CLOSE_BRACKET: + break; + default: + return internalSubset(state, tok, ptr, end, enc); + } + return common(state, tok); +} + +#endif /* XML_DTD */ + static int 
entity0(PROLOG_STATE *state, int tok, @@ -332,7 +388,7 @@ int entity0(PROLOG_STATE *state, state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -349,7 +405,7 @@ int entity1(PROLOG_STATE *state, state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -376,7 +432,7 @@ int entity2(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -393,7 +449,7 @@ int entity3(PROLOG_STATE *state, state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } @@ -411,7 +467,7 @@ int entity4(PROLOG_STATE *state, state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -425,7 +481,7 @@ int entity5(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "NDATA")) { @@ -434,7 +490,7 @@ int entity5(PROLOG_STATE *state, } break; } - return syntaxError(state); + return common(state, tok); } static @@ -451,7 +507,7 @@ int entity6(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -478,7 +534,7 @@ int entity7(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -495,7 +551,7 @@ int entity8(PROLOG_STATE *state, state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -512,7 +568,7 @@ int entity9(PROLOG_STATE *state, state->handler = declClose; return 
XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -529,7 +585,7 @@ int notation0(PROLOG_STATE *state, state->handler = notation1; return XML_ROLE_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -553,7 +609,7 @@ int notation1(PROLOG_STATE *state, } break; } - return syntaxError(state); + return common(state, tok); } static @@ -570,7 +626,7 @@ int notation2(PROLOG_STATE *state, state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -587,7 +643,7 @@ int notation3(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -604,10 +660,10 @@ int notation4(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NOTATION_NO_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -625,7 +681,7 @@ int attlist0(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_ATTLIST_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -639,14 +695,14 @@ int attlist1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist2; return XML_ROLE_ATTRIBUTE_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -687,7 +743,7 @@ int attlist2(PROLOG_STATE *state, state->handler = attlist3; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -706,7 +762,7 @@ int attlist3(PROLOG_STATE *state, state->handler = attlist4; return XML_ROLE_ATTRIBUTE_ENUM_VALUE; } - return syntaxError(state); + return 
common(state, tok); } static @@ -726,7 +782,7 @@ int attlist4(PROLOG_STATE *state, state->handler = attlist3; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -743,7 +799,7 @@ int attlist5(PROLOG_STATE *state, state->handler = attlist6; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } @@ -761,7 +817,7 @@ int attlist6(PROLOG_STATE *state, state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -781,7 +837,7 @@ int attlist7(PROLOG_STATE *state, state->handler = attlist6; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } /* default value */ @@ -819,7 +875,7 @@ int attlist8(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -836,7 +892,7 @@ int attlist9(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -854,7 +910,7 @@ int element0(PROLOG_STATE *state, state->handler = element1; return XML_ROLE_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -882,7 +938,7 @@ int element1(PROLOG_STATE *state, state->level = 1; return XML_ROLE_GROUP_OPEN; } - return syntaxError(state); + return common(state, tok); } static @@ -921,7 +977,7 @@ int element2(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } static @@ -942,7 +998,7 @@ int element3(PROLOG_STATE *state, state->handler = element4; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -960,7 +1016,7 @@ int element4(PROLOG_STATE *state, state->handler = element5; return XML_ROLE_CONTENT_ELEMENT; } - return syntaxError(state); + return 
common(state, tok); } static @@ -980,7 +1036,7 @@ int element5(PROLOG_STATE *state, state->handler = element4; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -1010,7 +1066,7 @@ int element6(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } static @@ -1050,9 +1106,72 @@ int element7(PROLOG_STATE *state, state->handler = element6; return XML_ROLE_GROUP_CHOICE; } - return syntaxError(state); + return common(state, tok); } +#ifdef XML_DTD + +static +int condSect0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_NAME: + if (XmlNameMatchesAscii(enc, ptr, "INCLUDE")) { + state->handler = condSect1; + return XML_ROLE_NONE; + } + if (XmlNameMatchesAscii(enc, ptr, "IGNORE")) { + state->handler = condSect2; + return XML_ROLE_NONE; + } + break; + } + return common(state, tok); +} + +static +int condSect1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + state->includeLevel += 1; + return XML_ROLE_NONE; + } + return common(state, tok); +} + +static +int condSect2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + return XML_ROLE_IGNORE_SECT; + } + return common(state, tok); +} + +#endif /* XML_DTD */ + static int declClose(PROLOG_STATE *state, int tok, @@ -1064,10 +1183,10 @@ int declClose(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NONE; } - 
return syntaxError(state); + return common(state, tok); } #if 0 @@ -1086,23 +1205,27 @@ int ignore(PROLOG_STATE *state, default: return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } #endif static int error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { return XML_ROLE_NONE; } static -int syntaxError(PROLOG_STATE *state) +int common(PROLOG_STATE *state, int tok) { +#ifdef XML_DTD + if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + return XML_ROLE_INNER_PARAM_ENTITY_REF; +#endif state->handler = error; return XML_ROLE_ERROR; } @@ -1110,4 +1233,26 @@ int syntaxError(PROLOG_STATE *state) void XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; +#ifdef XML_DTD + state->documentEntity = 1; + state->includeLevel = 0; +#endif /* XML_DTD */ } + +#ifdef XML_DTD + +void XmlPrologStateInitExternalEntity(PROLOG_STATE *state) +{ + state->handler = externalSubset0; + state->documentEntity = 0; + state->includeLevel = 0; +} + +void XmlPrologStateInitInternalEntity(PROLOG_STATE *state) +{ + state->handler = externalSubset1; + state->documentEntity = 0; + state->includeLevel = 0; +} + +#endif /* XML_DTD */ diff --git a/expat/xmltok/xmlrole.h b/expat/xmltok/xmlrole.h index 877c40ba..cb271086 100755 --- a/expat/xmltok/xmlrole.h +++ b/expat/xmltok/xmlrole.h @@ -87,6 +87,11 @@ enum { XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_PLUS, +#ifdef XML_DTD + XML_ROLE_TEXT_DECL, + XML_ROLE_IGNORE_SECT, + XML_ROLE_INNER_PARAM_ENTITY_REF, +#endif /* XML_DTD */ XML_ROLE_PARAM_ENTITY_REF }; @@ -97,9 +102,17 @@ typedef struct prolog_state { const char *end, const ENCODING *enc); unsigned level; +#ifdef XML_DTD + unsigned includeLevel; + int documentEntity; +#endif /* XML_DTD */ } PROLOG_STATE; void XMLTOKAPI XmlPrologStateInit(PROLOG_STATE *); +#ifdef XML_DTD +void XMLTOKAPI 
XmlPrologStateInitInternalEntity(PROLOG_STATE *); +void XMLTOKAPI XmlPrologStateInitExternalEntity(PROLOG_STATE *); +#endif /* XML_DTD */ #define XmlTokenRole(state, tok, ptr, end, enc) \ (((state)->handler)(state, tok, ptr, end, enc)) diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index 23d3a946..b632181f 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -32,8 +32,15 @@ your version of this file under either the MPL or the GPL. #include "xmltok.h" #include "nametab.h" +#ifdef XML_DTD +#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +#else +#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +#endif + #define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \ + { PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ PREFIX(sameName), \ PREFIX(nameMatchesAscii), \ @@ -1397,9 +1404,11 @@ int initScan(const ENCODING **encodingTable, encPtr = enc->encPtr; if (ptr + 1 == end) { /* only a single byte available for auto-detection */ +#ifndef XML_DTD /* FIXME */ /* a well-formed document entity must have more than one byte */ if (state != XML_CONTENT_STATE) return XML_TOK_PARTIAL; +#endif /* so we're parsing an external text entity... */ /* if UTF-16 was externally specified, then we need at least 2 bytes */ switch (INIT_ENC_INDEX(enc)) { diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index e0fe250f..24fbc631 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -108,10 +108,22 @@ extern "C" { for a name with a colon. 
*/ #define XML_TOK_PREFIXED_NAME 41 +#ifdef XML_DTD +#define XML_TOK_IGNORE_SECT 42 +#endif /* XML_DTD */ + +#ifdef XML_DTD +#define XML_N_STATES 4 +#else /* not XML_DTD */ #define XML_N_STATES 3 +#endif /* not XML_DTD */ + #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 +#ifdef XML_DTD +#define XML_IGNORE_SECTION_STATE 3 +#endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 #define XML_ATTRIBUTE_VALUE_LITERAL 0 @@ -211,6 +223,13 @@ literals, comments and processing instructions. #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +#ifdef XML_DTD + +#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) + +#endif /* XML_DTD */ + /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. */ diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index 40962654..4228fc0c 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -1326,6 +1326,61 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end return XML_TOK_DATA_CHARS; } +#ifdef XML_DTD + +static +int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + int level = 0; + if (MINBPC(enc) > 1) { + size_t n = end - ptr; + if (n & (MINBPC(enc) - 1)) { + n &= ~(MINBPC(enc) - 1); + end = ptr + n; + } + } + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { + INVALID_CASES(ptr, nextTokPtr) + case BT_LT: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, '!')) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, '[')) { + ++level; + ptr += MINBPC(enc); + } + } + break; + case BT_RSQB: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ']')) { + if ((ptr += 
MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, '>')) { + ptr += MINBPC(enc); + if (level == 0) { + *nextTokPtr = ptr; + return XML_TOK_IGNORE_SECT; + } + --level; + } + } + break; + default: + ptr += MINBPC(enc); + break; + } + } + return XML_TOK_PARTIAL; +} + +#endif /* XML_DTD */ + static int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr)