First part of changes for DTD support.

This commit is contained in:
James Clark 1999-06-25 10:58:20 +00:00
parent e157ebf618
commit 7488411c11
6 changed files with 628 additions and 78 deletions

View file

@ -28,6 +28,10 @@ GPL. If you do not delete the provisions above, a recipient may use
your version of this file under either the MPL or the GPL.
*/
/* FIXME
need current base rather than dtd.base
check parse state at end of external param entity
*/
#include "xmldef.h"
#include "xmlparse.h"
@ -61,6 +65,10 @@ typedef char ICHAR;
#endif
#ifdef XML_DTD
#define XML_ERROR_NOT_IMPLEMENTED (XML_ERROR_NOT_STANDALONE + 1)
#endif
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
@ -170,6 +178,10 @@ typedef struct {
int complete;
int standalone;
const XML_Char *base;
#ifdef XML_DTD
HASH_TABLE paramEntities;
const XML_Char *systemId;
#endif /* XML_DTD */
PREFIX defaultPrefix;
} DTD;
@ -189,6 +201,9 @@ static Processor prologProcessor;
static Processor prologInitProcessor;
static Processor contentProcessor;
static Processor cdataSectionProcessor;
#ifdef XML_DTD
static Processor ignoreSectionProcessor;
#endif /* XML_DTD */
static Processor epilogProcessor;
static Processor errorProcessor;
static Processor externalEntityInitProcessor;
@ -207,6 +222,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
const char *start, const char *end, const char **endPtr);
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
#ifdef XML_DTD
static enum XML_Error
doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
#endif /* XML_DTD */
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
static
@ -237,6 +256,10 @@ static void normalizePublicId(XML_Char *s);
static int dtdInit(DTD *);
static void dtdDestroy(DTD *);
static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
#ifdef XML_DTD
static void dtdSwap(DTD *, DTD *);
#endif /* XML_DTD */
static void poolInit(STRING_POOL *);
static void poolClear(STRING_POOL *);
static void poolDestroy(STRING_POOL *);
@ -329,6 +352,9 @@ typedef struct {
unsigned m_groupSize;
int m_hadExternalDoctype;
XML_Char m_namespaceSeparator;
#ifdef XML_DTD
XML_Parser m_parentParser;
#endif
} Parser;
#define userData (((Parser *)parser)->m_userData)
@ -396,6 +422,9 @@ typedef struct {
#define groupSize (((Parser *)parser)->m_groupSize)
#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
#ifdef XML_DTD
#define parentParser (((Parser *)parser)->m_parentParser)
#endif
#ifdef _MSC_VER
#ifdef _DEBUG
@ -465,6 +494,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName)
unknownEncodingData = 0;
unknownEncodingHandlerData = 0;
namespaceSeparator = '!';
#ifdef XML_DTD
parentParser = 0;
#endif
ns = 0;
poolInit(&tempPool);
poolInit(&temp2Pool);
@ -543,7 +575,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
void *oldHandlerArg = handlerArg;
int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
parser = (ns
? XML_ParserCreateNS(encodingName, namespaceSeparator)
: XML_ParserCreate(encodingName));
@ -570,11 +602,24 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
if (oldExternalEntityRefHandlerArg != oldParser)
externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
XML_ParserFree(parser);
return 0;
#ifdef XML_DTD
if (context) {
#endif /* XML_DTD */
if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
XML_ParserFree(parser);
return 0;
}
processor = externalEntityInitProcessor;
#ifdef XML_DTD
}
processor = externalEntityInitProcessor;
else {
dtdSwap(&dtd, oldDtd);
parentParser = oldParser;
XmlPrologStateInitExternalEntity(&prologState);
dtd.complete = 1;
hadExternalDoctype = 1;
}
#endif /* XML_DTD */
return parser;
}
@ -611,6 +656,13 @@ void XML_ParserFree(XML_Parser parser)
destroyBindings(inheritedBindings);
poolDestroy(&tempPool);
poolDestroy(&temp2Pool);
#ifdef XML_DTD
if (parentParser) {
if (hadExternalDoctype)
dtd.complete = 0;
dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
}
#endif /* XML_DTD */
dtdDestroy(&dtd);
free((void *)atts);
free(groupConnector);
@ -943,7 +995,10 @@ const XML_LChar *XML_ErrorString(int code)
XML_T("encoding specified in XML declaration is incorrect"),
XML_T("unclosed CDATA section"),
XML_T("error in processing external entity reference"),
XML_T("document is not standalone")
XML_T("document is not standalone"),
#ifdef XML_DTD
XML_T("sorry, DTD not yet fully implemented"),
#endif
};
if (code > 0 && code < sizeof(message)/sizeof(message[0]))
return message[code];
@ -1837,6 +1892,83 @@ enum XML_Error doCdataSection(XML_Parser parser,
/* not reached */
}
#ifdef XML_DTD
/* Processor that continues scanning an IGNORE section and, once the
   section has been closed, hands control back to prologProcessor.
   Using a processor for this avoids consuming stack for each IGNORE
   section when the whole file is parsed with one call. */
static
enum XML_Error ignoreSectionProcessor(XML_Parser parser,
                                      const char *start,
                                      const char *end,
                                      const char **endPtr)
{
  enum XML_Error status;

  status = doIgnoreSection(parser, encoding, &start, end, endPtr);
  /* doIgnoreSection leaves start null unless the section was closed. */
  if (!start)
    return status;
  processor = prologProcessor;
  return prologProcessor(parser, start, end, endPtr);
}
/* Scans the content of an IGNORE section beginning at *startPtr.

   enc       encoding used to tokenize the section
   startPtr  in: start of the section content; out: set to non-null
             (the position just after the section) if the section is
             closed, and to null if the section is not yet closed
   end       end of the available input
   nextPtr   if non-null, incomplete input is not an error: *nextPtr is
             set to the start of the section and XML_ERROR_NONE is returned

   Returns XML_ERROR_NONE, XML_ERROR_INVALID_TOKEN, XML_ERROR_PARTIAL_CHAR,
   or XML_ERROR_SYNTAX when the section is unclosed and nextPtr is null. */
static
enum XML_Error doIgnoreSection(XML_Parser parser,
                               const ENCODING *enc,
                               const char **startPtr,
                               const char *end,
                               const char **nextPtr)
{
  const char *s = *startPtr;
  /* The tokenizer does not store through its last argument on every
     return path (XML_TOK_PARTIAL in particular), so give next a defined
     value before it is copied into the event end pointer below. */
  const char *next = s;
  int tok;
  const char **eventPP;
  const char **eventEndPP;
  if (enc == encoding) {
    eventPP = &eventPtr;
    eventEndPP = &eventEndPtr;
  }
  else {
    eventPP = &(openInternalEntities->internalEventPtr);
    eventEndPP = &(openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  /* Assume "not yet closed" until the closing delimiter is actually seen. */
  *startPtr = 0;
  tok = XmlIgnoreSectionTok(enc, s, end, &next);
  *eventEndPP = next;
  switch (tok) {
  case XML_TOK_IGNORE_SECT:
    if (defaultHandler)
      reportDefault(parser, enc, s, next);
    *startPtr = next;
    return XML_ERROR_NONE;
  case XML_TOK_INVALID:
    *eventPP = next;
    return XML_ERROR_INVALID_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (nextPtr) {
      *nextPtr = s;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_PARTIAL_CHAR;
  case XML_TOK_PARTIAL:
  case XML_TOK_NONE:
    if (nextPtr) {
      *nextPtr = s;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
  default:
    abort();
  }
  /* not reached */
}
#endif /* XML_DTD */
static enum XML_Error
initializeEncoding(XML_Parser parser)
{
@ -1987,14 +2119,20 @@ prologProcessor(XML_Parser parser,
case XML_TOK_INVALID:
eventPtr = next;
return XML_ERROR_INVALID_TOKEN;
case XML_TOK_NONE:
return XML_ERROR_NO_ELEMENTS;
case XML_TOK_PARTIAL:
return XML_ERROR_UNCLOSED_TOKEN;
case XML_TOK_PARTIAL_CHAR:
return XML_ERROR_PARTIAL_CHAR;
case XML_TOK_TRAILING_CR:
eventPtr = s + encoding->minBytesPerChar;
/* fall through */
case XML_TOK_NONE:
#ifdef XML_DTD
if (parentParser) {
hadExternalDoctype = 0;
return XML_ERROR_NONE;
}
#endif /* XML_DTD */
return XML_ERROR_NO_ELEMENTS;
default:
abort();
@ -2008,11 +2146,30 @@ prologProcessor(XML_Parser parser,
return result;
}
break;
#ifdef XML_DTD
case XML_ROLE_TEXT_DECL:
{
enum XML_Error result = processXmlDecl(parser, 1, s, next);
if (result != XML_ERROR_NONE)
return result;
}
break;
#endif /* XML_DTD */
case XML_ROLE_DOCTYPE_SYSTEM_ID:
#ifndef XML_DTD /* FIXME */
if (!dtd.standalone
&& notStandaloneHandler
&& !notStandaloneHandler(handlerArg))
return XML_ERROR_NOT_STANDALONE;
#endif /* not XML_DTD */
#ifdef XML_DTD
dtd.systemId = poolStoreString(&dtd.pool, encoding,
s + encoding->minBytesPerChar,
next - encoding->minBytesPerChar);
if (!dtd.systemId)
return XML_ERROR_NO_MEMORY;
poolFinish(&dtd.pool);
#endif /* XML_DTD */
hadExternalDoctype = 1;
break;
case XML_ROLE_DOCTYPE_PUBLIC_ID:
@ -2031,10 +2188,33 @@ prologProcessor(XML_Parser parser,
poolFinish(&dtd.pool);
}
break;
#ifdef XML_DTD
case XML_ROLE_DOCTYPE_CLOSE:
if (dtd.complete && hadExternalDoctype) {
dtd.complete = 0;
if (dtd.systemId /* && paramEntities*/ && externalEntityRefHandler) {
if (!externalEntityRefHandler(externalEntityRefHandlerArg,
0,
dtd.base,
dtd.systemId,
0))
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
if (!dtd.complete
&& !dtd.standalone
&& notStandaloneHandler
&& !notStandaloneHandler(handlerArg))
return XML_ERROR_NOT_STANDALONE;
errorCode = XML_ERROR_NONE;
}
}
break;
#endif /* XML_DTD */
case XML_ROLE_INSTANCE_START:
processor = contentProcessor;
#ifndef XML_DTD
if (hadExternalDoctype)
dtd.complete = 0;
#endif /* not XML_DTD */
return contentProcessor(parser, s, end, nextPtr);
case XML_ROLE_ATTLIST_ELEMENT_NAME:
{
@ -2150,7 +2330,30 @@ prologProcessor(XML_Parser parser,
}
break;
case XML_ROLE_PARAM_ENTITY_NAME:
#ifdef XML_DTD
{
const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
if (!name)
return XML_ERROR_NO_MEMORY;
if (dtd.complete) {
declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
if (!declEntity)
return XML_ERROR_NO_MEMORY;
if (declEntity->name != name) {
poolDiscard(&dtd.pool);
declEntity = 0;
}
else
poolFinish(&dtd.pool);
}
else {
poolDiscard(&dtd.pool);
declEntity = 0;
}
}
#else /* not XML_DTD */
declEntity = 0;
#endif /* not XML_DTD */
break;
case XML_ROLE_NOTATION_NAME:
declNotationPublicId = 0;
@ -2215,6 +2418,22 @@ prologProcessor(XML_Parser parser,
default:
return XML_ERROR_SYNTAX;
}
#ifdef XML_DTD
case XML_ROLE_INNER_PARAM_ENTITY_REF:
return XML_ERROR_NOT_IMPLEMENTED;
case XML_ROLE_IGNORE_SECT:
{
enum XML_Error result;
if (defaultHandler)
reportDefault(parser, encoding, s, next);
result = doIgnoreSection(parser, encoding, &next, end, nextPtr);
if (!next) {
processor = ignoreSectionProcessor;
return result;
}
}
break;
#endif /* XML_DTD */
case XML_ROLE_GROUP_OPEN:
if (prologState.level >= groupSize) {
if (groupSize)
@ -2241,11 +2460,44 @@ prologProcessor(XML_Parser parser,
groupConnector[prologState.level] = '|';
break;
case XML_ROLE_PARAM_ENTITY_REF:
if (!dtd.complete)
break;
dtd.complete = 0;
#ifdef XML_DTD
{
const XML_Char *name;
ENTITY *entity;
name = poolStoreString(&dtd.pool, encoding,
s + encoding->minBytesPerChar,
next - encoding->minBytesPerChar);
if (!name)
return XML_ERROR_NO_MEMORY;
entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
poolDiscard(&dtd.pool);
if (!entity)
return XML_ERROR_UNDEFINED_ENTITY;
if (entity->open)
return XML_ERROR_RECURSIVE_ENTITY_REF;
if (entity->systemId && externalEntityRefHandler) {
entity->open = 1;
if (!externalEntityRefHandler(externalEntityRefHandlerArg,
0,
dtd.base,
entity->systemId,
entity->publicId)) {
entity->open = 0;
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
}
entity->open = 0;
if (dtd.complete)
break;
}
}
#endif /* XML_DTD */
if (!dtd.standalone
&& notStandaloneHandler
&& !notStandaloneHandler(handlerArg))
return XML_ERROR_NOT_STANDALONE;
dtd.complete = 0;
break;
case XML_ROLE_NONE:
switch (tok) {
@ -2498,6 +2750,15 @@ enum XML_Error storeEntityValue(XML_Parser parser,
switch (tok) {
case XML_TOK_PARAM_ENTITY_REF:
eventPtr = entityTextPtr;
#ifdef XML_DTD
if (parentParser) {
if (declEntity) {
declEntity->textPtr = poolStart(&dtd.pool);
declEntity->textLen = 0;
}
return XML_ERROR_NOT_IMPLEMENTED;
}
#endif /* XML_DTD */
return XML_ERROR_SYNTAX;
case XML_TOK_NONE:
if (declEntity) {
@ -2922,11 +3183,27 @@ static int dtdInit(DTD *p)
p->complete = 1;
p->standalone = 0;
p->base = 0;
#ifdef XML_DTD
hashTableInit(&(p->paramEntities));
p->systemId = 0;
#endif /* XML_DTD */
p->defaultPrefix.name = 0;
p->defaultPrefix.binding = 0;
return 1;
}
#ifdef XML_DTD
/* Exchanges the entire contents of the two DTD structures. */
static void dtdSwap(DTD *p1, DTD *p2)
{
  DTD saved = *p1;
  *p1 = *p2;
  *p2 = saved;
}
#endif /* XML_DTD */
static void dtdDestroy(DTD *p)
{
HASH_TABLE_ITER iter;
@ -2939,6 +3216,9 @@ static void dtdDestroy(DTD *p)
free(e->defaultAtts);
}
hashTableDestroy(&(p->generalEntities));
#ifdef XML_DTD
hashTableDestroy(&(p->paramEntities));
#endif /* XML_DTD */
hashTableDestroy(&(p->elementTypes));
hashTableDestroy(&(p->attributeIds));
hashTableDestroy(&(p->prefixes));
@ -2959,6 +3239,15 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
newDtd->base = tem;
}
#ifdef XML_DTD
if (oldDtd->systemId) {
const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->systemId);
if (!tem)
return 0;
newDtd->systemId = tem;
}
#endif /* XML_DTD */
/* Copy the prefix table. */
hashTableIterInit(&iter, &(oldDtd->prefixes));
@ -3043,9 +3332,31 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
}
}
/* Copy the entity table. */
/* Copy the entity tables. */
if (!copyEntityTable(&(newDtd->generalEntities),
&(newDtd->pool),
&(oldDtd->generalEntities)))
return 0;
hashTableIterInit(&iter, &(oldDtd->generalEntities));
#ifdef XML_DTD
if (!copyEntityTable(&(newDtd->paramEntities),
&(newDtd->pool),
&(oldDtd->paramEntities)))
return 0;
#endif /* XML_DTD */
newDtd->complete = oldDtd->complete;
newDtd->standalone = oldDtd->standalone;
return 1;
}
static int copyEntityTable(HASH_TABLE *newTable,
STRING_POOL *newPool,
const HASH_TABLE *oldTable)
{
HASH_TABLE_ITER iter;
hashTableIterInit(&iter, oldTable);
for (;;) {
ENTITY *newE;
@ -3053,44 +3364,42 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
if (!oldE)
break;
name = poolCopyString(&(newDtd->pool), oldE->name);
name = poolCopyString(newPool, oldE->name);
if (!name)
return 0;
newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
if (!newE)
return 0;
if (oldE->systemId) {
const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
if (!tem)
return 0;
newE->systemId = tem;
if (oldE->base) {
#if 0 /* FIXME */
if (oldE->base == oldDtd->base)
newE->base = newDtd->base;
tem = poolCopyString(&(newDtd->pool), oldE->base);
#endif
tem = poolCopyString(newPool, oldE->base);
if (!tem)
return 0;
newE->base = tem;
}
}
else {
const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
if (!tem)
return 0;
newE->textPtr = tem;
newE->textLen = oldE->textLen;
}
if (oldE->notation) {
const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
const XML_Char *tem = poolCopyString(newPool, oldE->notation);
if (!tem)
return 0;
newE->notation = tem;
}
}
newDtd->complete = oldDtd->complete;
newDtd->standalone = oldDtd->standalone;
return 1;
}
static

View file

@ -42,7 +42,16 @@ your version of this file under either the MPL or the GPL.
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
typedef int PROLOG_HANDLER(struct prolog_state *state,
#ifdef XML_DTD
#define setTopLevel(state) \
((state)->handler = ((state)->documentEntity \
? internalSubset \
: externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
typedef int PROLOG_HANDLER(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
@ -59,11 +68,15 @@ static PROLOG_HANDLER
attlist7, attlist8, attlist9,
element0, element1, element2, element3, element4, element5, element6,
element7,
#ifdef XML_DTD
externalSubset0, externalSubset1,
condSect0, condSect1, condSect2,
#endif /* XML_DTD */
declClose,
error;
static
int syntaxError(PROLOG_STATE *);
int common(PROLOG_STATE *state, int tok);
static
int prolog0(PROLOG_STATE *state,
@ -97,7 +110,7 @@ int prolog0(PROLOG_STATE *state,
state->handler = error;
return XML_ROLE_INSTANCE_START;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -125,7 +138,7 @@ int prolog1(PROLOG_STATE *state,
state->handler = error;
return XML_ROLE_INSTANCE_START;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -145,7 +158,7 @@ int prolog2(PROLOG_STATE *state,
state->handler = error;
return XML_ROLE_INSTANCE_START;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -163,7 +176,7 @@ int doctype0(PROLOG_STATE *state,
state->handler = doctype1;
return XML_ROLE_DOCTYPE_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -193,7 +206,7 @@ int doctype1(PROLOG_STATE *state,
}
break;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -210,7 +223,7 @@ int doctype2(PROLOG_STATE *state,
state->handler = doctype3;
return XML_ROLE_DOCTYPE_PUBLIC_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -227,7 +240,7 @@ int doctype3(PROLOG_STATE *state,
state->handler = doctype4;
return XML_ROLE_DOCTYPE_SYSTEM_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -247,7 +260,7 @@ int doctype4(PROLOG_STATE *state,
state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -264,7 +277,7 @@ int doctype5(PROLOG_STATE *state,
state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -312,9 +325,52 @@ int internalSubset(PROLOG_STATE *state,
state->handler = doctype5;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
#ifdef XML_DTD
/* First handler used for an external entity (installed by
   XmlPrologStateInitExternalEntity).  An XML declaration token here is
   reported as a text declaration; every other token is passed straight
   to externalSubset1, which is installed as the handler in both cases. */
static
int externalSubset0(PROLOG_STATE *state,
                    int tok,
                    const char *ptr,
                    const char *end,
                    const ENCODING *enc)
{
  state->handler = externalSubset1;
  if (tok != XML_TOK_XML_DECL)
    return externalSubset1(state, tok, ptr, end, enc);
  return XML_ROLE_TEXT_DECL;
}
/* Top-level handler outside the document entity.  Handles the tokens
   that open and close conditional sections, rejects a bare close
   bracket, and defers everything else to internalSubset. */
static
int externalSubset1(PROLOG_STATE *state,
                    int tok,
                    const char *ptr,
                    const char *end,
                    const ENCODING *enc)
{
  if (tok == XML_TOK_PROLOG_S)
    return XML_ROLE_NONE;
  if (tok == XML_TOK_COND_SECT_OPEN) {
    state->handler = condSect0;
    return XML_ROLE_NONE;
  }
  if (tok == XML_TOK_COND_SECT_CLOSE) {
    /* A close is only acceptable while an INCLUDE section is open. */
    if (state->includeLevel != 0) {
      state->includeLevel -= 1;
      return XML_ROLE_NONE;
    }
    return common(state, tok);
  }
  if (tok == XML_TOK_CLOSE_BRACKET)
    return common(state, tok);
  return internalSubset(state, tok, ptr, end, enc);
}
#endif /* XML_DTD */
static
int entity0(PROLOG_STATE *state,
int tok,
@ -332,7 +388,7 @@ int entity0(PROLOG_STATE *state,
state->handler = entity2;
return XML_ROLE_GENERAL_ENTITY_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -349,7 +405,7 @@ int entity1(PROLOG_STATE *state,
state->handler = entity7;
return XML_ROLE_PARAM_ENTITY_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -376,7 +432,7 @@ int entity2(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_ENTITY_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -393,7 +449,7 @@ int entity3(PROLOG_STATE *state,
state->handler = entity4;
return XML_ROLE_ENTITY_PUBLIC_ID;
}
return syntaxError(state);
return common(state, tok);
}
@ -411,7 +467,7 @@ int entity4(PROLOG_STATE *state,
state->handler = entity5;
return XML_ROLE_ENTITY_SYSTEM_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -425,7 +481,7 @@ int entity5(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
setTopLevel(state);
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "NDATA")) {
@ -434,7 +490,7 @@ int entity5(PROLOG_STATE *state,
}
break;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -451,7 +507,7 @@ int entity6(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_ENTITY_NOTATION_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -478,7 +534,7 @@ int entity7(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_ENTITY_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -495,7 +551,7 @@ int entity8(PROLOG_STATE *state,
state->handler = entity9;
return XML_ROLE_ENTITY_PUBLIC_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -512,7 +568,7 @@ int entity9(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_ENTITY_SYSTEM_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -529,7 +585,7 @@ int notation0(PROLOG_STATE *state,
state->handler = notation1;
return XML_ROLE_NOTATION_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -553,7 +609,7 @@ int notation1(PROLOG_STATE *state,
}
break;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -570,7 +626,7 @@ int notation2(PROLOG_STATE *state,
state->handler = notation4;
return XML_ROLE_NOTATION_PUBLIC_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -587,7 +643,7 @@ int notation3(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_NOTATION_SYSTEM_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -604,10 +660,10 @@ int notation4(PROLOG_STATE *state,
state->handler = declClose;
return XML_ROLE_NOTATION_SYSTEM_ID;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
setTopLevel(state);
return XML_ROLE_NOTATION_NO_SYSTEM_ID;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -625,7 +681,7 @@ int attlist0(PROLOG_STATE *state,
state->handler = attlist1;
return XML_ROLE_ATTLIST_ELEMENT_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -639,14 +695,14 @@ int attlist1(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
setTopLevel(state);
return XML_ROLE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist2;
return XML_ROLE_ATTRIBUTE_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -687,7 +743,7 @@ int attlist2(PROLOG_STATE *state,
state->handler = attlist3;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -706,7 +762,7 @@ int attlist3(PROLOG_STATE *state,
state->handler = attlist4;
return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -726,7 +782,7 @@ int attlist4(PROLOG_STATE *state,
state->handler = attlist3;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -743,7 +799,7 @@ int attlist5(PROLOG_STATE *state,
state->handler = attlist6;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
@ -761,7 +817,7 @@ int attlist6(PROLOG_STATE *state,
state->handler = attlist7;
return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -781,7 +837,7 @@ int attlist7(PROLOG_STATE *state,
state->handler = attlist6;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
/* default value */
@ -819,7 +875,7 @@ int attlist8(PROLOG_STATE *state,
state->handler = attlist1;
return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -836,7 +892,7 @@ int attlist9(PROLOG_STATE *state,
state->handler = attlist1;
return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -854,7 +910,7 @@ int element0(PROLOG_STATE *state,
state->handler = element1;
return XML_ROLE_ELEMENT_NAME;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -882,7 +938,7 @@ int element1(PROLOG_STATE *state,
state->level = 1;
return XML_ROLE_GROUP_OPEN;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -921,7 +977,7 @@ int element2(PROLOG_STATE *state,
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT_PLUS;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -942,7 +998,7 @@ int element3(PROLOG_STATE *state,
state->handler = element4;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -960,7 +1016,7 @@ int element4(PROLOG_STATE *state,
state->handler = element5;
return XML_ROLE_CONTENT_ELEMENT;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -980,7 +1036,7 @@ int element5(PROLOG_STATE *state,
state->handler = element4;
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -1010,7 +1066,7 @@ int element6(PROLOG_STATE *state,
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT_PLUS;
}
return syntaxError(state);
return common(state, tok);
}
static
@ -1050,9 +1106,72 @@ int element7(PROLOG_STATE *state,
state->handler = element6;
return XML_ROLE_GROUP_CHOICE;
}
return syntaxError(state);
return common(state, tok);
}
#ifdef XML_DTD
/* Handler entered after a conditional section has been opened: expects
   the keyword INCLUDE or IGNORE (whitespace is skipped) and selects
   condSect1 or condSect2 accordingly. */
static
int condSect0(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  if (tok == XML_TOK_PROLOG_S)
    return XML_ROLE_NONE;
  if (tok == XML_TOK_NAME) {
    if (XmlNameMatchesAscii(enc, ptr, "INCLUDE"))
      state->handler = condSect1;
    else if (XmlNameMatchesAscii(enc, ptr, "IGNORE"))
      state->handler = condSect2;
    else
      return common(state, tok);
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}
/* Handler entered after the INCLUDE keyword: expects the open bracket,
   then returns to externalSubset1 with the include nesting level
   increased by one. */
static
int condSect1(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  if (tok == XML_TOK_PROLOG_S)
    return XML_ROLE_NONE;
  if (tok != XML_TOK_OPEN_BRACKET)
    return common(state, tok);
  state->includeLevel += 1;
  state->handler = externalSubset1;
  return XML_ROLE_NONE;
}
/* Handler entered after the IGNORE keyword: expects the open bracket,
   then reports XML_ROLE_IGNORE_SECT and returns to externalSubset1. */
static
int condSect2(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  if (tok == XML_TOK_PROLOG_S)
    return XML_ROLE_NONE;
  if (tok != XML_TOK_OPEN_BRACKET)
    return common(state, tok);
  state->handler = externalSubset1;
  return XML_ROLE_IGNORE_SECT;
}
#endif /* XML_DTD */
static
int declClose(PROLOG_STATE *state,
int tok,
@ -1064,10 +1183,10 @@ int declClose(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
setTopLevel(state);
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
#if 0
@ -1086,23 +1205,27 @@ int ignore(PROLOG_STATE *state,
default:
return XML_ROLE_NONE;
}
return syntaxError(state);
return common(state, tok);
}
#endif
static
int error(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
return XML_ROLE_NONE;
}
static
int syntaxError(PROLOG_STATE *state)
int common(PROLOG_STATE *state, int tok)
{
#ifdef XML_DTD
if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
return XML_ROLE_INNER_PARAM_ENTITY_REF;
#endif
state->handler = error;
return XML_ROLE_ERROR;
}
@ -1110,4 +1233,26 @@ int syntaxError(PROLOG_STATE *state)
/* Resets state so that the first token is handled by prolog0.  With
   XML_DTD, the state is also marked as belonging to the document entity
   and the INCLUDE nesting level is cleared. */
void XmlPrologStateInit(PROLOG_STATE *state)
{
#ifdef XML_DTD
  state->includeLevel = 0;
  state->documentEntity = 1;
#endif /* XML_DTD */
  state->handler = prolog0;
}
#ifdef XML_DTD
/* Resets state for parsing an external entity: not the document entity,
   no open INCLUDE sections, and starting in externalSubset0 (which
   accepts an optional text declaration). */
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->includeLevel = 0;
  state->documentEntity = 0;
  state->handler = externalSubset0;
}
/* Resets state for parsing an internal entity: not the document entity,
   no open INCLUDE sections, and starting directly in externalSubset1. */
void XmlPrologStateInitInternalEntity(PROLOG_STATE *state)
{
  state->includeLevel = 0;
  state->documentEntity = 0;
  state->handler = externalSubset1;
}
#endif /* XML_DTD */

View file

@ -87,6 +87,11 @@ enum {
XML_ROLE_CONTENT_ELEMENT_REP,
XML_ROLE_CONTENT_ELEMENT_OPT,
XML_ROLE_CONTENT_ELEMENT_PLUS,
#ifdef XML_DTD
XML_ROLE_TEXT_DECL,
XML_ROLE_IGNORE_SECT,
XML_ROLE_INNER_PARAM_ENTITY_REF,
#endif /* XML_DTD */
XML_ROLE_PARAM_ENTITY_REF
};
@ -97,9 +102,17 @@ typedef struct prolog_state {
const char *end,
const ENCODING *enc);
unsigned level;
#ifdef XML_DTD
unsigned includeLevel;
int documentEntity;
#endif /* XML_DTD */
} PROLOG_STATE;
void XMLTOKAPI XmlPrologStateInit(PROLOG_STATE *);
#ifdef XML_DTD
void XMLTOKAPI XmlPrologStateInitInternalEntity(PROLOG_STATE *);
void XMLTOKAPI XmlPrologStateInitExternalEntity(PROLOG_STATE *);
#endif /* XML_DTD */
#define XmlTokenRole(state, tok, ptr, end, enc) \
(((state)->handler)(state, tok, ptr, end, enc))

View file

@ -32,8 +32,15 @@ your version of this file under either the MPL or the GPL.
#include "xmltok.h"
#include "nametab.h"
#ifdef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif
#define VTABLE1 \
{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
{ PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
@ -1397,9 +1404,11 @@ int initScan(const ENCODING **encodingTable,
encPtr = enc->encPtr;
if (ptr + 1 == end) {
/* only a single byte available for auto-detection */
#ifndef XML_DTD /* FIXME */
/* a well-formed document entity must have more than one byte */
if (state != XML_CONTENT_STATE)
return XML_TOK_PARTIAL;
#endif
/* so we're parsing an external text entity... */
/* if UTF-16 was externally specified, then we need at least 2 bytes */
switch (INIT_ENC_INDEX(enc)) {

View file

@ -108,10 +108,22 @@ extern "C" {
for a name with a colon. */
#define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD
#define XML_TOK_IGNORE_SECT 42
#endif /* XML_DTD */
#ifdef XML_DTD
#define XML_N_STATES 4
#else /* not XML_DTD */
#define XML_N_STATES 3
#endif /* not XML_DTD */
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2
#ifdef XML_DTD
#define XML_IGNORE_SECTION_STATE 3
#endif /* XML_DTD */
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
@ -211,6 +223,13 @@ literals, comments and processing instructions.
#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
#ifdef XML_DTD
#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
#endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on
the content of a literal that has already been returned by XmlTok. */

View file

@ -1326,6 +1326,61 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
return XML_TOK_DATA_CHARS;
}
#ifdef XML_DTD
/* Scans the content of an IGNORE section starting at ptr, tracking
   nested "<![" ... "]]>" pairs.

   Returns XML_TOK_IGNORE_SECT with *nextTokPtr set just past the "]]>"
   that closes the outermost section, or XML_TOK_PARTIAL if the input
   ends first (in which case *nextTokPtr is not written here).  Other
   results may come from INVALID_CASES -- NOTE(review): presumably the
   invalid / partial-character tokens; confirm against the macro. */
static
int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
                             const char **nextTokPtr)
{
  int level = 0;
  if (MINBPC(enc) > 1) {
    /* Only look at a whole number of minimum-size code units. */
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      end = ptr + n;
    }
  }
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    INVALID_CASES(ptr, nextTokPtr)
    case BT_LT:
      if ((ptr += MINBPC(enc)) == end)
        return XML_TOK_PARTIAL;
      if (CHAR_MATCHES(enc, ptr, '!')) {
        if ((ptr += MINBPC(enc)) == end)
          return XML_TOK_PARTIAL;
        if (CHAR_MATCHES(enc, ptr, '[')) {
          ++level;
          ptr += MINBPC(enc);
        }
      }
      break;
    case BT_RSQB:
      if ((ptr += MINBPC(enc)) == end)
        return XML_TOK_PARTIAL;
      if (CHAR_MATCHES(enc, ptr, ']')) {
        if ((ptr += MINBPC(enc)) == end)
          return XML_TOK_PARTIAL;
        if (CHAR_MATCHES(enc, ptr, '>')) {
          ptr += MINBPC(enc);
          if (level == 0) {
            *nextTokPtr = ptr;
            return XML_TOK_IGNORE_SECT;
          }
          --level;
        }
        else {
          /* "]]" was not followed by '>'.  Step back so the second ']'
             is examined again as a possible start of "]]>"; otherwise
             the delimiter inside "]]]>" would be missed. */
          ptr -= MINBPC(enc);
        }
      }
      break;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  return XML_TOK_PARTIAL;
}
#endif /* XML_DTD */
static
int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr)