From c446fb274ebc043a3de34730f6bef169d13dc0e9 Mon Sep 17 00:00:00 2001 From: Clark Cooper Date: Thu, 21 Sep 2000 21:20:18 +0000 Subject: [PATCH] Merged in modifications from perl-expat. Also fiddled around with more configuration issues. --- expat/Makefile.in | 15 +- expat/config.hin | 2 + expat/configure.in | 1 - expat/gennmtab/gennmtab.c | 2 +- expat/lib/Makefile.in | 4 +- expat/lib/ascii.h | 2 +- expat/lib/asciitab.h | 2 +- expat/lib/expat.h | 213 +++++++++-- expat/lib/iasciitab.h | 2 +- expat/lib/latin1tab.h | 2 +- expat/lib/utf8tab.h | 2 +- expat/lib/xmldef.h | 2 +- expat/lib/xmlparse.c | 729 ++++++++++++++++++++++++++++++++----- expat/lib/xmlrole.c | 10 +- expat/lib/xmlrole.h | 7 +- expat/lib/xmltok.c | 5 +- expat/lib/xmltok.h | 4 +- expat/lib/xmltok_impl.c | 2 +- expat/lib/xmltok_impl.h | 2 +- expat/lib/xmltok_ns.c | 2 + expat/xmltok/dllmain.c | 2 +- expat/xmltok/xmltok.c | 2 +- expat/xmlwf/codepage.c | 2 +- expat/xmlwf/codepage.h | 2 +- expat/xmlwf/filemap.h | 2 +- expat/xmlwf/readfilemap.c | 2 +- expat/xmlwf/unixfilemap.c | 2 +- expat/xmlwf/win32filemap.c | 2 +- expat/xmlwf/xmlfile.c | 2 +- expat/xmlwf/xmlfile.h | 2 +- expat/xmlwf/xmlwf.c | 2 +- 31 files changed, 888 insertions(+), 144 deletions(-) diff --git a/expat/Makefile.in b/expat/Makefile.in index a6b9e0fa..24db2bd7 100644 --- a/expat/Makefile.in +++ b/expat/Makefile.in @@ -78,7 +78,7 @@ VERSION = @VERSION@ SUBDIRS = lib ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -CONFIG_HEADERS = @CONFIG_HEADERS@ +CONFIG_HEADERS = config.h:config.hin DIST_COMMON = ChangeLog COPYING README Makefile.in aclocal.m4 config.hin \ configure configure.in conftools doc examples xmlwf @@ -108,17 +108,18 @@ config.h: config.hin config.status $(SHELL) ./config.status $(SUBDIRS): config.status - $(MAKE) -C $@ + cd $@ && $(MAKE) clean: @list='$(SUBDIRS)'; for dir in $$list; do \ - $(MAKE) -C $$dir clean; \ + (cd $$dir && $(MAKE) clean) || exit 1; \ done + rm -f core *~ distclean: @list='$(SUBDIRS)'; for dir in $$list; do \ - $(MAKE) -C $$dir
distclean; \ + (cd $$dir && $(MAKE) distclean) || exit 1; \ done rm -f config.h config.status libtool @@ -130,7 +131,7 @@ distdir: $(DIST_COMMON) clean test -d $(DISTDIR) || mkdir $(DISTDIR) @list='$(SUBDIRS)'; for dir in $$list; do \ test -d $(DISTDIR)/$$dir || mkdir $(DISTDIR)/$$dir; \ - $(MAKE) -C $$dir distdir; \ + (cd $$dir && $(MAKE) distdir) || exit 1; \ done @for file in $(DIST_COMMON); do \ if test -d $$file; then \ @@ -149,10 +150,10 @@ dist: $(DISTRIBUTION) install: @list='$(SUBDIRS)'; for dir in $$list; do \ - $(MAKE) -C $$dir install; \ + (cd $$dir && $(MAKE) install) || exit 1; \ done uninstall: @list='$(SUBDIRS)'; for dir in $$list; do \ - $(MAKE) -C $$dir uninstall; \ + (cd $$dir && $(MAKE) uninstall) || exit 1; \ done diff --git a/expat/config.hin b/expat/config.hin index c8aff0e8..f4431241 100644 --- a/expat/config.hin +++ b/expat/config.hin @@ -60,6 +60,8 @@ #define XML_BYTE_ORDER 12 #endif +#define XML_CONTEXT_BYTES 1024 + #ifndef HAVE_MEMMOVE #ifdef HAVE_BCOPY #define memmove(d,s,l) bcopy((s),(d),(l)) diff --git a/expat/configure.in b/expat/configure.in index c88b78a3..782a3b2e 100644 --- a/expat/configure.in +++ b/expat/configure.in @@ -24,7 +24,6 @@ AC_PROG_LIBTOOL AC_SUBST(PACKAGE) AC_SUBST(VERSION) -AC_SUBST(CONFIG_HEADERS) dnl Checks for programs. AC_PROG_CC diff --git a/expat/gennmtab/gennmtab.c b/expat/gennmtab/gennmtab.c index 4cbdf91d..2b0b7a7b 100755 --- a/expat/gennmtab/gennmtab.c +++ b/expat/gennmtab/gennmtab.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission.
*/ #include diff --git a/expat/lib/Makefile.in b/expat/lib/Makefile.in index efc89093..24811938 100644 --- a/expat/lib/Makefile.in +++ b/expat/lib/Makefile.in @@ -133,8 +133,8 @@ $(top_builddir)/config.h: $(top_builddir)/config.hin cd $(top_builddir) && $(MAKE) config.h clean: - rm -f $(LIBRARY) *.o *.lo - rm -rf .libs _libs + rm -f $(LIBRARY) *.o *.lo *~ + rm -rf .libs _libs .deps distclean: clean diff --git a/expat/lib/ascii.h b/expat/lib/ascii.h index a8a621ce..6376b1f3 100644 --- a/expat/lib/ascii.h +++ b/expat/lib/ascii.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #define ASCII_A 0x41 diff --git a/expat/lib/asciitab.h b/expat/lib/asciitab.h index e994576a..eb445cc5 100644 --- a/expat/lib/asciitab.h +++ b/expat/lib/asciitab.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, diff --git a/expat/lib/expat.h b/expat/lib/expat.h index 6dffe741..6a8d9948 100644 --- a/expat/lib/expat.h +++ b/expat/lib/expat.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #ifndef XmlParse_INCLUDED @@ -48,6 +48,109 @@ typedef char XML_LChar; #endif /* not XML_UNICODE_WCHAR_T */ +enum XML_Content_Type { + XML_CTYPE_EMPTY = 1, + XML_CTYPE_ANY, + XML_CTYPE_MIXED, + XML_CTYPE_NAME, + XML_CTYPE_CHOICE, + XML_CTYPE_SEQ +}; + +enum XML_Content_Quant { + XML_CQUANT_NONE, + XML_CQUANT_OPT, + XML_CQUANT_REP, + XML_CQUANT_PLUS +}; + +/* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be + XML_CQUANT_NONE, and the other fields will be zero or NULL. 
+ If type == XML_CTYPE_MIXED, then quant will be NONE or REP and + numchildren will contain number of elements that may be mixed in + and children point to an array of XML_Content cells that will be + all of XML_CTYPE_NAME type with no quantification. + + If type == XML_CTYPE_NAME, then the name points to the name, and + the numchildren field will be zero and children will be NULL. The + quant fields indicates any quantifiers placed on the name. + + CHOICE and SEQ will have name NULL, the number of children in + numchildren and children will point, recursively, to an array + of XML_Content cells. + + The EMPTY, ANY, and MIXED types will only occur at top level. +*/ + +typedef struct XML_cp XML_Content; + +struct XML_cp { + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + unsigned int numchildren; + XML_Content * children; +}; + + +/* This is called for an element declaration. See above for + description of the model argument. It's the caller's responsibility + to free model when finished with it by calling XML_ContentFree. +*/ + +typedef void (*XML_ElementDeclHandler) (void *userData, + const XML_Char *name, + XML_Content *model); + +void XMLPARSEAPI +XML_SetElementDeclHandler(XML_Parser parser, + XML_ElementDeclHandler eldecl); + + +void XMLPARSEAPI +XML_ContentFree(XML_Content *content); + +/* + The Attlist declaration handler is called for *each* attribute. So + a single Attlist declaration with multiple attributes declared will + generate multiple calls to this handler. The "default" parameter + may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. + The "isrequired" parameter will be true and the default value will + be NULL in the case of "#REQUIRED". If "isrequired" is true and + default is non-NULL, then this is a "#FIXED" default. 
+ */ + +typedef void (*XML_AttlistDeclHandler) (void *userData, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired); + +void XMLPARSEAPI +XML_SetAttlistDeclHandler(XML_Parser parser, + XML_AttlistDeclHandler attdecl); + + + /* The XML declaration handler is called for *both* XML declarations and + text declarations. The way to distinguish is that the version parameter + will be null for text declarations. The encoding parameter may be null + for XML declarations. The standalone parameter will be -1, 0, or 1 + indicating respectively that there was no standalone parameter in + the declaration, that it was given as no, or that it was given as yes. + */ + +typedef void (*XML_XmlDeclHandler) (void *userData, + const XML_Char *version, + const XML_Char *encoding, + int standalone); + + + +void XMLPARSEAPI +XML_SetXmlDeclHandler(XML_Parser parser, + XML_XmlDeclHandler xmldecl); + /* Constructs a new parser; encoding is the encoding specified by the external protocol or null if there is none specified. */ @@ -112,16 +215,54 @@ typedef void (*XML_DefaultHandler)(void *userData, const XML_Char *s, int len); -/* This is called for the start of the DOCTYPE declaration when the -name of the DOCTYPE is encountered. */ +/* This is called for the start of the DOCTYPE declaration, before + any DTD or internal subset is parsed. */ + typedef void (*XML_StartDoctypeDeclHandler)(void *userData, - const XML_Char *doctypeName); + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset + ); /* This is called for the start of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ typedef void (*XML_EndDoctypeDeclHandler)(void *userData); -/* This is called for a declaration of an unparsed (NDATA) +/* This is called for entity declarations. 
The is_parameter_entity + argument will be non-zero if the entity is a parameter entity, zero + otherwise. + + For internal entities (<!ENTITY foo "bar">), value will + be non-null and systemId, publicId, and notationName will be null. + The value string is NOT null terminated; the length is provided in + the value_length argument. Since it is legal to have zero-length + values, do not use this argument to test for internal entities. + + For external entities, value will be null and systemId will be non-null. + The publicId argument will be null unless a public identifier was + provided. The notationName argument will have a non-null value only + for unparsed entity declarations. +*/ + +typedef void (*XML_EntityDeclHandler) (void *userData, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName); + +void XMLPARSEAPI +XML_SetEntityDeclHandler(XML_Parser parser, + XML_EntityDeclHandler handler); + +/* OBSOLETE -- OBSOLETE -- OBSOLETE + This handler has been superseded by the EntityDeclHandler above. + It is provided here for backward compatibility. +This is called for a declaration of an unparsed (NDATA) entity. The base argument is whatever was set by XML_SetBase. The entityName, systemId and notationName arguments will never be null. The other arguments may be. */ @@ -143,17 +284,6 @@ typedef void (*XML_NotationDeclHandler)(void *userData, const XML_Char *systemId, const XML_Char *publicId); -typedef void (*XML_ExternalParsedEntityDeclHandler)(void *userData, - const XML_Char *entityName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); - -typedef void (*XML_InternalParsedEntityDeclHandler)(void *userData, - const XML_Char *entityName, - const XML_Char *replacementText, - int replacementTextLength); - /* When namespace processing is enabled, these are called once for each namespace declaration.
The call to the start and end element handlers occur between the calls to the start and end namespace @@ -273,6 +403,12 @@ XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); +void XMLPARSEAPI +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler); + +void XMLPARSEAPI +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler); + void XMLPARSEAPI XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler); @@ -289,6 +425,14 @@ XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end); +void XMLPARSEAPI +XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start); + +void XMLPARSEAPI +XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end); + /* This sets the default handler and also inhibits expansion of internal entities. The entity reference will be passed to the default handler. */ @@ -308,6 +452,14 @@ XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); +void XMLPARSEAPI +XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start); + +void XMLPARSEAPI +XML_SetEndDoctypeDeclHandler(XML_Parser parser, + XML_EndDoctypeDeclHandler end); + void XMLPARSEAPI XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); @@ -316,19 +468,19 @@ void XMLPARSEAPI XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); -void XMLPARSEAPI -XML_SetExternalParsedEntityDeclHandler(XML_Parser parser, - XML_ExternalParsedEntityDeclHandler handler); - -void XMLPARSEAPI -XML_SetInternalParsedEntityDeclHandler(XML_Parser parser, - XML_InternalParsedEntityDeclHandler handler); - void XMLPARSEAPI XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end); +void XMLPARSEAPI 
+XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start); + +void XMLPARSEAPI +XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end); + void XMLPARSEAPI XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler); @@ -508,6 +660,19 @@ Returns 0 if the event is in an internal entity. */ int XMLPARSEAPI XML_GetCurrentByteCount(XML_Parser parser); +/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets + the integer pointed to by offset to the offset within this buffer + of the current parse position, and sets the integer pointed to by size + to the size of this buffer (the number of input bytes). Otherwise + returns a null pointer. Also returns a null pointer if a parse isn't active. + + NOTE: The character pointer returned should not be used outside + the handler that makes the call. */ + +const char XMLPARSEAPI * XML_GetInputContext(XML_Parser parser, + int *offset, + int *size); + /* For backwards compatibility with previous versions. */ #define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber diff --git a/expat/lib/iasciitab.h b/expat/lib/iasciitab.h index 2694d9d6..55dbc398 100644 --- a/expat/lib/iasciitab.h +++ b/expat/lib/iasciitab.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ diff --git a/expat/lib/latin1tab.h b/expat/lib/latin1tab.h index 6e01d50e..178b1d18 100644 --- a/expat/lib/latin1tab.h +++ b/expat/lib/latin1tab.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/expat/lib/utf8tab.h b/expat/lib/utf8tab.h index 28d9b596..9e3b6b83 100644 --- a/expat/lib/utf8tab.h +++ b/expat/lib/utf8tab.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ diff --git a/expat/lib/xmldef.h b/expat/lib/xmldef.h index 57b83339..83e96b2b 100644 --- a/expat/lib/xmldef.h +++ b/expat/lib/xmldef.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c index 42fb3cd7..89b505a4 100644 --- a/expat/lib/xmlparse.c +++ b/expat/lib/xmlparse.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ #include @@ -114,8 +114,19 @@ typedef struct { const XML_Char *publicId; const XML_Char *notation; char open; + char is_param; } ENTITY; +typedef struct { + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; +} CONTENT_SCAFFOLD; + typedef struct block { struct block *next; int size; @@ -166,6 +177,14 @@ typedef struct { HASH_TABLE paramEntities; #endif /* XML_DTD */ PREFIX defaultPrefix; + /* === scaffolding for building content model === */ + int in_eldecl; + CONTENT_SCAFFOLD *scaffold; + unsigned contentStringLen; + unsigned scaffSize; + unsigned scaffCount; + int scaffLevel; + int *scaffIndex; } DTD; typedef struct open_internal_entity { @@ -261,8 +280,16 @@ static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static int poolGrow(STRING_POOL *pool); +static int nextScaffoldPart(XML_Parser parser); +static XML_Content *build_model(XML_Parser parser); + static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); +static const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s); +static ELEMENT_TYPE * getElementType(XML_Parser Paraser, + const ENCODING *enc, + const char *ptr, + const char *end); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -303,14 +330,16 @@ typedef struct { XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; XML_NotationDeclHandler m_notationDeclHandler; - XML_ExternalParsedEntityDeclHandler m_externalParsedEntityDeclHandler; - XML_InternalParsedEntityDeclHandler m_internalParsedEntityDeclHandler; XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 
XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; void *m_externalEntityRefHandlerArg; XML_UnknownEncodingHandler m_unknownEncodingHandler; + XML_ElementDeclHandler m_elementDeclHandler; + XML_AttlistDeclHandler m_attlistDeclHandler; + XML_EntityDeclHandler m_entityDeclHandler; + XML_XmlDeclHandler m_xmlDeclHandler; const ENCODING *m_encoding; INIT_ENCODING m_initEncoding; const ENCODING *m_internalEncoding; @@ -330,6 +359,10 @@ typedef struct { int m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; + const XML_Char *m_doctypeName; + const XML_Char *m_doctypeSysid; + const XML_Char *m_doctypePubid; + const XML_Char *m_declAttributeType; const XML_Char *m_declNotationName; const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; @@ -373,14 +406,16 @@ typedef struct { #define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler) #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) -#define externalParsedEntityDeclHandler (((Parser *)parser)->m_externalParsedEntityDeclHandler) -#define internalParsedEntityDeclHandler (((Parser *)parser)->m_internalParsedEntityDeclHandler) #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler) #define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler) #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) +#define elementDeclHandler (((Parser *)parser)->m_elementDeclHandler) +#define attlistDeclHandler (((Parser *)parser)->m_attlistDeclHandler) +#define entityDeclHandler (((Parser *)parser)->m_entityDeclHandler) 
+#define xmlDeclHandler (((Parser *)parser)->m_xmlDeclHandler) #define encoding (((Parser *)parser)->m_encoding) #define initEncoding (((Parser *)parser)->m_initEncoding) #define internalEncoding (((Parser *)parser)->m_internalEncoding) @@ -412,6 +447,10 @@ typedef struct { #define dtd (((Parser *)parser)->m_dtd) #define curBase (((Parser *)parser)->m_curBase) #define declEntity (((Parser *)parser)->m_declEntity) +#define doctypeName (((Parser *)parser)->m_doctypeName) +#define doctypeSysid (((Parser *)parser)->m_doctypeSysid) +#define doctypePubid (((Parser *)parser)->m_doctypePubid) +#define declAttributeType (((Parser *)parser)->m_declAttributeType) #define declNotationName (((Parser *)parser)->m_declNotationName) #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) #define declElementType (((Parser *)parser)->m_declElementType) @@ -467,14 +506,16 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) endDoctypeDeclHandler = 0; unparsedEntityDeclHandler = 0; notationDeclHandler = 0; - externalParsedEntityDeclHandler = 0; - internalParsedEntityDeclHandler = 0; startNamespaceDeclHandler = 0; endNamespaceDeclHandler = 0; notStandaloneHandler = 0; externalEntityRefHandler = 0; externalEntityRefHandlerArg = parser; unknownEncodingHandler = 0; + elementDeclHandler = 0; + attlistDeclHandler = 0; + entityDeclHandler = 0; + xmlDeclHandler = 0; buffer = 0; bufferPtr = 0; bufferEnd = 0; @@ -484,6 +525,10 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) declElementType = 0; declAttributeId = 0; declEntity = 0; + doctypeName = 0; + doctypeSysid = 0; + doctypePubid = 0; + declAttributeType = 0; declNotationName = 0; declNotationPublicId = 0; memset(&position, 0, sizeof(POSITION)); @@ -587,13 +632,16 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, XML_DefaultHandler oldDefaultHandler = defaultHandler; XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; XML_NotationDeclHandler 
oldNotationDeclHandler = notationDeclHandler; - XML_ExternalParsedEntityDeclHandler oldExternalParsedEntityDeclHandler = externalParsedEntityDeclHandler; - XML_InternalParsedEntityDeclHandler oldInternalParsedEntityDeclHandler = internalParsedEntityDeclHandler; XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; + ELEMENT_TYPE * oldDeclElementType = declElementType; void *oldUserData = userData; void *oldHandlerArg = handlerArg; int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; @@ -616,13 +664,16 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, defaultHandler = oldDefaultHandler; unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; notationDeclHandler = oldNotationDeclHandler; - externalParsedEntityDeclHandler = oldExternalParsedEntityDeclHandler; - internalParsedEntityDeclHandler = oldInternalParsedEntityDeclHandler; startNamespaceDeclHandler = oldStartNamespaceDeclHandler; endNamespaceDeclHandler = oldEndNamespaceDeclHandler; notStandaloneHandler = oldNotStandaloneHandler; externalEntityRefHandler = oldExternalEntityRefHandler; unknownEncodingHandler = oldUnknownEncodingHandler; + elementDeclHandler = oldElementDeclHandler; + attlistDeclHandler = oldAttlistDeclHandler; + entityDeclHandler = oldEntityDeclHandler; + xmlDeclHandler = oldXmlDeclHandler; + declElementType = oldDeclElementType; userData = oldUserData; if 
(oldUserData == oldHandlerArg) handlerArg = userData; @@ -695,15 +746,23 @@ void XML_ParserFree(XML_Parser parser) #endif /* XML_DTD */ dtdDestroy(&dtd); free((void *)atts); - free(groupConnector); - free(buffer); + if (groupConnector) + free(groupConnector); + if (buffer) + free(buffer); free(dataBuf); - free(unknownEncodingMem); + if (unknownEncodingMem) + free(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); free(parser); } +void XML_ContentFree(XML_Content *content) +{ + free(content); +} + void XML_UseParserAsHandlerArg(XML_Parser parser) { handlerArg = parser; @@ -753,6 +812,16 @@ void XML_SetElementHandler(XML_Parser parser, endElementHandler = end; } +void XML_SetStartElementHandler(XML_Parser parser, + XML_StartElementHandler start) { + startElementHandler = start; +} + +void XML_SetEndElementHandler(XML_Parser parser, + XML_EndElementHandler end) { + endElementHandler = end; +} + void XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { @@ -779,6 +848,16 @@ void XML_SetCdataSectionHandler(XML_Parser parser, endCdataSectionHandler = end; } +void XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start) { + startCdataSectionHandler = start; +} + +void XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end) { + endCdataSectionHandler = end; +} + void XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { @@ -801,24 +880,22 @@ void XML_SetDoctypeDeclHandler(XML_Parser parser, endDoctypeDeclHandler = end; } +void XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start) { + startDoctypeDeclHandler = start; +} + +void XML_SetEndDoctypeDeclHandler(XML_Parser parser, + XML_EndDoctypeDeclHandler end) { + endDoctypeDeclHandler = end; +} + void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { unparsedEntityDeclHandler = handler; } -void 
XML_SetExternalParsedEntityDeclHandler(XML_Parser parser, - XML_ExternalParsedEntityDeclHandler handler) -{ - externalParsedEntityDeclHandler = handler; -} - -void XML_SetInternalParsedEntityDeclHandler(XML_Parser parser, - XML_InternalParsedEntityDeclHandler handler) -{ - internalParsedEntityDeclHandler = handler; -} - void XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { @@ -833,6 +910,15 @@ void XML_SetNamespaceDeclHandler(XML_Parser parser, endNamespaceDeclHandler = end; } +void XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start) { + startNamespaceDeclHandler = start; +} + +void XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end) { + endNamespaceDeclHandler = end; +} void XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { @@ -861,6 +947,29 @@ void XML_SetUnknownEncodingHandler(XML_Parser parser, unknownEncodingHandlerData = data; } +void XML_SetElementDeclHandler(XML_Parser parser, + XML_ElementDeclHandler eldecl) +{ + elementDeclHandler = eldecl; +} + +void XML_SetAttlistDeclHandler(XML_Parser parser, + XML_AttlistDeclHandler attdecl) +{ + attlistDeclHandler = attdecl; +} + +void XML_SetEntityDeclHandler(XML_Parser parser, + XML_EntityDeclHandler handler) +{ + entityDeclHandler = handler; +} + +void XML_SetXmlDeclHandler(XML_Parser parser, + XML_XmlDeclHandler handler) { + xmlDeclHandler = handler; +} + int XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing) { @@ -885,6 +994,7 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) processor = errorProcessor; return 0; } +#ifndef XML_CONTEXT_BYTES else if (bufferPtr == bufferEnd) { const char *end; int nLeftOver; @@ -925,6 +1035,7 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } return 1; } +#endif /* not defined XML_CONTEXT_BYTES */ else { memcpy(XML_GetBuffer(parser, len), s, len); return XML_ParseBuffer(parser, len, isFinal); @@
-956,10 +1067,26 @@ void *XML_GetBuffer(XML_Parser parser, int len) if (len > bufferLim - bufferEnd) { /* FIXME avoid integer overflow */ int neededSize = len + (bufferEnd - bufferPtr); +#ifdef XML_CONTEXT_BYTES + int keep = bufferPtr - buffer; + + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + neededSize += keep; +#endif /* defined XML_CONTEXT_BYTES */ if (neededSize <= bufferLim - buffer) { +#ifdef XML_CONTEXT_BYTES + if (keep < bufferPtr - buffer) { + int offset = (bufferPtr - buffer) - keep; + memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); + bufferEnd -= offset; + bufferPtr -= offset; + } +#else memmove(buffer, bufferPtr, bufferEnd - bufferPtr); bufferEnd = buffer + (bufferEnd - bufferPtr); bufferPtr = buffer; +#endif /* not defined XML_CONTEXT_BYTES */ } else { char *newBuf; @@ -975,12 +1102,29 @@ void *XML_GetBuffer(XML_Parser parser, int len) return 0; } bufferLim = newBuf + bufferSize; +#ifdef XML_CONTEXT_BYTES + if (bufferPtr) { + int keep = bufferPtr - buffer; + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); + free(buffer); + buffer = newBuf; + bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; + bufferPtr = buffer + keep; + } + else { + bufferEnd = newBuf + (bufferEnd - bufferPtr); + bufferPtr = buffer = newBuf; + } +#else if (bufferPtr) { memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); free(buffer); } bufferEnd = newBuf + (bufferEnd - bufferPtr); bufferPtr = buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ } } return bufferEnd; @@ -1005,6 +1149,18 @@ int XML_GetCurrentByteCount(XML_Parser parser) return 0; } +const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size) +{ +#ifdef XML_CONTEXT_BYTES + if (eventPtr && buffer) { + *offset = eventPtr - buffer; + *size = bufferEnd - buffer; + return buffer; + } +#endif /* defined XML_CONTEXT_BYTES */ + return (char *) 0; +} + int XML_GetCurrentLineNumber(XML_Parser parser) 
{ if (eventPtr) { @@ -2096,8 +2252,11 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *next) { const char *encodingName = 0; + const char *storedEncName = 0; const ENCODING *newEncoding = 0; - const char *version; + const char *version = 0; + const char *versionend; + const char *storedversion = 0; int standalone = -1; if (!(ns ? XmlParseXmlDeclNS @@ -2107,6 +2266,7 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, next, &eventPtr, &version, + &versionend, &encodingName, &newEncoding, &standalone)) @@ -2118,7 +2278,28 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } - if (defaultHandler) + if (xmlDeclHandler) { + if (encodingName) { + storedEncName = poolStoreString(&temp2Pool, + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (! storedEncName) + return XML_ERROR_NO_MEMORY; + poolFinish(&temp2Pool); + } + if (version) { + storedversion = poolStoreString(&temp2Pool, + encoding, + version, + versionend - encoding->minBytesPerChar); + if (! storedversion) + return XML_ERROR_NO_MEMORY; + } + xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); + } + else if (defaultHandler) reportDefault(parser, encoding, s, next); if (!protocolEncodingName) { if (newEncoding) { @@ -2130,20 +2311,26 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, } else if (encodingName) { enum XML_Error result; - const XML_Char *s = poolStoreString(&tempPool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (!s) - return XML_ERROR_NO_MEMORY; - result = handleUnknownEncoding(parser, s); - poolDiscard(&tempPool); + if (! storedEncName) { + storedEncName = poolStoreString(&temp2Pool, + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (! 
storedEncName) + return XML_ERROR_NO_MEMORY; + } + result = handleUnknownEncoding(parser, storedEncName); + poolClear(&tempPool); if (result == XML_ERROR_UNKNOWN_ENCODING) eventPtr = encodingName; return result; } } + + if (storedEncName || storedversion) + poolClear(&temp2Pool); + return XML_ERROR_NONE; } @@ -2224,6 +2411,8 @@ doProlog(XML_Parser parser, const char **eventPP; const char **eventEndPP; + enum XML_Content_Quant quant; + if (enc == encoding) { eventPP = &eventPtr; eventEndPP = &eventEndPtr; @@ -2280,10 +2469,19 @@ doProlog(XML_Parser parser, break; case XML_ROLE_DOCTYPE_NAME: if (startDoctypeDeclHandler) { - const XML_Char *name = poolStoreString(&tempPool, enc, s, next); - if (!name) + doctypeName = poolStoreString(&tempPool, enc, s, next); + if (! doctypeName) return XML_ERROR_NO_MEMORY; - startDoctypeDeclHandler(handlerArg, name); + poolFinish(&tempPool); + doctypeSysid = 0; + doctypePubid = 0; + } + break; + case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: + if (startDoctypeDeclHandler) { + startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, + doctypePubid, 1); + doctypeName = 0; poolClear(&tempPool); } break; @@ -2298,6 +2496,12 @@ doProlog(XML_Parser parser, break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: + if (startDoctypeDeclHandler) { + doctypePubid = poolStoreString(&tempPool, enc, s + 1, next - 1); + if (! 
doctypePubid) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } #ifdef XML_DTD declEntity = (ENTITY *)lookup(&dtd.paramEntities, externalSubsetName, @@ -2322,6 +2526,11 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_DOCTYPE_CLOSE: + if (doctypeName) { + startDoctypeDeclHandler(handlerArg, doctypeName, + doctypeSysid, doctypePubid, 0); + poolClear(&tempPool); + } if (dtd.complete && hadExternalDoctype) { dtd.complete = 0; #ifdef XML_DTD @@ -2350,41 +2559,87 @@ doProlog(XML_Parser parser, processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - { - const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!declElementType) - return XML_ERROR_NO_MEMORY; - if (declElementType->name != name) - poolDiscard(&dtd.pool); - else { - poolFinish(&dtd.pool); - if (!setElementTypePrefix(parser, declElementType)) - return XML_ERROR_NO_MEMORY; - } - break; - } + declElementType = getElementType(parser, enc, s, next); + if (!declElementType) + return XML_ERROR_NO_MEMORY; + break; case XML_ROLE_ATTRIBUTE_NAME: declAttributeId = getAttributeId(parser, enc, s, next); if (!declAttributeId) return XML_ERROR_NO_MEMORY; declAttributeIsCdata = 0; + declAttributeType = 0; declAttributeIsId = 0; break; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: declAttributeIsCdata = 1; + declAttributeType = "CDATA"; break; case XML_ROLE_ATTRIBUTE_TYPE_ID: declAttributeIsId = 1; + declAttributeType = "ID"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_IDREF: + declAttributeType = "IDREF"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: + declAttributeType = "IDREFS"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: + declAttributeType = "ENTITY"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: + declAttributeType = "ENTITIES"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: + declAttributeType = 
"NMTOKEN"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: + declAttributeType = "NMTOKENS"; + break; + + case XML_ROLE_ATTRIBUTE_ENUM_VALUE: + case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: + if (attlistDeclHandler) + { + char *prefix; + if (declAttributeType) { + prefix = "|"; + } + else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE + ? "NOTATION(" + : "("); + } + if (! poolAppendString(&tempPool, prefix)) + return XML_ERROR_NO_MEMORY; + if (! poolAppend(&tempPool, enc, s, next)) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, - declAttributeIsId, 0)) + && !defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, declAttributeIsId, 0)) return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == '(' + || *declAttributeType == 'N' && declAttributeType[1] == 'O') { + /* Enumerated or Notation type */ + if (! poolAppendChar(&tempPool, ')') + || ! 
poolAppendChar(&tempPool, '\0')) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: @@ -2400,9 +2655,26 @@ doProlog(XML_Parser parser, attVal = poolStart(&dtd.pool); poolFinish(&dtd.pool); if (dtd.complete - // ID attributes aren't allowed to have a default + /* ID attributes aren't allowed to have a default */ && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal)) return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == '(' + || *declAttributeType == 'N' && declAttributeType[1] == 'O') { + /* Enumerated or Notation type */ + if (! poolAppendChar(&tempPool, ')') + || ! poolAppendChar(&tempPool, '\0')) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + attVal, + role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + } break; } case XML_ROLE_ENTITY_VALUE: @@ -2414,15 +2686,14 @@ doProlog(XML_Parser parser, declEntity->textPtr = poolStart(&dtd.pool); declEntity->textLen = poolLength(&dtd.pool); poolFinish(&dtd.pool); - if (internalParsedEntityDeclHandler - // Check it's not a parameter entity - && ((ENTITY *)lookup(&dtd.generalEntities, declEntity->name, 0) - == declEntity)) { + if (entityDeclHandler) { *eventEndPP = s; - internalParsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->textPtr, - declEntity->textLen); + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + declEntity->textPtr, + declEntity->textLen, + curBase, 0, 0, 0); } } else @@ -2432,6 
+2703,12 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: + if (startDoctypeDeclHandler) { + doctypeSysid = poolStoreString(&tempPool, enc, s + 1, next - 1); + if (! doctypeSysid) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } if (!dtd.standalone #ifdef XML_DTD && !paramEntityParsing @@ -2447,6 +2724,7 @@ doProlog(XML_Parser parser, declEntity = (ENTITY *)lookup(&dtd.paramEntities, externalSubsetName, sizeof(ENTITY)); + declEntity->publicId = 0; if (!declEntity) return XML_ERROR_NO_MEMORY; } @@ -2463,6 +2741,18 @@ doProlog(XML_Parser parser, poolFinish(&dtd.pool); } break; + case XML_ROLE_ENTITY_COMPLETE: + if (declEntity && entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + 0); + } + break; case XML_ROLE_ENTITY_NOTATION_NAME: if (declEntity) { declEntity->notation = poolStoreString(&dtd.pool, enc, s, next); @@ -2478,17 +2768,16 @@ doProlog(XML_Parser parser, declEntity->publicId, declEntity->notation); } - - } - break; - case XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION: - if (declEntity && externalParsedEntityDeclHandler) { - *eventEndPP = s; - externalParsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId); + else if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + } } break; case XML_ROLE_GENERAL_ENTITY_NAME: @@ -2509,8 +2798,11 @@ doProlog(XML_Parser parser, poolDiscard(&dtd.pool); declEntity = 0; } - else + else { poolFinish(&dtd.pool); + declEntity->publicId = 0; + declEntity->is_param = 0; + } } else { poolDiscard(&dtd.pool); @@ -2531,8 +2823,11 @@ doProlog(XML_Parser parser, poolDiscard(&dtd.pool); declEntity = 0; } - else + else { poolFinish(&dtd.pool); + declEntity->publicId = 0; + 
declEntity->is_param = 1; + } } #else /* not XML_DTD */ declEntity = 0; @@ -2616,14 +2911,25 @@ doProlog(XML_Parser parser, #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { - if (groupSize) + if (groupSize) { groupConnector = realloc(groupConnector, groupSize *= 2); + if (dtd.scaffIndex) + dtd.scaffIndex = realloc(dtd.scaffIndex, groupSize * sizeof(int)); + } else groupConnector = malloc(groupSize = 32); if (!groupConnector) return XML_ERROR_NO_MEMORY; } groupConnector[prologState.level] = 0; + if (dtd.in_eldecl) { + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd.scaffIndex[dtd.scaffLevel] = myindex; + dtd.scaffLevel++; + dtd.scaffold[myindex].type = XML_CTYPE_SEQ; + } break; case XML_ROLE_GROUP_SEQUENCE: if (groupConnector[prologState.level] == '|') @@ -2633,6 +2939,12 @@ doProlog(XML_Parser parser, case XML_ROLE_GROUP_CHOICE: if (groupConnector[prologState.level] == ',') return XML_ERROR_SYNTAX; + if (dtd.in_eldecl + && ! groupConnector[prologState.level] + && dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type != XML_CTYPE_MIXED + ) { + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_CHOICE; + } groupConnector[prologState.level] = '|'; break; case XML_ROLE_PARAM_ENTITY_REF: @@ -2689,6 +3001,106 @@ doProlog(XML_Parser parser, if (defaultHandler) reportDefault(parser, enc, s, next); break; + + /* Element declaration stuff */ + + case XML_ROLE_ELEMENT_NAME: + if (elementDeclHandler) { + declElementType = getElementType(parser, enc, s, next); + if (! declElementType) + return XML_ERROR_NO_MEMORY; + dtd.scaffLevel = 0; + dtd.scaffCount = 0; + dtd.in_eldecl = 1; + } + break; + + case XML_ROLE_CONTENT_ANY: + case XML_ROLE_CONTENT_EMPTY: + if (dtd.in_eldecl) { + if (elementDeclHandler) { + XML_Content * content = (XML_Content *) malloc(sizeof(XML_Content)); + if (! 
content) + return XML_ERROR_NO_MEMORY; + content->quant = XML_CQUANT_NONE; + content->name = 0; + content->numchildren = 0; + content->children = 0; + content->type = ((role == XML_ROLE_CONTENT_ANY) ? + XML_CTYPE_ANY : + XML_CTYPE_EMPTY); + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, content); + } + dtd.in_eldecl = 0; + } + break; + + case XML_ROLE_CONTENT_PCDATA: + if (dtd.in_eldecl) { + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_MIXED; + } + break; + + case XML_ROLE_CONTENT_ELEMENT: + quant = XML_CQUANT_NONE; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_OPT: + quant = XML_CQUANT_OPT; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_REP: + quant = XML_CQUANT_REP; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_PLUS: + quant = XML_CQUANT_PLUS; + elementContent: + if (dtd.in_eldecl) + { + ELEMENT_TYPE *el; + const char *nxt = quant == XML_CQUANT_NONE ? next : next - 1; + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd.scaffold[myindex].type = XML_CTYPE_NAME; + dtd.scaffold[myindex].quant = quant; + el = getElementType(parser, enc, s, nxt); + if (! el) + return XML_ERROR_NO_MEMORY; + dtd.scaffold[myindex].name = el->name; + dtd.contentStringLen += nxt - s + 1; + } + break; + + case XML_ROLE_GROUP_CLOSE: + quant = XML_CQUANT_NONE; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_OPT: + quant = XML_CQUANT_OPT; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_REP: + quant = XML_CQUANT_REP; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_PLUS: + quant = XML_CQUANT_PLUS; + closeGroup: + if (dtd.in_eldecl) { + dtd.scaffLevel--; + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel]].quant = quant; + if (dtd.scaffLevel == 0) { + if (elementDeclHandler) { + XML_Content *model = build_model(parser); + if (! 
model) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, model); + } + dtd.in_eldecl = 0; + dtd.contentStringLen = 0; + } + } + break; + /* End element declaration stuff */ + case XML_ROLE_NONE: switch (tok) { case XML_TOK_PI: @@ -3441,6 +3853,15 @@ static int dtdInit(DTD *p) #endif /* XML_DTD */ p->defaultPrefix.name = 0; p->defaultPrefix.binding = 0; + + p->in_eldecl = 0; + p->scaffIndex = 0; + p->scaffLevel = 0; + p->scaffold = 0; + p->contentStringLen = 0; + p->scaffSize = 0; + p->scaffCount = 0; + return 1; } @@ -3475,6 +3896,10 @@ static void dtdDestroy(DTD *p) hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); + if (p->scaffIndex) + free(p->scaffIndex); + if (p->scaffold) + free(p->scaffold); } /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. @@ -3585,6 +4010,15 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) newDtd->complete = oldDtd->complete; newDtd->standalone = oldDtd->standalone; + + /* Don't want deep copying for scaffolding */ + newDtd->in_eldecl = oldDtd->in_eldecl; + newDtd->scaffold = oldDtd->scaffold; + newDtd->contentStringLen = oldDtd->contentStringLen; + newDtd->scaffSize = oldDtd->scaffSize; + newDtd->scaffLevel = oldDtd->scaffLevel; + newDtd->scaffIndex = oldDtd->scaffIndex; + return 1; } @@ -3855,6 +4289,17 @@ static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int return s; } +static +const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s) +{ + while (*s) { + if (!poolAppendChar(pool, *s)) + return 0; + s++; + } + return pool->start; +} /* End poolAppendString */ + static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) @@ -3923,3 +4368,125 @@ int poolGrow(STRING_POOL *pool) } return 1; } + +static int +nextScaffoldPart(XML_Parser parser) +{ + CONTENT_SCAFFOLD * me; + int next; + + if (! 
dtd.scaffIndex) { + dtd.scaffIndex = malloc(groupSize * sizeof(int)); + if (! dtd.scaffIndex) + return -1; + dtd.scaffIndex[0] = 0; + } + + if (dtd.scaffCount >= dtd.scaffSize) { + if (dtd.scaffold) { + dtd.scaffSize *= 2; + dtd.scaffold = (CONTENT_SCAFFOLD *) realloc(dtd.scaffold, + dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + } + else { + dtd.scaffSize = 32; + dtd.scaffold = (CONTENT_SCAFFOLD *) malloc(dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + } + if (! dtd.scaffold) + return -1; + } + next = dtd.scaffCount++; + me = &dtd.scaffold[next]; + if (dtd.scaffLevel) { + CONTENT_SCAFFOLD *parent = &dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]]; + if (parent->lastchild) { + dtd.scaffold[parent->lastchild].nextsib = next; + } + if (! parent->childcnt) + parent->firstchild = next; + parent->lastchild = next; + parent->childcnt++; + } + me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; + return next; +} /* End nextScaffoldPart */ + +static void +build_node (XML_Parser parser, + int src_node, + XML_Content *dest, + XML_Content **contpos, + char **strpos) +{ + dest->type = dtd.scaffold[src_node].type; + dest->quant = dtd.scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const char *src; + dest->name = *strpos; + src = dtd.scaffold[src_node].name; + for (;;) { + *(*strpos)++ = *src; + if (! 
*src) + break; + src++; + } + dest->numchildren = 0; + dest->children = 0; + } + else { + int i, cn; + dest->numchildren = dtd.scaffold[src_node].childcnt; + dest->children = *contpos; + *contpos += dest->numchildren; + for (i = 0, cn = dtd.scaffold[src_node].firstchild; + i < dest->numchildren; + i++, cn = dtd.scaffold[cn].nextsib) { + build_node(parser, cn, &(dest->children[i]), contpos, strpos); + } + dest->name = 0; + } +} /* End build_node */ + +static XML_Content * +build_model (XML_Parser parser) +{ + int node; + XML_Content *ret; + XML_Content *cpos; + char * str; + int allocsize = dtd.scaffCount * sizeof(XML_Content) + dtd.contentStringLen; + + ret = malloc(allocsize); + if (! ret) + return 0; + + str = (char *) (&ret[dtd.scaffCount]); + cpos = &ret[1]; + + build_node(parser, 0, ret, &cpos, &str); + return ret; +} /* End build_model */ + +static ELEMENT_TYPE * +getElementType(XML_Parser parser, + const ENCODING *enc, + const char *ptr, + const char *end) +{ + const XML_Char *name = poolStoreString(&dtd.pool, enc, ptr, end); + ELEMENT_TYPE *ret; + + if (! name) + return 0; + ret = (ELEMENT_TYPE *) lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); + if (! ret) + return 0; + if (ret->name != name) + poolDiscard(&dtd.pool); + else { + poolFinish(&dtd.pool); + if (!setElementTypePrefix(parser, ret)) + return 0; + } + return ret; +} /* End getElementType */ diff --git a/expat/lib/xmlrole.c b/expat/lib/xmlrole.c index a41acc64..ba9f0bb4 100644 --- a/expat/lib/xmlrole.c +++ b/expat/lib/xmlrole.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ #include @@ -194,7 +194,7 @@ int doctype1(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; @@ -258,7 +258,7 @@ int doctype4(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; @@ -493,7 +493,7 @@ int entity5(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); - return XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION; + return XML_ROLE_ENTITY_COMPLETE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; @@ -1006,6 +1006,8 @@ int element3(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: + state->handler = declClose; + return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; return XML_ROLE_GROUP_CLOSE_REP; diff --git a/expat/lib/xmlrole.h b/expat/lib/xmlrole.h index 22958dfe..ce4f634c 100644 --- a/expat/lib/xmlrole.h +++ b/expat/lib/xmlrole.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ #ifndef XmlRole_INCLUDED @@ -20,12 +20,14 @@ enum { XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, + XML_ROLE_DOCTYPE_INTERNAL_SUBSET, XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME, XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_PUBLIC_ID, + XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_NOTATION_NAME, XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_SYSTEM_ID, @@ -67,8 +69,7 @@ enum { XML_ROLE_IGNORE_SECT, XML_ROLE_INNER_PARAM_ENTITY_REF, #endif /* XML_DTD */ - XML_ROLE_PARAM_ENTITY_REF, - XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION + XML_ROLE_PARAM_ENTITY_REF }; typedef struct prolog_state { diff --git a/expat/lib/xmltok.c b/expat/lib/xmltok.c index 64fae599..36dbe196 100644 --- a/expat/lib/xmltok.c +++ b/expat/lib/xmltok.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include @@ -1020,6 +1020,7 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone) @@ -1042,6 +1043,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, else { if (versionPtr) *versionPtr = val; + if (versionEndPtr) + *versionEndPtr = ptr; if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; diff --git a/expat/lib/xmltok.h b/expat/lib/xmltok.h index f7703cec..f681557d 100644 --- a/expat/lib/xmltok.h +++ b/expat/lib/xmltok.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ #ifndef XmlTok_INCLUDED @@ -260,6 +260,7 @@ int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); @@ -283,6 +284,7 @@ int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); diff --git a/expat/lib/xmltok_impl.c b/expat/lib/xmltok_impl.c index b46e4416..73617f74 100644 --- a/expat/lib/xmltok_impl.c +++ b/expat/lib/xmltok_impl.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #ifndef IS_INVALID_CHAR diff --git a/expat/lib/xmltok_impl.h b/expat/lib/xmltok_impl.h index eb92802a..da0ea60a 100644 --- a/expat/lib/xmltok_impl.h +++ b/expat/lib/xmltok_impl.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ enum { diff --git a/expat/lib/xmltok_ns.c b/expat/lib/xmltok_ns.c index 24278989..21859738 100644 --- a/expat/lib/xmltok_ns.c +++ b/expat/lib/xmltok_ns.c @@ -79,6 +79,7 @@ int NS(XmlParseXmlDecl)(int isGeneralTextEntity, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone) @@ -90,6 +91,7 @@ int NS(XmlParseXmlDecl)(int isGeneralTextEntity, end, badPtr, versionPtr, + versionEndPtr, encodingName, encoding, standalone); diff --git a/expat/xmltok/dllmain.c b/expat/xmltok/dllmain.c index d5aa4a3e..97cbcf5d 100755 --- a/expat/xmltok/dllmain.c +++ b/expat/xmltok/dllmain.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #define STRICT 1 diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index f7a5b2b2..ac4d939f 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include "xmldef.h" diff --git a/expat/xmlwf/codepage.c b/expat/xmlwf/codepage.c index fe7ab15d..4ab1344e 100755 --- a/expat/xmlwf/codepage.c +++ b/expat/xmlwf/codepage.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include "codepage.h" diff --git a/expat/xmlwf/codepage.h b/expat/xmlwf/codepage.h index b19a7f66..b36a5502 100755 --- a/expat/xmlwf/codepage.h +++ b/expat/xmlwf/codepage.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ int codepageMap(int cp, int *map); diff --git a/expat/xmlwf/filemap.h b/expat/xmlwf/filemap.h index a0a18474..ecf01751 100755 --- a/expat/xmlwf/filemap.h +++ b/expat/xmlwf/filemap.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ diff --git a/expat/xmlwf/readfilemap.c b/expat/xmlwf/readfilemap.c index 249af3ec..7829f612 100755 --- a/expat/xmlwf/readfilemap.c +++ b/expat/xmlwf/readfilemap.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include diff --git a/expat/xmlwf/unixfilemap.c b/expat/xmlwf/unixfilemap.c index 4944b02a..4016dd05 100755 --- a/expat/xmlwf/unixfilemap.c +++ b/expat/xmlwf/unixfilemap.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include diff --git a/expat/xmlwf/win32filemap.c b/expat/xmlwf/win32filemap.c index ec30ff65..7546fea8 100755 --- a/expat/xmlwf/win32filemap.c +++ b/expat/xmlwf/win32filemap.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #define STRICT 1 diff --git a/expat/xmlwf/xmlfile.c b/expat/xmlwf/xmlfile.c index eae9020a..ba79b23b 100755 --- a/expat/xmlwf/xmlfile.c +++ b/expat/xmlwf/xmlfile.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. 
*/ #include diff --git a/expat/xmlwf/xmlfile.h b/expat/xmlwf/xmlfile.h index 0c7ac193..5d6bdce4 100755 --- a/expat/xmlwf/xmlfile.h +++ b/expat/xmlwf/xmlfile.h @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #define XML_MAP_FILE 01 diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index ccb9dec8..a69e36e9 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -1,6 +1,6 @@ /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file copying.txt for copying permission. +See the file COPYING for copying permission. */ #include