From 508f4e99ee9fb55a843dfec3ff9bf05fbe874453 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 17 Jun 1998 10:04:31 +0000 Subject: [PATCH] Add default handler --- expat/xmlparse/xmlparse.c | 92 +++++++++++++++++++++++++++++++++++++-- expat/xmlparse/xmlparse.h | 22 ++++++++++ expat/xmlwf/xmlwf.c | 26 ++++++++--- 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index e7488b98..388fbdba 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -166,6 +166,8 @@ static enum XML_Error storeEntityValue(XML_Parser parser, const char *start, const char *end); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +static void +reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static const XML_Char *getOpenEntityNames(XML_Parser parser); static int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames); @@ -215,6 +217,7 @@ typedef struct { XML_EndElementHandler endElementHandler; XML_CharacterDataHandler characterDataHandler; XML_ProcessingInstructionHandler processingInstructionHandler; + XML_DefaultHandler defaultHandler; XML_UnparsedEntityDeclHandler unparsedEntityDeclHandler; XML_NotationDeclHandler notationDeclHandler; XML_ExternalEntityRefHandler externalEntityRefHandler; @@ -257,6 +260,7 @@ typedef struct { #define endElementHandler (((Parser *)parser)->endElementHandler) #define characterDataHandler (((Parser *)parser)->characterDataHandler) #define processingInstructionHandler (((Parser *)parser)->processingInstructionHandler) +#define defaultHandler (((Parser *)parser)->defaultHandler) #define unparsedEntityDeclHandler (((Parser *)parser)->unparsedEntityDeclHandler) #define notationDeclHandler (((Parser *)parser)->notationDeclHandler) #define externalEntityRefHandler (((Parser *)parser)->externalEntityRefHandler) @@ -314,6 +318,7 @@ XML_Parser 
XML_ParserCreate(const XML_Char *encodingName) endElementHandler = 0; characterDataHandler = 0; processingInstructionHandler = 0; + defaultHandler = 0; unparsedEntityDeclHandler = 0; notationDeclHandler = 0; externalEntityRefHandler = 0; @@ -369,6 +374,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, XML_EndElementHandler oldEndElementHandler = endElementHandler; XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; + XML_DefaultHandler oldDefaultHandler = defaultHandler; XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; void *oldUserData = userData; @@ -381,6 +387,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, endElementHandler = oldEndElementHandler; characterDataHandler = oldCharacterDataHandler; processingInstructionHandler = oldProcessingInstructionHandler; + defaultHandler = oldDefaultHandler; externalEntityRefHandler = oldExternalEntityRefHandler; unknownEncodingHandler = oldUnknownEncodingHandler; userData = oldUserData; @@ -475,6 +482,12 @@ void XML_SetProcessingInstructionHandler(XML_Parser parser, processingInstructionHandler = handler; } +void XML_SetDefaultHandler(XML_Parser parser, + XML_DefaultHandler handler) +{ + defaultHandler = handler; +} + void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { @@ -784,6 +797,8 @@ doContent(XML_Parser parser, XML_Char c = XML_T('\n'); characterDataHandler(handlerArg, &c, 1); } + else if (defaultHandler) + reportDefault(parser, enc, s, end); if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; if (tagLevel != startTagLevel) @@ -825,6 +840,8 @@ doContent(XML_Parser parser, if (ch) { if (characterDataHandler) characterDataHandler(handlerArg, &ch, 1); + else if (defaultHandler) + reportDefault(parser, enc, 
s, next); break; } name = poolStoreString(&dtd.pool, enc, @@ -837,6 +854,8 @@ doContent(XML_Parser parser, if (!entity) { if (dtd.complete || dtd.standalone) return XML_ERROR_UNDEFINED_ENTITY; + if (defaultHandler) + reportDefault(parser, enc, s, next); break; } if (entity->open) @@ -846,6 +865,10 @@ doContent(XML_Parser parser, if (entity) { if (entity->textPtr) { enum XML_Error result; + if (defaultHandler) { + reportDefault(parser, enc, s, next); + break; + } entity->open = 1; result = doContent(parser, tagLevel, @@ -867,6 +890,8 @@ doContent(XML_Parser parser, if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } break; } @@ -942,8 +967,11 @@ doContent(XML_Parser parser, startElementHandler(handlerArg, tag->name, (const XML_Char **)atts); poolClear(&tempPool); } - else + else { tag->name = 0; + if (defaultHandler) + reportDefault(parser, enc, s, next); + } break; } case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: @@ -972,6 +1000,8 @@ doContent(XML_Parser parser, endElementHandler(handlerArg, name); poolClear(&tempPool); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); if (tagLevel == 0) return epilogProcessor(parser, next, end, nextPtr); break; @@ -1005,6 +1035,8 @@ doContent(XML_Parser parser, poolClear(&tempPool); } } + else if (defaultHandler) + reportDefault(parser, enc, s, next); if (tagLevel == 0) return epilogProcessor(parser, next, end, nextPtr); } @@ -1018,6 +1050,8 @@ doContent(XML_Parser parser, XML_Char buf[XML_ENCODE_MAX]; characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } break; case XML_TOK_XML_DECL: @@ -1027,10 +1061,15 @@ doContent(XML_Parser parser, XML_Char c = XML_T('\n'); characterDataHandler(handlerArg, &c, 1); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); break; case 
XML_TOK_CDATA_SECT_OPEN: { - enum XML_Error result = doCdataSection(parser, enc, &next, end, nextPtr); + enum XML_Error result; + if (defaultHandler && !characterDataHandler) + reportDefault(parser, enc, s, next); + result = doCdataSection(parser, enc, &next, end, nextPtr); if (!next) { processor = cdataSectionProcessor; return result; @@ -1053,6 +1092,8 @@ doContent(XML_Parser parser, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); } + else if (defaultHandler) + reportDefault(parser, enc, s, end); if (startTagLevel == 0) { *eventPP = end; return XML_ERROR_NO_ELEMENTS; @@ -1076,11 +1117,17 @@ doContent(XML_Parser parser, (XML_Char *)s, (XML_Char *)next - (XML_Char *)s); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; + default: + if (defaultHandler) + reportDefault(parser, enc, s, next); + break; } *eventPP = s = next; } @@ -1217,6 +1264,8 @@ enum XML_Error doCdataSection(XML_Parser parser, int tok = XmlCdataSectionTok(enc, s, end, &next); switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: + if (defaultHandler && !characterDataHandler) + reportDefault(parser, enc, s, next); *startPtr = next; return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: @@ -1224,6 +1273,8 @@ enum XML_Error doCdataSection(XML_Parser parser, XML_Char c = XML_T('\n'); characterDataHandler(handlerArg, &c, 1); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: if (characterDataHandler) { @@ -1239,6 +1290,8 @@ enum XML_Error doCdataSection(XML_Parser parser, (XML_Char *)s, (XML_Char *)next - (XML_Char *)s); } + else if (defaultHandler) + reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; @@ -1312,6 +1365,8 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, &newEncoding, &standalone)) return XML_ERROR_SYNTAX; + if (defaultHandler) + reportDefault(parser, encoding, s, next); if 
(!protocolEncodingName) { if (newEncoding) { if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { @@ -1667,6 +1722,16 @@ prologProcessor(XML_Parser parser, } break; } + if (defaultHandler) { + switch (tok) { + case XML_TOK_PI: + case XML_TOK_BOM: + case XML_TOK_XML_DECL: + break; + default: + reportDefault(parser, encoding, s, next); + } + } s = next; } /* not reached */ @@ -1685,12 +1750,17 @@ enum XML_Error epilogProcessor(XML_Parser parser, int tok = XmlPrologTok(encoding, s, end, &next); switch (tok) { case XML_TOK_TRAILING_CR: + if (defaultHandler) + reportDefault(parser, encoding, s, end); + /* fall through */ case XML_TOK_NONE: if (nextPtr) *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: case XML_TOK_COMMENT: + if (defaultHandler) + reportDefault(parser, encoding, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, encoding, s, next)) @@ -1962,8 +2032,11 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * const XML_Char *target; XML_Char *data; const char *tem; - if (!processingInstructionHandler) + if (!processingInstructionHandler) { + if (defaultHandler) + reportDefault(parser, enc, start, end); return 1; + } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); target = poolStoreString(&tempPool, enc, start, tem); @@ -1981,6 +2054,26 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * return 1; } +/* Report the bytes in [s, end) to the default handler. When the encoding +needs conversion, the text is converted through dataBuf one bufferful at a +time, so input larger than dataBuf is not truncated and the handler +may be called more than once. NOTE(review): assumes XmlConvert advances s +past the input it consumed and stops when dataBuf is full -- confirm. */ +static void +reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) +{ + if (MUST_CONVERT(enc, s)) { + do { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); + } while (s != end); + } + else + defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); +} + + static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value) { diff --git 
a/expat/xmlparse/xmlparse.h b/expat/xmlparse/xmlparse.h index f6efbe6f..ce00d502 100755 --- a/expat/xmlparse/xmlparse.h +++ b/expat/xmlparse/xmlparse.h @@ -68,6 +68,24 @@ typedef void (*XML_ProcessingInstructionHandler)(void *userData, const XML_Char *target, const XML_Char *data); +/* This is called for any characters in the XML document for +which there is no applicable handler. This includes both +characters that are part of markup which is of a kind that is +not reported (comments, markup declarations), and characters +that are part of a construct which could be reported but +for which no handler has been supplied. The characters are passed +exactly as they were in the XML document except that +they will be encoded in UTF-8. Line boundaries are not normalized. +Note that a byte order mark character is not passed to the default handler. +If a default handler is set, internal entity references +are not expanded. There are no guarantees about +how characters are divided between calls to the default handler: +for example, a comment might be split between multiple calls. */ + +typedef void (*XML_DefaultHandler)(void *userData, + const XML_Char *s, + int len); + /* This is called for a declaration of an unparsed (NDATA) entity. The base argument is whatever was set by XML_SetBase. The entityName, systemId and notationName arguments will never be null. 
@@ -197,6 +215,10 @@ void XMLPARSEAPI XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); +void XMLPARSEAPI +XML_SetDefaultHandler(XML_Parser parser, + XML_DefaultHandler handler); + void XMLPARSEAPI XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index 0766bae1..8888232c 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -172,6 +172,11 @@ static void processingInstruction(void *userData, const XML_Char *target, const puttc(T('>'), fp); } +static void markup(void *userData, const XML_Char *s, int len) +{ + for (; len > 0; --len, ++s) + puttc(*s, (FILE *)userData); +} static void metaLocation(XML_Parser parser) @@ -479,7 +484,7 @@ int tmain(int argc, XML_Char **argv) int useFilemap = 1; int processExternalEntities = 0; int windowsCodePages = 0; - int metaOutput = 0; + int outputType = 0; #ifdef _MSC_VER _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); @@ -506,7 +511,11 @@ int tmain(int argc, XML_Char **argv) j++; } if (argv[i][j] == T('m')) { - metaOutput = 1; + outputType = 'm'; + j++; + } + if (argv[i][j] == T('c')) { + outputType = 'c'; j++; } if (argv[i][j] == T('d')) { @@ -562,7 +571,8 @@ int tmain(int argc, XML_Char **argv) puttc(0xFEFF, fp); #endif XML_SetUserData(parser, fp); - if (metaOutput) { + switch (outputType) { + case 'm': XML_UseParserAsHandlerArg(parser); fputts(T("\n"), fp); XML_SetElementHandler(parser, metaStartElement, metaEndElement); @@ -570,11 +580,15 @@ int tmain(int argc, XML_Char **argv) XML_SetCharacterDataHandler(parser, metaCharacterData); XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl); XML_SetNotationDeclHandler(parser, metaNotationDecl); - } - else { + break; + case 'c': + XML_SetDefaultHandler(parser, markup); + break; + default: XML_SetElementHandler(parser, startElement, endElement); XML_SetCharacterDataHandler(parser, characterData); 
XML_SetProcessingInstructionHandler(parser, processingInstruction); + break; } } if (windowsCodePages) @@ -598,7 +612,7 @@ int tmain(int argc, XML_Char **argv) else result = processStream(argv[i], parser); if (outputDir) { - if (metaOutput) + if (outputType == 'm') fputts(T("\n"), fp); fclose(fp); if (!result)