From 9f41c277f9c936c607b7d06b40daa88ee091e449 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 10 Dec 1997 07:44:19 +0000 Subject: [PATCH] Changes before release --- expat/expat.mak | 426 +++++++++++++++-- expat/xmltok/utf8tab.h | 4 +- expat/xmltok/xmlrole.c | 207 ++++++-- expat/xmltok/xmlrole.h | 24 +- expat/xmltok/xmltok.c | 490 ++++++++++++++++++- expat/xmltok/xmltok.h | 164 +++++-- expat/xmltok/xmltok_impl.c | 385 +++++++++++++-- expat/xmltok/xmltok_impl.h | 8 +- expat/xmlwf/readfilemap.c | 2 +- expat/xmlwf/wfcheck.c | 938 ++++++++++++++++++++++++++++++++----- expat/xmlwf/wfcheck.h | 22 +- expat/xmlwf/win32filemap.c | 39 +- expat/xmlwf/xmlwf.c | 61 +-- 13 files changed, 2388 insertions(+), 382 deletions(-) diff --git a/expat/expat.mak b/expat/expat.mak index 767969bb..59a0486b 100755 --- a/expat/expat.mak +++ b/expat/expat.mak @@ -5,18 +5,19 @@ # TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 !IF "$(CFG)" == "" -CFG=xmlwf - Win32 Debug -!MESSAGE No configuration specified. Defaulting to xmlwf - Win32 Debug. +CFG=gennmtab - Win32 Debug +!MESSAGE No configuration specified. Defaulting to gennmtab - Win32 Debug. !ENDIF !IF "$(CFG)" != "xmltok - Win32 Release" && "$(CFG)" != "xmltok - Win32 Debug"\ && "$(CFG)" != "xmlec - Win32 Release" && "$(CFG)" != "xmlec - Win32 Debug" &&\ - "$(CFG)" != "xmlwf - Win32 Release" && "$(CFG)" != "xmlwf - Win32 Debug" + "$(CFG)" != "xmlwf - Win32 Release" && "$(CFG)" != "xmlwf - Win32 Debug" &&\ + "$(CFG)" != "gennmtab - Win32 Release" && "$(CFG)" != "gennmtab - Win32 Debug" !MESSAGE Invalid configuration "$(CFG)" specified. !MESSAGE You can specify a configuration when running NMAKE on this makefile !MESSAGE by defining the macro CFG on the command line. For example: !MESSAGE -!MESSAGE NMAKE /f "xmltok.mak" CFG="xmlwf - Win32 Debug" +!MESSAGE NMAKE /f "xmltok.mak" CFG="gennmtab - Win32 Debug" !MESSAGE !MESSAGE Possible choices for configuration are: !MESSAGE @@ -26,6 +27,9 @@ CFG=xmlwf - Win32 Debug !MESSAGE "xmlec - Win32 Debug" (based on "Win32 (x86) Console Application") !MESSAGE "xmlwf - Win32 Release" (based on "Win32 (x86) Console Application") !MESSAGE "xmlwf - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "gennmtab - Win32 Release" (based on\ + "Win32 (x86) Console Application") +!MESSAGE "gennmtab - Win32 Debug" (based on "Win32 (x86) Console Application") !MESSAGE !ERROR An invalid configuration is specified. !ENDIF @@ -37,7 +41,7 @@ NULL=nul !ENDIF ################################################################################ # Begin Project -# PROP Target_Last_Scanned "xmlwf - Win32 Debug" +# PROP Target_Last_Scanned "gennmtab - Win32 Debug" !IF "$(CFG)" == "xmltok - Win32 Release" @@ -54,7 +58,7 @@ NULL=nul OUTDIR=.\Release INTDIR=.\Release -ALL : ".\bin\xmltok.dll" +ALL : "gennmtab - Win32 Release" ".\bin\xmltok.dll" CLEAN : -@erase "$(INTDIR)\dllmain.obj" @@ -63,15 +67,17 @@ CLEAN : -@erase "$(OUTDIR)\xmltok.exp" -@erase "$(OUTDIR)\xmltok.lib" -@erase ".\bin\xmltok.dll" + -@erase ".\nametab.h" "$(OUTDIR)" : if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" CPP=cl.exe # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c -CPP_PROJ=/nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\ - /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /c +# ADD CPP /nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllexport) /YX /c +CPP_PROJ=/nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D\ + XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/"\ + /c CPP_OBJS=.\Release/ CPP_SBRS=.\. @@ -138,7 +144,7 @@ LINK32_OBJS= \ OUTDIR=.\Debug INTDIR=.\Debug -ALL : "$(OUTDIR)\xmltok.dll" +ALL : "gennmtab - Win32 Debug" "$(OUTDIR)\xmltok.dll" CLEAN : -@erase "$(INTDIR)\dllmain.obj" @@ -151,15 +157,17 @@ CLEAN : -@erase "$(OUTDIR)\xmltok.ilk" -@erase "$(OUTDIR)\xmltok.lib" -@erase "$(OUTDIR)\xmltok.pdb" + -@erase ".\nametab.h" "$(OUTDIR)" : if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" CPP=cl.exe # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c -# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c -CPP_PROJ=/nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS"\ - /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c +# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllexport) /YX /c +CPP_PROJ=/nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS"\ + /D XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX\ + /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c CPP_OBJS=.\Debug/ CPP_SBRS=.\. @@ -238,9 +246,10 @@ CLEAN : CPP=cl.exe # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c -# ADD CPP /nologo /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c CPP_PROJ=/nologo /ML /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D\ - "_CONSOLE" /Fp"$(INTDIR)/xmlec.pch" /YX /Fo"$(INTDIR)/" /c + "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlec.pch" /YX\ + /Fo"$(INTDIR)/" /c CPP_OBJS=.\xmlec\Release/ CPP_SBRS=.\. @@ -317,9 +326,10 @@ CLEAN : CPP=cl.exe # ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c -# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D\ - "_CONSOLE" /Fp"$(INTDIR)/xmlec.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c + "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlec.pch" /YX\ + /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c CPP_OBJS=.\xmlec\Debug/ CPP_SBRS=.\. @@ -384,7 +394,9 @@ INTDIR=.\xmlwf\Release ALL : "xmltok - Win32 Release" ".\bin\xmlwf.exe" CLEAN : + -@erase "$(INTDIR)\hashtable.obj" -@erase "$(INTDIR)\wfcheck.obj" + -@erase "$(INTDIR)\wfcheckmessage.obj" -@erase "$(INTDIR)\win32filemap.obj" -@erase "$(INTDIR)\xmlwf.obj" -@erase ".\bin\xmlwf.exe" @@ -394,9 +406,10 @@ CLEAN : CPP=cl.exe # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c -# ADD CPP /nologo /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c -CPP_PROJ=/nologo /ML /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\ - /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /c +# ADD CPP /nologo /W3 /GX /O2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c +CPP_PROJ=/nologo /ML /W3 /GX /O2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D\ + XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /c\ + CPP_OBJS=.\xmlwf\Release/ CPP_SBRS=.\. @@ -435,7 +448,9 @@ LINK32_FLAGS=setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib\ odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\ /pdb:"$(OUTDIR)/xmlwf.pdb" /machine:I386 /out:"bin/xmlwf.exe" LINK32_OBJS= \ + "$(INTDIR)\hashtable.obj" \ "$(INTDIR)\wfcheck.obj" \ + "$(INTDIR)\wfcheckmessage.obj" \ "$(INTDIR)\win32filemap.obj" \ "$(INTDIR)\xmlwf.obj" \ ".\Release\xmltok.lib" @@ -463,9 +478,11 @@ INTDIR=.\xmlwf\Debug ALL : "xmltok - Win32 Debug" ".\Debug\xmlwf.exe" CLEAN : + -@erase "$(INTDIR)\hashtable.obj" -@erase "$(INTDIR)\vc40.idb" -@erase "$(INTDIR)\vc40.pdb" -@erase "$(INTDIR)\wfcheck.obj" + -@erase "$(INTDIR)\wfcheckmessage.obj" -@erase "$(INTDIR)\win32filemap.obj" -@erase "$(INTDIR)\xmlwf.obj" -@erase "$(OUTDIR)\xmlwf.pdb" @@ -477,9 +494,10 @@ CLEAN : CPP=cl.exe # ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c -# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c -CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D\ - "_CONSOLE" /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c +# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c +CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D\ + "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlwf.pch" /YX\ + /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c CPP_OBJS=.\xmlwf\Debug/ CPP_SBRS=.\. @@ -518,7 +536,9 @@ LINK32_FLAGS=setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib\ odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:yes\ /pdb:"$(OUTDIR)/xmlwf.pdb" /debug /machine:I386 /out:"Debug/xmlwf.exe" LINK32_OBJS= \ + "$(INTDIR)\hashtable.obj" \ "$(INTDIR)\wfcheck.obj" \ + "$(INTDIR)\wfcheckmessage.obj" \ "$(INTDIR)\win32filemap.obj" \ "$(INTDIR)\xmlwf.obj" \ ".\Debug\xmltok.lib" @@ -528,6 +548,159 @@ LINK32_OBJS= \ $(LINK32_FLAGS) $(LINK32_OBJS) << +!ELSEIF "$(CFG)" == "gennmtab - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "gennmtab\Release" +# PROP BASE Intermediate_Dir "gennmtab\Release" +# PROP BASE Target_Dir "gennmtab" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "gennmtab\Release" +# PROP Intermediate_Dir "gennmtab\Release" +# PROP Target_Dir "gennmtab" +OUTDIR=.\gennmtab\Release +INTDIR=.\gennmtab\Release + +ALL : "$(OUTDIR)\gennmtab.exe" + +CLEAN : + -@erase "$(INTDIR)\gennmtab.obj" + -@erase "$(OUTDIR)\gennmtab.exe" + +"$(OUTDIR)" : + if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" + +CPP=cl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c +CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\ + /Fp"$(INTDIR)/gennmtab.pch" /YX /Fo"$(INTDIR)/" /c +CPP_OBJS=.\gennmtab\Release/ +CPP_SBRS=.\. + +.c{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.cpp{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.cxx{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.c{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +.cpp{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +.cxx{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +RSC=rc.exe +# ADD BASE RSC /l 0x809 /d "NDEBUG" +# ADD RSC /l 0x809 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +BSC32_FLAGS=/nologo /o"$(OUTDIR)/gennmtab.bsc" +BSC32_SBRS= \ + +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\ + advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib\ + odbccp32.lib /nologo /subsystem:console /incremental:no\ + /pdb:"$(OUTDIR)/gennmtab.pdb" /machine:I386 /out:"$(OUTDIR)/gennmtab.exe" +LINK32_OBJS= \ + "$(INTDIR)\gennmtab.obj" + +"$(OUTDIR)\gennmtab.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS) + $(LINK32) @<< + $(LINK32_FLAGS) $(LINK32_OBJS) +<< + +!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "gennmtab\Debug" +# PROP BASE Intermediate_Dir "gennmtab\Debug" +# PROP BASE Target_Dir "gennmtab" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "gennmtab\Debug" +# PROP Intermediate_Dir "gennmtab\Debug" +# PROP Target_Dir "gennmtab" +OUTDIR=.\gennmtab\Debug +INTDIR=.\gennmtab\Debug + +ALL : "$(OUTDIR)\gennmtab.exe" + +CLEAN : + -@erase "$(INTDIR)\gennmtab.obj" + -@erase "$(INTDIR)\vc40.idb" + -@erase "$(INTDIR)\vc40.pdb" + -@erase "$(OUTDIR)\gennmtab.exe" + -@erase "$(OUTDIR)\gennmtab.ilk" + -@erase "$(OUTDIR)\gennmtab.pdb" + +"$(OUTDIR)" : + if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" + +CPP=cl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c +CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE"\ + /Fp"$(INTDIR)/gennmtab.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c +CPP_OBJS=.\gennmtab\Debug/ +CPP_SBRS=.\. + +.c{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.cpp{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.cxx{$(CPP_OBJS)}.obj: + $(CPP) $(CPP_PROJ) $< + +.c{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +.cpp{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +.cxx{$(CPP_SBRS)}.sbr: + $(CPP) $(CPP_PROJ) $< + +RSC=rc.exe +# ADD BASE RSC /l 0x809 /d "_DEBUG" +# ADD RSC /l 0x809 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +BSC32_FLAGS=/nologo /o"$(OUTDIR)/gennmtab.bsc" +BSC32_SBRS= \ + +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 +# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 +LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\ + advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib\ + odbccp32.lib /nologo /subsystem:console /incremental:yes\ + /pdb:"$(OUTDIR)/gennmtab.pdb" /debug /machine:I386\ + /out:"$(OUTDIR)/gennmtab.exe" +LINK32_OBJS= \ + "$(INTDIR)\gennmtab.obj" + +"$(OUTDIR)\gennmtab.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS) + $(LINK32) @<< + $(LINK32_FLAGS) $(LINK32_OBJS) +<< + !ENDIF ################################################################################ @@ -551,6 +724,7 @@ SOURCE=.\xmltok.c DEP_CPP_XMLTO=\ ".\asciitab.h"\ + ".\iasciitab.h"\ ".\latin1tab.h"\ ".\nametab.h"\ ".\utf8tab.h"\ @@ -560,15 +734,17 @@ DEP_CPP_XMLTO=\ # ADD CPP /Ob2 -"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" - $(CPP) /nologo /MT /W3 /GX /O2 /Ob2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\ - /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /c $(SOURCE) +"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" ".\nametab.h" + $(CPP) /nologo /MT /W3 /GX /O2 /Ob2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D\ + XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/"\ + /c $(SOURCE) !ELSEIF "$(CFG)" == "xmltok - Win32 Debug" DEP_CPP_XMLTO=\ ".\asciitab.h"\ + ".\iasciitab.h"\ ".\latin1tab.h"\ ".\nametab.h"\ ".\utf8tab.h"\ @@ -577,9 +753,10 @@ DEP_CPP_XMLTO=\ ".\xmltok_impl.h"\ -"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" - $(CPP) /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS"\ - /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c $(SOURCE) +"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" ".\nametab.h" + $(CPP) /nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS"\ + /D XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX\ + /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c $(SOURCE) !ENDIF @@ -590,20 +767,9 @@ DEP_CPP_XMLTO=\ SOURCE=.\dllmain.c -!IF "$(CFG)" == "xmltok - Win32 Release" - - "$(INTDIR)\dllmain.obj" : $(SOURCE) "$(INTDIR)" -!ELSEIF "$(CFG)" == "xmltok - Win32 Debug" - - -"$(INTDIR)\dllmain.obj" : $(SOURCE) "$(INTDIR)" - - -!ENDIF - # End Source File ################################################################################ # Begin Source File @@ -630,6 +796,68 @@ DEP_CPP_XMLRO=\ "$(INTDIR)\xmlrole.obj" : $(SOURCE) $(DEP_CPP_XMLRO) "$(INTDIR)" +!ENDIF + +# End Source File +################################################################################ +# Begin Source File + +SOURCE=.\gennmtab\Release\gennmtab.exe + +!IF "$(CFG)" == "xmltok - Win32 Release" + +# Begin Custom Build - Generating nametab.h +InputPath=.\gennmtab\Release\gennmtab.exe + +"nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(InputPath) >nametab.h + +# End Custom Build + +!ELSEIF "$(CFG)" == "xmltok - Win32 Debug" + +# PROP Exclude_From_Build 1 + +!ENDIF + +# End Source File +################################################################################ +# Begin Project Dependency + +# Project_Dep_Name "gennmtab" + +!IF "$(CFG)" == "xmltok - Win32 Release" + +"gennmtab - Win32 Release" : + $(MAKE) /$(MAKEFLAGS) /F ".\xmltok.mak" CFG="gennmtab - Win32 Release" + +!ELSEIF "$(CFG)" == "xmltok - Win32 Debug" + +"gennmtab - Win32 Debug" : + $(MAKE) /$(MAKEFLAGS) /F ".\xmltok.mak" CFG="gennmtab - Win32 Debug" + +!ENDIF + +# End Project Dependency +################################################################################ +# Begin Source File + +SOURCE=.\gennmtab\Debug\gennmtab.exe + +!IF "$(CFG)" == "xmltok - Win32 Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "xmltok - Win32 Debug" + +# Begin Custom Build - Generating nametab.h +InputPath=.\gennmtab\Debug\gennmtab.exe + +"nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(InputPath) >nametab.h + +# End Custom Build + !ENDIF # End Source File @@ -670,6 +898,7 @@ DEP_CPP_XMLRO=\ SOURCE=.\xmlec\xmlec.c DEP_CPP_XMLEC=\ ".\xmltok.h"\ + {$(INCLUDE)}"\sys\TYPES.H"\ "$(INTDIR)\xmlec.obj" : $(SOURCE) $(DEP_CPP_XMLEC) "$(INTDIR)" @@ -712,9 +941,13 @@ DEP_CPP_XMLEC=\ # Begin Source File SOURCE=.\xmlwf\wfcheck.c + +!IF "$(CFG)" == "xmlwf - Win32 Release" + DEP_CPP_WFCHE=\ ".\xmlrole.h"\ ".\xmltok.h"\ + ".\xmlwf\hashtable.h"\ ".\xmlwf\wfcheck.h"\ @@ -722,11 +955,29 @@ DEP_CPP_WFCHE=\ $(CPP) $(CPP_PROJ) $(SOURCE) +!ELSEIF "$(CFG)" == "xmlwf - Win32 Debug" + +DEP_CPP_WFCHE=\ + ".\xmlrole.h"\ + ".\xmltok.h"\ + ".\xmlwf\hashtable.h"\ + ".\xmlwf\wfcheck.h"\ + + +"$(INTDIR)\wfcheck.obj" : $(SOURCE) $(DEP_CPP_WFCHE) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +!ENDIF + # End Source File ################################################################################ # Begin Source File SOURCE=.\xmlwf\xmlwf.c + +!IF "$(CFG)" == "xmlwf - Win32 Release" + DEP_CPP_XMLWF=\ ".\xmlwf\filemap.h"\ ".\xmlwf\wfcheck.h"\ @@ -736,6 +987,19 @@ DEP_CPP_XMLWF=\ $(CPP) $(CPP_PROJ) $(SOURCE) +!ELSEIF "$(CFG)" == "xmlwf - Win32 Debug" + +DEP_CPP_XMLWF=\ + ".\xmlwf\filemap.h"\ + ".\xmlwf\wfcheck.h"\ + + +"$(INTDIR)\xmlwf.obj" : $(SOURCE) $(DEP_CPP_XMLWF) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +!ENDIF + # End Source File ################################################################################ # Begin Source File @@ -749,6 +1013,88 @@ DEP_CPP_WIN32=\ $(CPP) $(CPP_PROJ) $(SOURCE) +# End Source File +################################################################################ +# Begin Source File + +SOURCE=.\xmlwf\hashtable.c +DEP_CPP_HASHT=\ + ".\xmlwf\hashtable.h"\ + + +"$(INTDIR)\hashtable.obj" : $(SOURCE) $(DEP_CPP_HASHT) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +# End Source File +################################################################################ +# Begin Source File + +SOURCE=.\xmlwf\unixfilemap.c +DEP_CPP_UNIXF=\ + ".\xmlwf\filemap.h"\ + {$(INCLUDE)}"\sys\stat.h"\ + {$(INCLUDE)}"\sys\TYPES.H"\ + +# PROP Exclude_From_Build 1 +# End Source File +################################################################################ +# Begin Source File + +SOURCE=.\xmlwf\readfilemap.c +DEP_CPP_READF=\ + {$(INCLUDE)}"\sys\stat.h"\ + {$(INCLUDE)}"\sys\TYPES.H"\ + +# PROP Exclude_From_Build 1 +# End Source File +################################################################################ +# Begin Source File + +SOURCE=.\xmlwf\wfcheckmessage.c +DEP_CPP_WFCHEC=\ + ".\xmlwf\wfcheck.h"\ + + +"$(INTDIR)\wfcheckmessage.obj" : $(SOURCE) $(DEP_CPP_WFCHEC) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +# End Source File +# End Target +################################################################################ +# Begin Target + +# Name "gennmtab - Win32 Release" +# Name "gennmtab - Win32 Debug" + +!IF "$(CFG)" == "gennmtab - Win32 Release" + +!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug" + +!ENDIF + +################################################################################ +# Begin Source File + +SOURCE=.\gennmtab\gennmtab.c + +!IF "$(CFG)" == "gennmtab - Win32 Release" + + +"$(INTDIR)\gennmtab.obj" : $(SOURCE) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug" + + +"$(INTDIR)\gennmtab.obj" : $(SOURCE) "$(INTDIR)" + $(CPP) $(CPP_PROJ) $(SOURCE) + + +!ENDIF + # End Source File # End Target # End Project diff --git a/expat/xmltok/utf8tab.h b/expat/xmltok/utf8tab.h index 80ab567c..f42ab31f 100755 --- a/expat/xmltok/utf8tab.h +++ b/expat/xmltok/utf8tab.h @@ -29,5 +29,5 @@ /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, /* 0xF4 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, -/* 0xF8 */ BT_LEAD5, BT_LEAD5, BT_LEAD5, BT_LEAD5, -/* 0xFC */ BT_LEAD6, BT_LEAD6, BT_MALFORM, BT_MALFORM, +/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, +/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, diff --git a/expat/xmltok/xmlrole.c b/expat/xmltok/xmlrole.c index d681d31f..715fef29 100755 --- a/expat/xmltok/xmlrole.c +++ b/expat/xmltok/xmlrole.c @@ -1,6 +1,3 @@ -#ifdef _MSC_VER -#define XMLTOKAPI __declspec(dllexport) -#endif #include "xmlrole.h" /* Doesn't check: @@ -8,20 +5,21 @@ that ,| are not mixed in a model group content of literals -Separate handler for external internalSubsets - -Level 0 == outside DTD -Level 1 == in DTD internalSubset -Level 2 == in Declaration -Level 3 == in Group */ -PROLOG_HANDLER - prolog0, prolog1, +typedef int PROLOG_HANDLER(struct prolog_state *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); + +static PROLOG_HANDLER + prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, entity3, entity4, entity5, entity6, - notation0, notation1, notation2, notation3, + entity7, entity8, entity9, + notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, @@ -29,10 +27,6 @@ PROLOG_HANDLER declClose, error; -PROLOG_HANDLER ignore, declParamEntityRef; - -PROLOG_HANDLER declParamEntityRef; - static int syntaxError(PROLOG_STATE *); @@ -44,9 +38,14 @@ int prolog0(PROLOG_STATE *state, const ENCODING *enc) { switch (tok) { - case XML_TOK_BOM: case XML_TOK_PI: + state->handler = prolog1; + if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) + return XML_ROLE_XML_DECL; + return XML_ROLE_NONE; case XML_TOK_COMMENT: + state->handler = prolog1; + case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, @@ -55,10 +54,7 @@ int prolog0(PROLOG_STATE *state, break; state->handler = doctype0; return XML_ROLE_NONE; - case XML_TOK_START_TAG_WITH_ATTS: - case XML_TOK_START_TAG_NO_ATTS: - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } @@ -74,12 +70,39 @@ int prolog1(PROLOG_STATE *state, { switch (tok) { case XML_TOK_PI: + if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) + return syntaxError(state); + case XML_TOK_COMMENT: + case XML_TOK_BOM: + return XML_ROLE_NONE; + case XML_TOK_DECL_OPEN: + if (!XmlNameMatchesAscii(enc, + ptr + 2 * enc->minBytesPerChar, + "DOCTYPE")) + break; + state->handler = doctype0; + return XML_ROLE_NONE; + case XML_TOK_INSTANCE_START: + state->handler = error; + return XML_ROLE_INSTANCE_START; + } + return syntaxError(state); +} + +static +int prolog2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PI: + if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) + return syntaxError(state); case XML_TOK_COMMENT: return XML_ROLE_NONE; - case XML_TOK_START_TAG_WITH_ATTS: - case XML_TOK_START_TAG_NO_ATTS: - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } @@ -101,6 +124,7 @@ int doctype0(PROLOG_STATE *state, return syntaxError(state); } +static int doctype1(PROLOG_STATE *state, int tok, const char *ptr, @@ -112,7 +136,7 @@ int doctype1(PROLOG_STATE *state, state->handler = internalSubset; return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = prolog1; + state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { @@ -128,6 +152,7 @@ int doctype1(PROLOG_STATE *state, return syntaxError(state); } +static int doctype2(PROLOG_STATE *state, int tok, const char *ptr, @@ -142,6 +167,7 @@ int doctype2(PROLOG_STATE *state, return syntaxError(state); } +static int doctype3(PROLOG_STATE *state, int tok, const char *ptr, @@ -156,6 +182,7 @@ int doctype3(PROLOG_STATE *state, return syntaxError(state); } +static int doctype4(PROLOG_STATE *state, int tok, const char *ptr, @@ -167,12 +194,13 @@ int doctype4(PROLOG_STATE *state, state->handler = internalSubset; return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = prolog1; + state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } return syntaxError(state); } +static int doctype5(PROLOG_STATE *state, int tok, const char *ptr, @@ -181,7 +209,7 @@ int doctype5(PROLOG_STATE *state, { switch (tok) { case XML_TOK_DECL_CLOSE: - state->handler = prolog1; + state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } return syntaxError(state); @@ -189,10 +217,10 @@ int doctype5(PROLOG_STATE *state, static int internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_DECL_OPEN: @@ -204,14 +232,14 @@ int internalSubset(PROLOG_STATE *state, } if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, - "ELEMENT")) { - state->handler = element0; + "ATTLIST")) { + state->handler = attlist0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, - "ATTLIST")) { - state->handler = attlist0; + "ELEMENT")) { + state->handler = element0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, @@ -222,6 +250,8 @@ int internalSubset(PROLOG_STATE *state, } break; case XML_TOK_PI: + if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml")) + return syntaxError(state); case XML_TOK_COMMENT: case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_NONE; @@ -259,7 +289,7 @@ int entity1(PROLOG_STATE *state, { switch (tok) { case XML_TOK_NAME: - state->handler = entity2; + state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; } return syntaxError(state); @@ -357,6 +387,61 @@ int entity6(PROLOG_STATE *state, return syntaxError(state); } +static +int entity7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_NAME: + if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + state->handler = entity9; + return XML_ROLE_NONE; + } + if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + state->handler = entity8; + return XML_ROLE_NONE; + } + break; + case XML_TOK_LITERAL: + state->handler = declClose; + return XML_ROLE_ENTITY_VALUE; + } + return syntaxError(state); +} + +static +int entity8(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_LITERAL: + state->handler = entity9; + return XML_ROLE_ENTITY_PUBLIC_ID; + } + return syntaxError(state); +} + +static +int entity9(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_LITERAL: + state->handler = declClose; + return XML_ROLE_ENTITY_SYSTEM_ID; + } + return syntaxError(state); +} + static int notation0(PROLOG_STATE *state, int tok, @@ -403,7 +488,7 @@ int notation2(PROLOG_STATE *state, { switch (tok) { case XML_TOK_LITERAL: - state->handler = notation3; + state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; } return syntaxError(state); @@ -424,6 +509,24 @@ int notation3(PROLOG_STATE *state, return syntaxError(state); } +static +int notation4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_LITERAL: + state->handler = declClose; + return XML_ROLE_NOTATION_SYSTEM_ID; + case XML_TOK_DECL_CLOSE: + state->handler = internalSubset; + return XML_ROLE_NONE; + } + return syntaxError(state); +} + static int attlist0(PROLOG_STATE *state, int tok, @@ -478,7 +581,7 @@ int attlist2(PROLOG_STATE *state, "NMTOKENS", }; int i; - for (i = 0; i < sizeof(types)/sizeof(types[0]); i++) + for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) if (XmlNameMatchesAscii(enc, ptr, types[i])) { state->handler = attlist8; return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; @@ -665,7 +768,8 @@ int element1(PROLOG_STATE *state, break; case XML_TOK_OPEN_PAREN: state->handler = element2; - return XML_ROLE_CONTENT_GROUP_OPEN; + state->level = 1; + return XML_ROLE_GROUP_OPEN; } return syntaxError(state); } @@ -689,21 +793,17 @@ int element2(PROLOG_STATE *state, case XML_TOK_OPEN_PAREN: state->level = 2; state->handler = element6; - return XML_ROLE_CONTENT_GROUP_OPEN; + return XML_ROLE_GROUP_OPEN; case XML_TOK_NAME: - state->level = 1; state->handler = element7; return XML_ROLE_CONTENT_ELEMENT; case XML_TOK_NAME_QUESTION: - state->level = 1; state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_OPT; case XML_TOK_NAME_ASTERISK: - state->level = 1; state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_REP; case XML_TOK_NAME_PLUS: - state->level = 1; state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } @@ -721,7 +821,7 @@ int element3(PROLOG_STATE *state, case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; - return XML_ROLE_END_GROUP_REP; + return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; return XML_ROLE_NONE; @@ -754,7 +854,7 @@ int element5(PROLOG_STATE *state, switch (tok) { case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; - return XML_ROLE_END_GROUP_REP; + return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; return XML_ROLE_NONE; @@ -772,7 +872,7 @@ int element6(PROLOG_STATE *state, switch (tok) { case XML_TOK_OPEN_PAREN: state->level += 1; - return XML_ROLE_CONTENT_GROUP_OPEN; + return XML_ROLE_GROUP_OPEN; case XML_TOK_NAME: state->handler = element7; return XML_ROLE_CONTENT_ELEMENT; @@ -801,22 +901,22 @@ int element7(PROLOG_STATE *state, state->level -= 1; if (state->level == 0) state->handler = declClose; - return XML_ROLE_END_GROUP; + return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->level -= 1; if (state->level == 0) state->handler = declClose; - return XML_ROLE_END_GROUP_REP; + return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_CLOSE_PAREN_QUESTION: state->level -= 1; if (state->level == 0) state->handler = declClose; - return XML_ROLE_END_GROUP_OPT; + return XML_ROLE_GROUP_CLOSE_OPT; case XML_TOK_CLOSE_PAREN_PLUS: state->level -= 1; if (state->level == 0) state->handler = declClose; - return XML_ROLE_END_GROUP_PLUS; + return XML_ROLE_GROUP_CLOSE_PLUS; case XML_TOK_COMMA: state->handler = element6; return XML_ROLE_GROUP_SEQUENCE; @@ -842,6 +942,8 @@ int declClose(PROLOG_STATE *state, return syntaxError(state); } +#if 0 + static int ignore(PROLOG_STATE *state, int tok, @@ -858,6 +960,7 @@ int ignore(PROLOG_STATE *state, } return syntaxError(state); } +#endif static int error(PROLOG_STATE *state, diff --git a/expat/xmltok/xmlrole.h b/expat/xmltok/xmlrole.h index 6622a083..ce24bfd9 100755 --- a/expat/xmltok/xmlrole.h +++ b/expat/xmltok/xmlrole.h @@ -10,6 +10,8 @@ extern "C" { enum { XML_ROLE_ERROR = -1, XML_ROLE_NONE = 0, + XML_ROLE_XML_DECL, + XML_ROLE_INSTANCE_START, XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, @@ -43,11 +45,11 @@ enum { XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_EMPTY, XML_ROLE_CONTENT_PCDATA, - XML_ROLE_CONTENT_GROUP_OPEN, - XML_ROLE_END_GROUP, - XML_ROLE_END_GROUP_REP, - XML_ROLE_END_GROUP_OPT, - XML_ROLE_END_GROUP_PLUS, + XML_ROLE_GROUP_OPEN, + XML_ROLE_GROUP_CLOSE, + XML_ROLE_GROUP_CLOSE_REP, + XML_ROLE_GROUP_CLOSE_OPT, + XML_ROLE_GROUP_CLOSE_PLUS, XML_ROLE_GROUP_CHOICE, XML_ROLE_GROUP_SEQUENCE, XML_ROLE_CONTENT_ELEMENT, @@ -56,14 +58,12 @@ enum { XML_ROLE_CONTENT_ELEMENT_PLUS }; -typedef int PROLOG_HANDLER(struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); - typedef struct prolog_state { - PROLOG_HANDLER *handler; + int (*handler)(struct prolog_state *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); unsigned level; } PROLOG_STATE; diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index e846cdfc..8f89da35 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -1,10 +1,24 @@ -#ifdef _MSC_VER -#define XMLTOKAPI __declspec(dllexport) -#endif - #include "xmltok.h" #include "nametab.h" +#define VTABLE1 \ + { PREFIX(prologTok), PREFIX(contentTok) }, \ + { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ + PREFIX(sameName), \ + PREFIX(nameMatchesAscii), \ + PREFIX(nameLength), \ + PREFIX(getAtts), \ + PREFIX(charRefNumber), \ + PREFIX(updatePosition), \ + PREFIX(isPublicId), \ + PREFIX(isSystemId) + +#define VTABLE2 \ + PREFIX(encode), \ + { PREFIX(toUtf8) } + +#define VTABLE VTABLE1, VTABLE2 + #define UCS2_GET_NAMING(pages, hi, lo) \ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) @@ -37,18 +51,22 @@ We need 8 bits to index into pages, 3 bits to add to that index and ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ : 0)) - -#include "xmltok_impl.h" - struct normal_encoding { ENCODING enc; unsigned char type[256]; }; +static const struct normal_encoding latin1_encoding; + +#define latin1tab (latin1_encoding.type) + +#include "xmltok_impl.h" + /* minimum bytes per character */ #define MINBPC 1 #define BYTE_TYPE(enc, p) \ (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) +#define BYTE_TO_ASCII(enc, p) (*p) #define IS_NAME_CHAR(enc, p, n) UTF8_GET_NAMING(namePages, p, n) #define IS_NMSTRT_CHAR(enc, p, n) UTF8_GET_NAMING(nmstrtPages, p, n) @@ -60,23 +78,136 @@ struct normal_encoding { #undef MINBPC #undef BYTE_TYPE +#undef BYTE_TO_ASCII #undef CHAR_MATCHES #undef IS_NAME_CHAR #undef IS_NMSTRT_CHAR -const struct normal_encoding utf8_encoding = { - { { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 1 }, +enum { + // cvalN is value of masked first byte of N byte sequence + cval1 = 0x00, + cval2 = 0xc0, + cval3 = 0xe0, + cval4 = 0xf0, + // minN is minimum legal resulting value for N byte sequence + min2 = 0x80, + min3 = 0x800, + min4 = 0x10000 +}; + +static +int utf8_encode(const ENCODING *enc, int c, char *buf) +{ + if (c < 0) + return 0; + if (c < min2) { + buf[0] = (c | cval1); + return 1; + } + if (c < min3) { + buf[0] = ((c >> 6) | cval2); + buf[1] = ((c & 0x3f) | 0x80); + return 2; + } + if (c < min4) { + buf[0] = ((c >> 12) | cval3); + buf[1] = (((c >> 6) & 0x3f) | 0x80); + buf[2] = ((c & 0x3f) | 0x80); + return 3; + } + if (c < 0x110000) { + buf[0] = ((c >> 18) | cval4); + buf[1] = (((c >> 12) & 0x3f) | 0x80); + buf[2] = (((c >> 6) & 0x3f) | 0x80); + buf[3] = ((c & 0x3f) | 0x80); + return 3; + } + return 0; +} + +static +void utf8_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) +{ + char *to; + const char *from; + if (fromLim - *fromP > toLim - *toP) { + /* Avoid copying partial characters. */ + for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) + if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) + break; + } + for (to = *toP, from = *fromP; from != fromLim; from++, to++) + *to = *from; + *fromP = from; + *toP = to; +} + +static const struct normal_encoding utf8_encoding = { + { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 }, + { #include "asciitab.h" #include "utf8tab.h" + } }; -#undef PREFIX +static const struct normal_encoding internal_utf8_encoding = { + { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 }, + { +#include "iasciitab.h" +#include "utf8tab.h" + } +}; -static unsigned char latin1tab[256] = { +static +int latin1_encode(const ENCODING *enc, int c, char *buf) +{ + if (c < 0) + return 0; + if (c <= 0xFF) { + buf[0] = (char)c; + return 1; + } + return 0; +} + +static +void latin1_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) +{ + for (;;) { + unsigned char c; + if (*fromP == fromLim) + break; + c = (unsigned char)**fromP; + if (c & 0x80) { + if (toLim - *toP < 2) + break; + *(*toP)++ = ((c >> 6) | cval2); + *(*toP)++ = ((c & 0x3f) | 0x80); + } + else { + if (*toP == toLim) + break; + *(*toP)++ = *(*fromP)++; + } + } +} + +static const struct normal_encoding latin1_encoding = { + { VTABLE1, latin1_encode, { latin1_toUtf8 }, 1 }, + { #include "asciitab.h" #include "latin1tab.h" + } }; +#define latin1tab (latin1_encoding.type) + +#undef PREFIX + static int unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { @@ -95,10 +226,81 @@ static int unicode_byte_type(char hi, char lo) return BT_NONASCII; } +#define DEFINE_UTF16_ENCODE \ +static \ +int PREFIX(encode)(const ENCODING *enc, int charNum, char *buf) \ +{ \ + if (charNum < 0) \ + return 0; \ + if (charNum < 0x10000) { \ + SET2(buf, charNum); \ + return 2; \ + } \ + if (charNum < 0x110000) { \ + charNum -= 0x10000; \ + SET2(buf, (charNum >> 10) + 0xD800); \ + SET2(buf + 2, (charNum & 0x3FF) + 0xDC00); \ + return 4; \ + } \ + return 0; \ +} + +#define DEFINE_UTF16_TO_UTF8 \ +static \ +void PREFIX(toUtf8)(const ENCODING *enc, \ + const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) \ +{ \ + const char *from; \ + for (from = *fromP; from != fromLim; from += 2) { \ + unsigned char lo2; \ + unsigned char lo = GET_LO(from); \ + unsigned char hi = GET_HI(from); \ + switch (hi) { \ + case 0: \ + if (*toP == toLim) \ + return; \ + *(*toP)++ = lo; \ + break; \ + case 0x1: case 0x2: case 0x3: \ + case 0x4: case 0x5: case 0x6: case 0x7: \ + if (toLim - *toP < 2) \ + return; \ + *(*toP)++ = ((lo >> 6) | (hi << 2) | cval2); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + default: \ + if (toLim - *toP < 3) \ + return; \ + /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ + *(*toP)++ = ((hi >> 4) | cval3); \ + *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ + if (toLim - *toP < 4) \ + return; \ + /* IIIIIIWW XXXXXXYY IIIIIIYY YYZZZZZ => */ \ + /* JJJJJJWW JJXXXXXX JJYYYYYY JJZZZZZ */ \ + *(*toP)++ = ((hi & 0x3) | cval4); \ + *(*toP)++ = ((lo >> 2) | 0x80); \ + from += 2; \ + lo2 = GET_LO(from); \ + *(*toP)++ = (((lo & 0x3) << 4) \ + | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) \ + | 0x80); \ + *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ + break; \ + } \ + } \ +} + #define PREFIX(ident) little2_ ## ident #define MINBPC 2 #define BYTE_TYPE(enc, p) \ ((p)[1] == 0 ? latin1tab[(unsigned char)*(p)] : unicode_byte_type((p)[1], (p)[0])) +#define BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) #define CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) #define IS_NAME_CHAR(enc, p, n) \ UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) @@ -107,15 +309,25 @@ static int unicode_byte_type(char hi, char lo) #include "xmltok_impl.c" +#define SET2(ptr, ch) \ + (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) +#define GET_LO(ptr) ((unsigned char)(ptr)[0]) +#define GET_HI(ptr) ((unsigned char)(ptr)[1]) + +DEFINE_UTF16_ENCODE +DEFINE_UTF16_TO_UTF8 + +#undef SET2 +#undef GET_LO +#undef GET_HI #undef MINBPC #undef BYTE_TYPE +#undef BYTE_TO_ASCII #undef CHAR_MATCHES #undef IS_NAME_CHAR #undef IS_NMSTRT_CHAR -const struct encoding little2_encoding = { - { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 2 -}; +static const struct encoding little2_encoding = { VTABLE, 2 }; #undef PREFIX @@ -124,6 +336,7 @@ const struct encoding little2_encoding = { /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ #define BYTE_TYPE(enc, p) \ ((p)[0] == 0 ? latin1tab[(unsigned char)(p)[1]] : unicode_byte_type((p)[0], (p)[1])) +#define BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) #define CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) #define IS_NAME_CHAR(enc, p, n) \ UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) @@ -132,15 +345,25 @@ const struct encoding little2_encoding = { #include "xmltok_impl.c" +#define SET2(ptr, ch) \ + (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) +#define GET_LO(ptr) ((unsigned char)(ptr)[1]) +#define GET_HI(ptr) ((unsigned char)(ptr)[0]) + +DEFINE_UTF16_ENCODE +DEFINE_UTF16_TO_UTF8 + +#undef SET2 +#undef GET_LO +#undef GET_HI #undef MINBPC #undef BYTE_TYPE +#undef BYTE_TO_ASCII #undef CHAR_MATCHES #undef IS_NAME_CHAR #undef IS_NMSTRT_CHAR -const struct encoding big2_encoding = { - { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 2 -}; +static const struct encoding big2_encoding = { VTABLE, 2 }; #undef PREFIX @@ -205,6 +428,15 @@ void initUpdatePosition(const ENCODING *enc, const char *ptr, normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } +const ENCODING *XmlGetInternalEncoding(int e) +{ + switch (e) { + case XML_UTF8_ENCODING: + return &internal_utf8_encoding.enc; + } + return 0; +} + void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr) { p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog; @@ -214,3 +446,227 @@ void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr) p->encPtr = encPtr; *encPtr = &(p->initEnc); } + +static +int toAscii(const ENCODING *enc, const char *ptr, const char *end) +{ + char buf[1]; + char *p = buf; + XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &p, p + 1); + if (p == buf) + return -1; + else + return buf[0]; +} + +static +int isSpace(int c) +{ + switch (c) { + case ' ': + case '\r': + case '\n': + case '\t': + return 1; + } + return 0; +} + +/* Return 1 if there's just optional white space +or there's an S followed by name=val. */ +static +int parsePseudoAttribute(const ENCODING *enc, + const char *ptr, + const char *end, + const char **namePtr, + const char **valPtr, + const char **nextTokPtr) +{ + int c; + char open; + if (ptr == end) { + *namePtr = 0; + return 1; + } + if (!isSpace(toAscii(enc, ptr, end))) { + *nextTokPtr = ptr; + return 0; + } + do { + ptr += enc->minBytesPerChar; + } while (isSpace(toAscii(enc, ptr, end))); + if (ptr == end) { + *namePtr = 0; + return 1; + } + *namePtr = ptr; + for (;;) { + c = toAscii(enc, ptr, end); + if (c == -1) { + *nextTokPtr = ptr; + return 0; + } + if (c == '=') + break; + if (isSpace(c)) { + do { + ptr += enc->minBytesPerChar; + } while (isSpace(c = toAscii(enc, ptr, end))); + if (c != '=') { + *nextTokPtr = ptr; + return 0; + } + break; + } + ptr += enc->minBytesPerChar; + } + if (ptr == *namePtr) { + *nextTokPtr = ptr; + return 0; + } + ptr += enc->minBytesPerChar; + c = toAscii(enc, ptr, end); + while (isSpace(c)) { + ptr += enc->minBytesPerChar; + c = toAscii(enc, ptr, end); + } + if (c != '"' && c != '\'') { + *nextTokPtr = ptr; + return 0; + } + open = c; + ptr += enc->minBytesPerChar; + *valPtr = ptr; + for (;; ptr += enc->minBytesPerChar) { + c = toAscii(enc, ptr, end); + if (c == open) + break; + if (!('a' <= c && c <= 'z') + && !('A' <= c && c <= 'Z') + && !('0' <= c && c <= '9') + && c != '.' + && c != '-' + && c != '_') { + *nextTokPtr = ptr; + return 0; + } + } + *nextTokPtr = ptr + enc->minBytesPerChar; + return 1; +} + +static +int streq(const char *s1, const char *s2) +{ + for (; *s1 == *s2; s1++, s2++) + if (!*s1) + return 1; + return 0; +} + +static +const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end) +{ +#define ENCODING_MAX 128 + char buf[ENCODING_MAX]; + char *p = buf; + int i; + XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &p, p + ENCODING_MAX - 1); + if (ptr != end) + return 0; + *p = 0; + for (i = 0; buf[i]; i++) { + if ('a' <= buf[i] && buf[i] <= 'z') + buf[i] += 'A' - 'a'; + } + if (streq(buf, "UTF-8")) + return &utf8_encoding.enc; + if (streq(buf, "ISO-8859-1")) + return &latin1_encoding.enc; + if (streq(buf, "UTF-16")) { + static const unsigned short n = 1; + if (enc->minBytesPerChar == 2) + return enc; + if (*(const char *)&n) + return &little2_encoding; + else + return &big2_encoding; + } + return 0; +} + +int XmlParseXmlDecl(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) +{ + const char *val = 0; + const char *name = 0; + ptr += 5 * enc->minBytesPerChar; + end -= 2 * enc->minBytesPerChar; + if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) { + *badPtr = ptr; + return 0; + } + if (!XmlNameMatchesAscii(enc, name, "version")) { + if (!isGeneralTextEntity) { + *badPtr = name; + return 0; + } + } + else { + if (versionPtr) + *versionPtr = val; + if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + *badPtr = ptr; + return 0; + } + if (!name) + return 1; + } + if (XmlNameMatchesAscii(enc, name, "encoding")) { + int c = toAscii(enc, val, end); + if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { + *badPtr = val; + return 0; + } + if (encodingName) + *encodingName = val; + if (encoding) + *encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar); + if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + *badPtr = ptr; + return 0; + } + if (!name) + return 1; + } + if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) { + *badPtr = name; + return 0; + } + if (XmlNameMatchesAscii(enc, val, "yes")) { + if (standalone) + *standalone = 1; + } + else if (XmlNameMatchesAscii(enc, val, "no")) { + if (standalone) + *standalone = 0; + } + else { + *badPtr = val; + return 0; + } + while (isSpace(toAscii(enc, ptr, end))) + ptr += enc->minBytesPerChar; + if (ptr != end) { + *badPtr = ptr; + return 0; + } + return 1; +} diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 37af9f91..3608a2a8 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -10,35 +10,34 @@ extern "C" { #endif /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_NONE -3 /* The string to be scanned is empty */ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; + might be part of CRLF sequence */ #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ #define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 -/* The following token is returned by XmlPrologTok when it detects the end -of the prolog and is also returned by XmlContentTok */ +/* The following tokens are returned by XmlContentTok; some are also + returned by XmlAttributeValueTok and XmlEntityTok */ #define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 - -/* The following tokens are returned only by XmlContentTok */ - #define XML_TOK_END_TAG 5 #define XML_TOK_DATA_CHARS 6 -#define XML_TOK_CDATA_SECTION 7 -#define XML_TOK_ENTITY_REF 8 -#define XML_TOK_CHAR_REF 9 /* numeric character reference */ +#define XML_TOK_DATA_NEWLINE 7 +#define XML_TOK_CDATA_SECTION 8 +#define XML_TOK_ENTITY_REF 9 +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_PI 10 /* processing instruction */ -#define XML_TOK_COMMENT 11 -#define XML_TOK_BOM 12 /* Byte order mark */ +#define XML_TOK_PI 11 /* processing instruction */ +#define XML_TOK_COMMENT 12 +#define XML_TOK_BOM 13 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ -#define XML_TOK_LITERAL 13 -#define XML_TOK_PARAM_ENTITY_REF 14 +#define XML_TOK_INSTANCE_START 14 #define XML_TOK_PROLOG_S 15 #define XML_TOK_DECL_OPEN 16 /* */ @@ -51,47 +50,89 @@ of the prolog and is also returned by XmlContentTok */ #define XML_TOK_CLOSE_PAREN 24 #define XML_TOK_OPEN_BRACKET 25 #define XML_TOK_CLOSE_BRACKET 26 -/* The following occur only in element type declarations */ -#define XML_TOK_COMMA 27 -#define XML_TOK_CLOSE_PAREN_QUESTION 28 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 29 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 30 /* )+ */ -#define XML_TOK_NAME_QUESTION 31 /* name? */ -#define XML_TOK_NAME_ASTERISK 32 /* name* */ -#define XML_TOK_NAME_PLUS 33 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 34 /* */ +#define XML_TOK_LITERAL 27 +#define XML_TOK_PARAM_ENTITY_REF 28 -#define XML_NSTATES 2 +/* The following occur only in element type declarations */ +#define XML_TOK_COMMA 29 +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* */ +#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ + + +#define XML_N_STATES 2 #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 +#define XML_N_LITERAL_TYPES 2 +#define XML_ATTRIBUTE_VALUE_LITERAL 0 +#define XML_ENTITY_VALUE_LITERAL 1 + +#define XML_N_INTERNAL_ENCODINGS 1 +#define XML_UTF8_ENCODING 0 +#if 0 +#define XML_UTF16_ENCODING 1 +#define XML_UCS4_ENCODING 2 +#endif + +#define XML_MAX_BYTES_PER_CHAR 4 + typedef struct position { /* first line and first column are 0 not 1 */ unsigned long lineNumber; unsigned long columnNumber; - /* if the last character counted was CR, then an immediately - following LF should be ignored */ - int ignoreInitialLF; } POSITION; -typedef struct encoding { - int (*scanners[XML_NSTATES])(const struct encoding *, - const char *, - const char *, - const char **); - int (*sameName)(const struct encoding *, +typedef struct { + const char *name; + const char *valuePtr; + const char *valueEnd; + char containsRef; +} ATTRIBUTE; + +struct encoding; +typedef struct encoding ENCODING; + +struct encoding { + int (*scanners[XML_N_STATES])(const ENCODING *, + const char *, + const char *, + const char **); + int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, + const char *, + const char *, + const char **); + int (*sameName)(const ENCODING *, const char *, const char *); - int (*nameMatchesAscii)(const struct encoding *, + int (*nameMatchesAscii)(const ENCODING *, const char *, const char *); - int (*getAtts)(const struct encoding *enc, const char *ptr, - int attsMax, const char **atts); - void (*updatePosition)(const struct encoding *, + int (*nameLength)(const ENCODING *, const char *); + int (*getAtts)(const ENCODING *enc, const char *ptr, + int attsMax, ATTRIBUTE *atts); + int (*charRefNumber)(const ENCODING *enc, const char *ptr); + void (*updatePosition)(const ENCODING *, const char *ptr, const char *end, POSITION *); + int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr); + int (*isSystemId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr); + int (*encode)(const ENCODING *enc, + int charNum, + char *buf); + void (*convert[XML_N_INTERNAL_ENCODINGS])(const ENCODING *enc, + const char **fromP, + const char *fromLim, + char **toP, + const char *toLim); int minBytesPerChar; -} ENCODING; +}; /* Scan the string starting at ptr until the end of the next complete token, @@ -123,21 +164,64 @@ literals, comments and processing instructions. #define XmlContentTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +/* This is used for performing a 2nd-level tokenization on +the content of a literal that has already been returned by XmlTok. */ + +#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ + (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) + +#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) + +#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) + #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) -#define XmlNameMatchesAscii(enc, ptr1, ptr2) (((enc)->nameMatchesAscii)(enc, ptr1, ptr2)) + +#define XmlNameMatchesAscii(enc, ptr1, ptr2) \ + (((enc)->nameMatchesAscii)(enc, ptr1, ptr2)) + +#define XmlNameLength(enc, ptr) \ + (((enc)->nameLength)(enc, ptr)) #define XmlGetAttributes(enc, ptr, attsMax, atts) \ (((enc)->getAtts)(enc, ptr, attsMax, atts)) +#define XmlCharRefNumber(enc, ptr) \ + (((enc)->charRefNumber)(enc, ptr)) + #define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) +#define XmlIsPublicId(enc, ptr, end, badPtr) \ + (((enc)->isPublicId)(enc, ptr, end, badPtr)) + +#define XmlIsSystemId(enc, ptr, end, badPtr) \ + (((enc)->isSystemId)(enc, ptr, end, badPtr)) + +#define XmlEncode(enc, ch, buf) \ + (((enc)->encode)(enc, ch, buf)) + +#define XmlConvert(enc, targetEnc, fromP, fromLim, toP, toLim) \ + (((enc)->convert[targetEnc])(enc, fromP, fromLim, toP, toLim)) + typedef struct { ENCODING initEnc; const ENCODING **encPtr; } INIT_ENCODING; +int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); + void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **); +const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int); #ifdef __cplusplus } diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index 11b274bd..618aa67a 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -7,9 +7,7 @@ #define MULTIBYTE_CASES(ptr, end, ret) \ DO_LEAD_CASE(2, ptr, end, ret) \ DO_LEAD_CASE(3, ptr, end, ret) \ - DO_LEAD_CASE(4, ptr, end, ret) \ - DO_LEAD_CASE(5, ptr, end, ret) \ - DO_LEAD_CASE(6, ptr, end, ret) + DO_LEAD_CASE(4, ptr, end, ret) #define INVALID_CASES(ptr, nextTokPtr) \ @@ -45,9 +43,7 @@ break; \ CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(5, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(6, enc, ptr, end, nextTokPtr) + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ @@ -72,9 +68,7 @@ break; \ CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(5, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(6, enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) #ifndef PREFIX #define PREFIX(ident) ident @@ -127,7 +121,6 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, { if (ptr == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '-')) switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr); @@ -613,6 +606,17 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanLt)(enc, ptr + MINBPC, end, nextTokPtr); case BT_AMP: return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); + case BT_CR: + ptr += MINBPC; + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC; + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; + case BT_LF: + *nextTokPtr = ptr + MINBPC; + return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC; if (ptr == end) @@ -658,6 +662,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_NONXML: case BT_MALFORM: case BT_TRAIL: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: @@ -799,20 +805,29 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: - case BT_LEAD5: - return PREFIX(contentTok)(enc, ptr - MINBPC, end, nextTokPtr); + *nextTokPtr = ptr - MINBPC; + return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } - case BT_S: case BT_CR: case BT_LF: + case BT_CR: + if (ptr + MINBPC == end) + return XML_TOK_TRAILING_CR; + /* fall through */ + case BT_S: case BT_LF: for (;;) { ptr += MINBPC; if (ptr == end) break; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: + case BT_S: case BT_LF: break; + case BT_CR: + /* don't split CR/LF pair */ + if (ptr + MINBPC != end) + break; + /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_PROLOG_S; @@ -859,9 +874,14 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_PLUS: *nextTokPtr = ptr + MINBPC; return XML_TOK_CLOSE_PAREN_PLUS; + case BT_CR: case BT_LF: case BT_S: + case BT_GT: case BT_COMMA: case BT_VERBAR: + case BT_RPAR: + *nextTokPtr = ptr; + return XML_TOK_CLOSE_PAREN; } *nextTokPtr = ptr; - return XML_TOK_CLOSE_PAREN; + return XML_TOK_INVALID; case BT_VERBAR: *nextTokPtr = ptr + MINBPC; return XML_TOK_OR; @@ -886,7 +906,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(5) LEAD_CASE(6) + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NMSTRT: case BT_HEX: @@ -952,12 +972,204 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } +static +int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + const char *start; + if (ptr == end) + return XML_TOK_NONE; + start = ptr; + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { +#define LEAD_CASE(n) \ + case BT_LEAD ## n: ptr += n; break; + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) +#undef LEAD_CASE + case BT_AMP: + if (ptr == start) + return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + case BT_LT: + /* this is for inside entity references */ + *nextTokPtr = ptr; + return XML_TOK_INVALID; + case BT_LF: + if (ptr == start) { + *nextTokPtr = ptr + MINBPC; + return XML_TOK_DATA_NEWLINE; + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + case BT_CR: + if (ptr == start) { + ptr += MINBPC; + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC; + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + default: + ptr += MINBPC; + break; + } + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; +} + +static +int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + const char *start; + if (ptr == end) + return XML_TOK_NONE; + start = ptr; + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { +#define LEAD_CASE(n) \ + case BT_LEAD ## n: ptr += n; break; + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) +#undef LEAD_CASE + case BT_AMP: + if (ptr == start) + return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + case BT_PERCNT: + if (ptr == start) + return PREFIX(scanPercent)(enc, ptr + MINBPC, end, nextTokPtr); + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + case BT_LF: + if (ptr == start) { + *nextTokPtr = ptr + MINBPC; + return XML_TOK_DATA_NEWLINE; + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + case BT_CR: + if (ptr == start) { + ptr += MINBPC; + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC; + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; + default: + ptr += MINBPC; + break; + } + } + *nextTokPtr = ptr; + return XML_TOK_DATA_CHARS; +} + +static +int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr) +{ + ptr += MINBPC; + end -= MINBPC; + for (; ptr != end; ptr += MINBPC) { + switch (BYTE_TYPE(enc, ptr)) { + case BT_DIGIT: + case BT_HEX: + case BT_MINUS: + case BT_APOS: + case BT_LPAR: + case BT_RPAR: + case BT_PLUS: + case BT_COMMA: + case BT_SOL: + case BT_EQUALS: + case BT_QUEST: + case BT_CR: + case BT_LF: + break; + case BT_S: + if (CHAR_MATCHES(enc, ptr, '\t')) { + *badPtr = ptr; + return 0; + } + break; + case BT_NAME: + case BT_NMSTRT: + if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f) + && !CHAR_MATCHES(enc, ptr, '_')) + break; + default: + *badPtr = ptr; + return 0; + } + } + return 1; +} + +static +int PREFIX(isSystemId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr) +{ + ptr += MINBPC; + end -= MINBPC; + for (; ptr != end; ptr += MINBPC) { + switch (BYTE_TYPE(enc, ptr)) { + case BT_DIGIT: + case BT_HEX: + case BT_MINUS: + case BT_APOS: + case BT_LPAR: + case BT_RPAR: + case BT_PLUS: + case BT_COMMA: + case BT_SOL: + case BT_AMP: + case BT_SEMI: + case BT_EQUALS: + case BT_QUEST: + case BT_EXCL: + case BT_AST: + case BT_PERCNT: + break; + case BT_NAME: + case BT_NMSTRT: + if (BYTE_TO_ASCII(enc, ptr) & ~0x7f) { + *badPtr = ptr; + return 0; + } + break; + default: + switch (BYTE_TO_ASCII(enc, ptr)) { + case '@': + case '$': + break; + default: + *badPtr = ptr; + return 0; + } + break; + } + } + return 1; +} + /* This must only be called for a well-formed start-tag or empty element tag. -Returns the number of attributes. Pointers to the names of up to the first -attsMax attributes are stored in atts. */ +Returns the number of attributes. Pointers to the first attsMax attributes +are stored in atts. */ + static int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, const char **atts) + int attsMax, ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; @@ -967,14 +1179,15 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { #define START_NAME \ if (state == other) { \ - if (nAtts < attsMax) \ - atts[nAtts] = ptr; \ - ++nAtts; \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].containsRef = 0; \ + } \ state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(5) LEAD_CASE(6) + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: @@ -983,20 +1196,29 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, break; #undef START_NAME case BT_QUOT: - if (state == other) { + if (state != inValue) { + atts[nAtts].valuePtr = ptr + MINBPC; state = inValue; open = BT_QUOT; } - else if (open == BT_QUOT) + else if (open == BT_QUOT) { state = other; + atts[nAtts++].valueEnd = ptr; + } break; case BT_APOS: - if (state == other) { + if (state != inValue) { + atts[nAtts].valuePtr = ptr; state = inValue; open = BT_APOS; } - else if (open == BT_APOS) + else if (open == BT_APOS) { state = other; + atts[nAtts++].valueEnd = ptr; + } + break; + case BT_AMP: + atts[nAtts].containsRef = 1; break; case BT_S: case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted @@ -1016,6 +1238,51 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, /* not reached */ } +static +int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +{ + int result = 0; + /* skip &# */ + ptr += 2*MINBPC; + if (CHAR_MATCHES(enc, ptr, 'x')) { + for (ptr += MINBPC; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { + int c = BYTE_TO_ASCII(enc, ptr); + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + result <<= 4; + result |= (c - '0'); + break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + result <<= 4; + result += 10 + (c - 'A'); + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + result <<= 4; + result += 10 + (c - 'a'); + break; + } + if (result >= 0x110000) + return -1; + } + } + else { + for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { + int c = BYTE_TO_ASCII(enc, ptr); + result *= 10; + result += (c - '0'); + if (result >= 0x110000) + return -1; + } + } + /* FIXME maybe exclude surrogates as well */ + if ((result < 0x80 && latin1tab[result] == BT_NONXML) + || result == 0xFFFE + || result == 0xFFFF) + return -1; + return result; +} + static int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { @@ -1025,7 +1292,7 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ return 0; - LEAD_CASE(6) LEAD_CASE(5) LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) + LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) #undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) @@ -1037,20 +1304,30 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_DIGIT: case BT_NAME: case BT_MINUS: - if (*ptr2 != *ptr1) + if (*ptr2++ != *ptr1++) return 0; - ptr1 += MINBPC; - ptr2 += MINBPC; +#if MINBPC > 1 + if (*ptr2++ != *ptr1++) + return 0; +#if MINBPC > 2 + if (*ptr2++ != *ptr1++) + return 0; +#if MINBPC > 3 + if (*ptr2++ != *ptr1++) + return 0; +#endif +#endif +#endif break; default: +#if MINBPC == 1 if (*ptr1 == *ptr2) return 1; +#endif switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: - case BT_LEAD5: - case BT_LEAD6: case BT_NONASCII: case BT_NMSTRT: case BT_HEX: @@ -1077,8 +1354,6 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char * case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: - case BT_LEAD5: - case BT_LEAD6: case BT_NONASCII: case BT_NMSTRT: case BT_HEX: @@ -1091,19 +1366,36 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char * } } +static +int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) +{ + const char *start = ptr; + for (;;) { + switch (BYTE_TYPE(enc, ptr)) { +#define LEAD_CASE(n) \ + case BT_LEAD ## n: ptr += n; break; + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) +#undef LEAD_CASE + case BT_NONASCII: + case BT_NMSTRT: + case BT_HEX: + case BT_DIGIT: + case BT_NAME: + case BT_MINUS: + ptr += MINBPC; + break; + default: + return ptr - start; + } + } +} + static void PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos) { - if (pos->ignoreInitialLF) { - if (ptr == end) - return; - if (CHAR_MATCHES(enc, ptr, '\n')) - ptr += MINBPC; - pos->ignoreInitialLF = 0; - } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { MULTIBYTE_CASES(ptr, end, ;/* hack! */) @@ -1115,14 +1407,9 @@ void PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC; - if (ptr == end) { - pos->ignoreInitialLF = 1; - pos->columnNumber = 0; - return; - } - pos->columnNumber = (unsigned)-1; - if (CHAR_MATCHES(enc, ptr, '\n')) + if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; + pos->columnNumber = (unsigned)-1; break; default: ptr += MINBPC; diff --git a/expat/xmltok/xmltok_impl.h b/expat/xmltok/xmltok_impl.h index e3d6dcfd..f68d4aa7 100755 --- a/expat/xmltok/xmltok_impl.h +++ b/expat/xmltok/xmltok_impl.h @@ -3,12 +3,13 @@ enum { BT_MALFORM, BT_LT, BT_AMP, + BT_RSQB, BT_LEAD2, BT_LEAD3, BT_LEAD4, - BT_LEAD5, - BT_LEAD6, BT_TRAIL, + BT_CR, + BT_LF, BT_GT, BT_QUOT, BT_APOS, @@ -19,10 +20,7 @@ enum { BT_SEMI, BT_NUM, BT_LSQB, - BT_RSQB, BT_S, - BT_CR, - BT_LF, BT_NMSTRT, BT_HEX, BT_DIGIT, diff --git a/expat/xmlwf/readfilemap.c b/expat/xmlwf/readfilemap.c index 9a32378e..63cd62f8 100755 --- a/expat/xmlwf/readfilemap.c +++ b/expat/xmlwf/readfilemap.c @@ -51,7 +51,7 @@ int filemap(const char *name, fprintf(stderr, "%s: out of memory\n", name); return 0; } - n = _read(fd, p, nbytes); + n = read(fd, p, nbytes); if (n < 0) { perror(name); close(fd); diff --git a/expat/xmlwf/wfcheck.c b/expat/xmlwf/wfcheck.c index cd36e2b5..c80dd010 100755 --- a/expat/xmlwf/wfcheck.c +++ b/expat/xmlwf/wfcheck.c @@ -1,26 +1,26 @@ #include #include -#include "wfcheck.h" -#ifdef _MSC_VER -#define XMLTOKAPI __declspec(dllimport) -#endif +#include "wfcheck.h" +#include "hashtable.h" #include "xmltok.h" #include "xmlrole.h" typedef struct { const char *name; -} NAMED; + const char *textPtr; + size_t textLen; + const char *systemId; + const char *publicId; + const char *notation; + char open; + char wfInContent; + char wfInAttribute; + char magic; +} ENTITY; -typedef struct { - NAMED **v; - size_t size; - size_t used; - size_t usedLim; -} HASH_TABLE; - -#define BLOCK_SIZE 1024 +#define INIT_BLOCK_SIZE 1024 typedef struct block { struct block *next; @@ -30,122 +30,266 @@ typedef struct block { typedef struct { BLOCK *blocks; const char *end; - const char *ptr; - const char *start; + char *ptr; + char *start; } STRING_POOL; typedef struct { - STRING_POOL pool; - HASH_TABLE paramEntities; HASH_TABLE generalEntities; + STRING_POOL pool; + int containsRef; + int standalone; + char *groupConnector; + size_t groupSize; } DTD; +typedef struct { + DTD dtd; + size_t stackSize; + const char **startName; + int attsSize; + ATTRIBUTE *atts; +} CONTEXT; + +static void poolInit(STRING_POOL *); +static void poolDestroy(STRING_POOL *); +static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static int poolGrow(STRING_POOL *); +static int dtdInit(DTD *); +static void dtdDestroy(DTD *); +static int contextInit(CONTEXT *); +static void contextDestroy(CONTEXT *); + +#define poolStart(pool) ((pool)->start) +#define poolDiscard(pool) ((pool)->ptr = (pool)->start) +#define poolFinish(pool) ((pool)->start = (pool)->ptr) + static enum WfCheckResult -checkProlog(int *tok, const char **s, const char *end, const char **nextTokP, - const ENCODING **enc); +checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc); +static enum WfCheckResult +checkContent(size_t level, CONTEXT *context, const ENCODING *enc, + const char *s, const char *end, const char **badPtr); +static enum WfCheckResult +checkGeneralTextEntity(CONTEXT *context, + const char *s, const char *end, + const char **nextPtr, + const ENCODING **enc); +static enum WfCheckResult +checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **); +static enum WfCheckResult +checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts, + const char **badPtr); static -void setPosition(const ENCODING *enc, - const char *start, - const char *end, - const char **badPtr, - unsigned long *badLine, - unsigned long *badCol); +enum WfCheckResult storeEntity(DTD *dtd, + const ENCODING *enc, + const char *entityNamePtr, + const char *entityNameEnd, + const char *entityTextPtr, + const char *entityTextEnd, + const char **badPtr); + enum WfCheckResult -wfCheck(const char *s, size_t n, +wfCheck(enum EntityType entityType, const char *s, size_t n, const char **badPtr, unsigned long *badLine, unsigned long *badCol) { - enum WfCheckResult result; - unsigned nElements = 0; - unsigned nAtts = 0; + CONTEXT context; + const ENCODING *enc; const char *start = s; const char *end = s + n; - const char *next; - const ENCODING *enc; - size_t stackSize = 1024; - size_t level = 0; - int tok; - const char **startName = malloc(stackSize * sizeof(char *)); - int attsSize = 1024; - const char **atts = malloc(attsSize * sizeof(char *)); -#define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n)) - if (!startName) + const char *next = 0; + enum WfCheckResult result; + + if (!contextInit(&context)) { + contextDestroy(&context); return noMemory; - result = checkProlog(&tok, &s, end, &next, &enc); - if (result) { - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(result); } + if (entityType == documentEntity) { + result = checkProlog(&context.dtd, s, end, &next, &enc); + s = next; + if (!result) { + result = checkContent(0, &context, enc, s, end, &next); + s = next; + } + } + else { + result = checkGeneralTextEntity(&context, s, end, &next, &enc); + s = next; + } + if (result && s) { + POSITION pos; + memset(&pos, 0, sizeof(POSITION)); + XmlUpdatePosition(enc, start, s, &pos); + *badPtr = s; + *badLine = pos.lineNumber; + *badCol = pos.columnNumber; + } + contextDestroy(&context); + return result; +} + +static +int contextInit(CONTEXT *p) +{ + p->stackSize = 1024; + p->startName = malloc(p->stackSize * sizeof(char *)); + p->attsSize = 1024; + p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE)); + return dtdInit(&(p->dtd)) && p->atts && p->startName; +} + +static +void contextDestroy(CONTEXT *p) +{ + dtdDestroy(&(p->dtd)); + free((void *)p->startName); + free((void *)p->atts); +} + +static enum WfCheckResult +checkContent(size_t level, CONTEXT *context, const ENCODING *enc, + const char *s, const char *end, const char **badPtr) +{ + size_t startLevel = level; + const char *next; + int tok = XmlContentTok(enc, s, end, &next); for (;;) { switch (tok) { + case XML_TOK_TRAILING_CR: case XML_TOK_NONE: - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(noElements); - case XML_TOK_INVALID: - setPosition(enc, start, next, badPtr, badLine, badCol); - RETURN_CLEANUP(invalidToken); - case XML_TOK_PARTIAL: - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(unclosedToken); - case XML_TOK_PARTIAL_CHAR: - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(partialChar); - case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - nElements++; - break; - case XML_TOK_START_TAG_NO_ATTS: - nElements++; - if (level == stackSize) { - startName = realloc((void *)startName, (stackSize *= 2) * sizeof(char *)); - if (!startName) { - free((void *)atts); - return noMemory; - } + if (startLevel > 0) { + if (level != startLevel) { + *badPtr = s; + return asyncEntity; + } + return wellFormed; } - startName[level++] = s + enc->minBytesPerChar; + *badPtr = s; + return noElements; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + case XML_TOK_PARTIAL: + *badPtr = s; + return unclosedToken; + case XML_TOK_PARTIAL_CHAR: + *badPtr = s; + return partialChar; + case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + break; + case XML_TOK_ENTITY_REF: + { + const char *name = poolStoreString(&context->dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity = (ENTITY *)lookup(&context->dtd.generalEntities, name, 0); + poolDiscard(&context->dtd.pool); + if (!entity) { + if (!context->dtd.containsRef || context->dtd.standalone) { + *badPtr = s; + return undefinedEntity; + } + break; + } + if (entity->wfInContent) + break; + if (entity->open) { + *badPtr = s; + return recursiveEntityRef; + } + if (entity->notation) { + *badPtr = s; + return binaryEntityRef; + } + if (entity) { + if (entity->textPtr) { + enum WfCheckResult result; + const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING); + entity->open = 1; + result = checkContent(level, context, internalEnc, + entity->textPtr, entity->textPtr + entity->textLen, + badPtr); + entity->open = 0; + if (result && *badPtr) { + *badPtr = s; + return result; + } + entity->wfInContent = 1; + } + } + break; + } + case XML_TOK_START_TAG_NO_ATTS: + if (level == context->stackSize) { + context->startName + = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *)); + if (!context->startName) + return noMemory; + } + context->startName[level++] = s + enc->minBytesPerChar; break; case XML_TOK_START_TAG_WITH_ATTS: - if (level == stackSize) { - startName = realloc((void *)startName, (stackSize *= 2) * sizeof(char *)); - if (!startName) { - free((void *)atts); + if (level == context->stackSize) { + context->startName = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *)); + if (!context->startName) return noMemory; - } } - startName[level++] = s + enc->minBytesPerChar; + context->startName[level++] = s + enc->minBytesPerChar; /* fall through */ case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - nElements++; { int i; - int n = XmlGetAttributes(enc, s, attsSize, atts); - nAtts += n; - if (n > attsSize) { - attsSize = 2*n; - atts = realloc((void *)atts, attsSize * sizeof(char *)); - if (!atts) { - free((void *)startName); + int n = XmlGetAttributes(enc, s, context->attsSize, context->atts); + if (n > context->attsSize) { + context->attsSize = 2*n; + context->atts = realloc((void *)context->atts, context->attsSize * sizeof(ATTRIBUTE)); + if (!context->atts) return noMemory; - } - XmlGetAttributes(enc, s, n, atts); + XmlGetAttributes(enc, s, n, context->atts); } - for (i = 1; i < n; i++) { - int j; - for (j = 0; j < i; j++) { - if (XmlSameName(enc, atts[i], atts[j])) { - setPosition(enc, start, atts[i], badPtr, badLine, badCol); - RETURN_CLEANUP(duplicateAttribute); - } + for (i = 0; i < n; i++) { + if (context->atts[i].containsRef) { + enum WfCheckResult result + = checkAttributeValue(&context->dtd, enc, + context->atts[i].valuePtr, + context->atts[i].valueEnd, + badPtr); + if (result) + return result; } } + if (i > 1) { + enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr); + if (result) + return result; + } } break; case XML_TOK_END_TAG: + if (level == startLevel) { + *badPtr = s; + return asyncEntity; + } --level; - if (!XmlSameName(enc, startName[level], s + enc->minBytesPerChar * 2)) { - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(tagMismatch); + if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) { + *badPtr = s; + return tagMismatch; + } + break; + case XML_TOK_CHAR_REF: + if (XmlCharRefNumber(enc, s) < 0) { + *badPtr = s; + return badCharRef; + } + break; + case XML_TOK_PI: + if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) { + *badPtr = s; + return misplacedXmlPi; } break; } @@ -154,8 +298,9 @@ wfCheck(const char *s, size_t n, do { tok = XmlPrologTok(enc, s, end, &next); switch (tok) { + case XML_TOK_TRAILING_CR: case XML_TOK_NONE: - RETURN_CLEANUP(wellFormed); + return wellFormed; case XML_TOK_PROLOG_S: case XML_TOK_COMMENT: case XML_TOK_PI: @@ -163,8 +308,8 @@ wfCheck(const char *s, size_t n, break; default: if (tok > 0) { - setPosition(enc, start, s, badPtr, badLine, badCol); - RETURN_CLEANUP(junkAfterDocElement); + *badPtr = s; + return junkAfterDocElement; } break; } @@ -174,57 +319,592 @@ wfCheck(const char *s, size_t n, tok = XmlContentTok(enc, s, end, &next); } /* not reached */ - return 0; } static -int checkProlog(int *tokp, - const char **startp, const char *end, - const char **nextTokP, const ENCODING **enc) +int attcmp(const void *p1, const void *p2) { + const ATTRIBUTE *a1 = p1; + const ATTRIBUTE *a2 = p2; + size_t n1 = a1->valuePtr - a1->name; + size_t n2 = a2->valuePtr - a2->name; + + if (n1 == n2) { + int n = memcmp(a1->name, a2->name, n1); + if (n) + return n; + /* Sort identical attribute names by position, so that we always + report the first duplicate attribute. */ + if (a1->name < a2->name) + return -1; + else if (a1->name > a2->name) + return 1; + else + return 0; + } + else if (n1 < n2) + return -1; + else + return 1; +} + +/* Note that this trashes the attribute values. */ + +static enum WfCheckResult +checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts, + const char **badPtr) +{ +#define QSORT_MIN_ATTS 10 + if (nAtts < QSORT_MIN_ATTS) { + int i; + for (i = 1; i < nAtts; i++) { + int j; + for (j = 0; j < i; j++) { + if (XmlSameName(enc, context->atts[i].name, context->atts[j].name)) { + *badPtr = context->atts[i].name; + return duplicateAttribute; + } + } + } + } + else { + int i; + const char *dup = 0; + /* Store the end of the name in valuePtr */ + for (i = 0; i < nAtts; i++) { + ATTRIBUTE *a = context->atts + i; + a->valuePtr = a->name + XmlNameLength(enc, a->name); + } + qsort(context->atts, nAtts, sizeof(ATTRIBUTE), attcmp); + for (i = 1; i < nAtts; i++) { + ATTRIBUTE *a = context->atts + i; + if (XmlSameName(enc, a->name, a[-1].name)) { + if (!dup || a->name < dup) + dup = a->name; + } + } + if (dup) { + *badPtr = dup; + return duplicateAttribute; + } + } + return wellFormed; +} + +static enum WfCheckResult +checkProlog(DTD *dtd, const char *s, const char *end, + const char **nextPtr, const ENCODING **enc) +{ + const char *entityNamePtr, *entityNameEnd; PROLOG_STATE state; - const char *s = *startp; + ENTITY *entity; INIT_ENCODING initEnc; XmlInitEncoding(&initEnc, enc); XmlPrologStateInit(&state); for (;;) { - int tok = XmlPrologTok(*enc, s, end, nextTokP); - switch (tok) { - case XML_TOK_START_TAG_WITH_ATTS: - case XML_TOK_START_TAG_NO_ATTS: - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - case XML_TOK_INVALID: - case XML_TOK_NONE: - case XML_TOK_PARTIAL: - *tokp = tok; - *startp = s; - return wellFormed; - case XML_TOK_BOM: - case XML_TOK_PROLOG_S: - break; - default: - switch (XmlTokenRole(&state, tok, s, *nextTokP, *enc)) { + const char *next; + int tok = XmlPrologTok(*enc, s, end, &next); + if (tok != XML_TOK_PROLOG_S) { + switch (XmlTokenRole(&state, tok, s, next, *enc)) { + case XML_ROLE_XML_DECL: + { + const char *encodingName = 0; + const ENCODING *encoding = 0; + const char *version; + int standalone = -1; + if (!XmlParseXmlDecl(0, + *enc, + s, + next, + nextPtr, + &version, + &encodingName, + &encoding, + &standalone)) + return syntaxError; + if (encoding) { + if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { + *nextPtr = encodingName; + return incorrectEncoding; + } + *enc = encoding; + } + else if (encodingName) { + *nextPtr = encodingName; + return unknownEncoding; + } + if (standalone == 1) + dtd->standalone = 1; + break; + } + case XML_ROLE_DOCTYPE_SYSTEM_ID: + if (!XmlIsSystemId(*enc, s, next, nextPtr)) + return syntaxError; + dtd->containsRef = 1; + break; + case XML_ROLE_NOTATION_SYSTEM_ID: + if (!XmlIsSystemId(*enc, s, next, nextPtr)) + return syntaxError; + break; + case XML_ROLE_DOCTYPE_PUBLIC_ID: + case XML_ROLE_ENTITY_PUBLIC_ID: + case XML_ROLE_NOTATION_PUBLIC_ID: + if (!XmlIsPublicId(*enc, s, next, nextPtr)) + return syntaxError; + break; + case XML_ROLE_INSTANCE_START: + *nextPtr = s; + return wellFormed; + case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: + case XML_ROLE_FIXED_ATTRIBUTE_VALUE: + { + const char *tem = 0; + enum WfCheckResult result + = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar, + &tem); + if (result) { + if (tem) + *nextPtr = tem; + return result; + } + break; + } + case XML_ROLE_ENTITY_VALUE: + { + enum WfCheckResult result + = storeEntity(dtd, + *enc, + entityNamePtr, + entityNameEnd, + s, + next, + nextPtr); + if (result != wellFormed) + return result; + } + break; + case XML_ROLE_ENTITY_SYSTEM_ID: + if (!XmlIsSystemId(*enc, s, next, nextPtr)) + return syntaxError; + if (entityNamePtr) { + const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd); + entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY)); + if (entity->name != name) { + poolDiscard(&dtd->pool); + entity = 0; + } + else { + poolFinish(&dtd->pool); + entity->systemId = poolStoreString(&dtd->pool, *enc, + s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar); + poolFinish(&dtd->pool); + } + } + break; + case XML_ROLE_ENTITY_NOTATION_NAME: + if (entity) { + entity->notation = poolStoreString(&dtd->pool, *enc, s, next); + poolFinish(&dtd->pool); + } + break; + case XML_ROLE_GENERAL_ENTITY_NAME: + entityNamePtr = s; + entityNameEnd = next; + break; + case XML_ROLE_PARAM_ENTITY_NAME: + entityNamePtr = 0; + entityNameEnd = 0; + break; case XML_ROLE_ERROR: - *startp = s; - return syntaxError; + *nextPtr = s; + switch (tok) { + case XML_TOK_COND_SECT_OPEN: + return condSect; + case XML_TOK_PARAM_ENTITY_REF: + return paramEntityRef; + case XML_TOK_INVALID: + *nextPtr = next; + return invalidToken; + case XML_TOK_NONE: + return noElements; + case XML_TOK_PARTIAL: + return unclosedToken; + case XML_TOK_PARTIAL_CHAR: + return partialChar; + case XML_TOK_TRAILING_CR: + *nextPtr = s + (*enc)->minBytesPerChar; + return noElements; + case XML_TOK_PI: + if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) + return misplacedXmlPi; + default: + return syntaxError; + } + case XML_ROLE_GROUP_OPEN: + if (state.level >= dtd->groupSize) { + if (dtd->groupSize) + dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2); + else + dtd->groupConnector = malloc(dtd->groupSize = 32); + if (!dtd->groupConnector) + return noMemory; + } + dtd->groupConnector[state.level] = 0; + break; + case XML_ROLE_GROUP_SEQUENCE: + if (dtd->groupConnector[state.level] == '|') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = ','; + break; + case XML_ROLE_GROUP_CHOICE: + if (dtd->groupConnector[state.level] == ',') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = '|'; + break; + case XML_ROLE_NONE: + if (tok == XML_TOK_PARAM_ENTITY_REF) + dtd->containsRef = 1; + break; + } + } + s = next; + } + /* not reached */ +} + +static enum WfCheckResult +checkGeneralTextEntity(CONTEXT *context, + const char *s, const char *end, + const char **nextPtr, + const ENCODING **enc) +{ + INIT_ENCODING initEnc; + const char *next; + int tok; + + XmlInitEncoding(&initEnc, enc); + tok = XmlContentTok(*enc, s, end, &next); + + if (tok == XML_TOK_BOM) { + s = next; + tok = XmlContentTok(*enc, s, end, &next); + } + if (tok == XML_TOK_PI + && XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) { + const char *encodingName = 0; + const ENCODING *encoding = 0; + const char *version; + if (!XmlParseXmlDecl(1, + *enc, + s, + next, + nextPtr, + &version, + &encodingName, + &encoding, + 0)) + return syntaxError; + if (encoding) { + if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { + *nextPtr = encodingName; + return incorrectEncoding; + } + *enc = encoding; + } + else if (encodingName) { + *nextPtr = encodingName; + return unknownEncoding; + } + s = next; + } + context->dtd.containsRef = 1; + return checkContent(1, context, *enc, s, end, nextPtr); +} + +static enum WfCheckResult +checkAttributeValue(DTD *dtd, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr) +{ + for (;;) { + const char *next; + int tok = XmlAttributeValueTok(enc, ptr, end, &next); + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + return wellFormed; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + case XML_TOK_PARTIAL: + *badPtr = ptr; + return invalidToken; + case XML_TOK_CHAR_REF: + if (XmlCharRefNumber(enc, ptr) < 0) { + *badPtr = ptr; + return badCharRef; } break; + case XML_TOK_DATA_CHARS: + case XML_TOK_DATA_NEWLINE: + break; + case XML_TOK_ENTITY_REF: + { + const char *name = poolStoreString(&dtd->pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + if (!entity) { + if (!dtd->containsRef) { + *badPtr = ptr; + return undefinedEntity; + } + break; + } + if (entity->wfInAttribute) + break; + if (entity->open) { + *badPtr = ptr; + return recursiveEntityRef; + } + if (entity->notation) { + *badPtr = ptr; + return binaryEntityRef; + } + if (entity) { + if (entity->textPtr) { + enum WfCheckResult result; + const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING); + const char *textEnd = entity->textPtr + entity->textLen; + entity->open = 1; + result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr); + entity->open = 0; + if (result && *badPtr) { + *badPtr = ptr; + return result; + } + entity->wfInAttribute = 1; + } + else { + *badPtr = ptr; + return attributeExternalEntityRef; + } + } + break; + } + break; + default: + abort(); } - s = *nextTokP; + ptr = next; } /* not reached */ } static -void setPosition(const ENCODING *enc, - const char *start, const char *end, - const char **badPtr, unsigned long *badLine, unsigned long *badCol) +void poolInit(STRING_POOL *pool) { - POSITION pos; - memset(&pos, 0, sizeof(POSITION)); - XmlUpdatePosition(enc, start, end, &pos); - *badPtr = end; - *badLine = pos.lineNumber; - *badCol = pos.columnNumber; + pool->blocks = 0; + pool->start = 0; + pool->ptr = 0; + pool->end = 0; +} + +static +void poolDestroy(STRING_POOL *pool) +{ + BLOCK *p = pool->blocks; + while (p) { + BLOCK *tem = p->next; + free(p); + p = tem; + } + pool->blocks = 0; + pool->ptr = 0; + pool->start = 0; + pool->end = 0; +} + +static +const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + for (;;) { + XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end); + if (ptr == end) + break; + if (!poolGrow(pool)) + return 0; + } + return pool->start; +} + +static +const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + if (!poolAppend(pool, enc, ptr, end)) + return 0; + if (pool->ptr == pool->end && !poolGrow(pool)) + return 0; + *(pool->ptr)++ = 0; + return pool->start; +} + +static +int poolGrow(STRING_POOL *pool) +{ + if (pool->blocks && pool->start == pool->blocks->s) { + size_t blockSize = (pool->end - pool->start)*2; + pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize); + if (!pool->blocks) + return 0; + pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->start = pool->blocks->s; + pool->end = pool->start + blockSize; + } + else { + BLOCK *tem; + size_t blockSize = pool->end - pool->start; + if (blockSize < INIT_BLOCK_SIZE) + blockSize = INIT_BLOCK_SIZE; + else + blockSize *= 2; + tem = malloc(offsetof(BLOCK, s) + blockSize); + if (!tem) + return 0; + tem->next = pool->blocks; + pool->blocks = tem; + memcpy(tem->s, pool->start, pool->ptr - pool->start); + pool->ptr = tem->s + (pool->ptr - pool->start); + pool->start = tem->s; + pool->end = tem->s + blockSize; + } + return 1; +} + +static int dtdInit(DTD *dtd) +{ + static const char *names[] = { "lt", "amp", "gt", "quot", "apos" }; + static const char chars[] = { '<', '&', '>', '"', '\'' }; + int i; + + poolInit(&(dtd->pool)); + hashTableInit(&(dtd->generalEntities)); + for (i = 0; i < 5; i++) { + ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], sizeof(ENTITY)); + if (!entity) + return 0; + entity->textPtr = chars + i; + entity->textLen = 1; + entity->magic = 1; + entity->wfInContent = 1; + entity->wfInAttribute = 1; + } + dtd->containsRef = 0; + dtd->groupSize = 0; + dtd->groupConnector = 0; + return 1; +} + +static void dtdDestroy(DTD *dtd) +{ + poolDestroy(&(dtd->pool)); + hashTableDestroy(&(dtd->generalEntities)); + free(dtd->groupConnector); +} + +static +enum WfCheckResult storeEntity(DTD *dtd, + const ENCODING *enc, + const char *entityNamePtr, + const char *entityNameEnd, + const char *entityTextPtr, + const char *entityTextEnd, + const char **badPtr) +{ + ENTITY *entity; + const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); + STRING_POOL *pool = &(dtd->pool); + if (entityNamePtr) { + if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd)) + return noMemory; + entity = (ENTITY *)lookup(&(dtd->generalEntities), pool->start, sizeof(ENTITY)); + if (entity->name != pool->start) { + poolDiscard(pool); + entityNamePtr = 0; + } + else + poolFinish(pool); + } + entityTextPtr += enc->minBytesPerChar; + entityTextEnd -= enc->minBytesPerChar; + for (;;) { + const char *next; + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + *badPtr = entityTextPtr; + return syntaxError; + case XML_TOK_NONE: + if (entityNamePtr) { + entity->textPtr = pool->start; + entity->textLen = pool->ptr - pool->start; + poolFinish(pool); + } + else + poolDiscard(pool); + return wellFormed; + case XML_TOK_ENTITY_REF: + case XML_TOK_DATA_CHARS: + if (!poolAppend(pool, enc, entityTextPtr, next)) + return noMemory; + break; + case XML_TOK_TRAILING_CR: + next = entityTextPtr + enc->minBytesPerChar; + /* fall through */ + case XML_TOK_DATA_NEWLINE: + if (pool->end == pool->ptr && !poolGrow(pool)) + return noMemory; + *(pool->ptr)++ = '\n'; + break; + case XML_TOK_CHAR_REF: + { + char buf[XML_MAX_BYTES_PER_CHAR]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + *badPtr = entityTextPtr; + return badCharRef; + } + n = XmlEncode(utf8, n, buf); + if (!n) { + *badPtr = entityTextPtr; + return badCharRef; + } + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && !poolGrow(pool)) + return noMemory; + *(pool->ptr)++ = buf[i]; + } + } + break; + case XML_TOK_PARTIAL: + *badPtr = entityTextPtr; + return invalidToken; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + default: + abort(); + } + entityTextPtr = next; + } + /* not reached */ } diff --git a/expat/xmlwf/wfcheck.h b/expat/xmlwf/wfcheck.h index 7c253222..f97bdbbd 100755 --- a/expat/xmlwf/wfcheck.h +++ b/expat/xmlwf/wfcheck.h @@ -11,11 +11,29 @@ enum WfCheckResult { partialChar, tagMismatch, duplicateAttribute, - junkAfterDocElement + junkAfterDocElement, + paramEntityRef, + condSect, + undefinedEntity, + recursiveEntityRef, + asyncEntity, + badCharRef, + binaryEntityRef, + attributeExternalEntityRef, + misplacedXmlPi, + unknownEncoding, + incorrectEncoding }; -enum WfCheckResult wfCheck(const char *s, size_t n, +enum EntityType { + documentEntity, + generalTextEntity +}; + +enum WfCheckResult wfCheck(enum EntityType entityType, + const char *s, size_t n, const char **errorPtr, unsigned long *errorLineNumber, unsigned long *errorColNumber); +const char *wfCheckMessage(enum WfCheckResult); diff --git a/expat/xmlwf/win32filemap.c b/expat/xmlwf/win32filemap.c index 62b210cb..ea57c6ab 100755 --- a/expat/xmlwf/win32filemap.c +++ b/expat/xmlwf/win32filemap.c @@ -3,6 +3,8 @@ #include #include "filemap.h" +static void win32perror(const char *); + int filemap(const char *name, void (*processor)(const void *, size_t, const char *, void *arg), void *arg) @@ -16,30 +18,36 @@ int filemap(const char *name, f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (f == INVALID_HANDLE_VALUE) { - fprintf(stderr, "%s: CreateFile failed\n", name); + win32perror(name); return 0; } size = GetFileSize(f, &sizeHi); + if (size == (DWORD)-1) { + win32perror(name); + return 0; + } if (sizeHi) { - fprintf(stderr, "%s: too big (limit 2Gb)\n", name); + fprintf(stderr, "%s: bigger than 2Gb\n", name); return 0; } /* CreateFileMapping barfs on zero length files */ if (size == 0) { - fprintf(stderr, "%s: zero-length file\n", name); - return 0; + static const char c = '\0'; + processor(&c, 0, name, arg); + CloseHandle(f); + return 1; } m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, NULL); if (m == NULL) { - fprintf(stderr, "%s: CreateFileMapping failed\n", name); + win32perror(name); CloseHandle(f); return 0; } p = MapViewOfFile(m, FILE_MAP_READ, 0, 0, 0); if (p == NULL) { + win32perror(name); CloseHandle(m); CloseHandle(f); - fprintf(stderr, "%s: MapViewOfFile failed\n", name); return 0; } processor(p, size, name, arg); @@ -48,3 +56,22 @@ int filemap(const char *name, CloseHandle(f); return 1; } + +static +void win32perror(const char *s) +{ + LPVOID buf; + if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, + GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &buf, + 0, + NULL)) { + fprintf(stderr, "%s: %s", s, buf); + fflush(stderr); + LocalFree(buf); + } + else + fprintf(stderr, "%s: unknown Windows error\n", s); +} diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index fce0ba65..94a93111 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -1,52 +1,59 @@ #include +#include #include "wfcheck.h" #include "filemap.h" +struct ProcessFileArg { + enum EntityType entityType; + int result; +}; + static -void processFile(const void *data, size_t size, const char *filename, void *arg) +void processFile(const void *data, size_t size, const char *filename, void *p) { const char *badPtr = 0; unsigned long badLine = 0; unsigned long badCol = 0; - int *ret = arg; + struct ProcessFileArg *arg = p; enum WfCheckResult result; - result = wfCheck(data, size, &badPtr, &badLine, &badCol); + result = wfCheck(arg->entityType, data, size, &badPtr, &badLine, &badCol); if (result) { - static const char *message[] = { - 0, - "out of memory", - "syntax error", - "no element found", - "invalid token", - "unclosed token", - "unclosed token", - "mismatched tag", - "duplicate attribute", - "junk after document element", - }; - fprintf(stderr, "%s:", filename); + const char *msg = wfCheckMessage(result); + fprintf(stdout, "%s:", filename); if (badPtr != 0) - fprintf(stderr, "%lu:%lu:", badLine+1, badCol); - fprintf(stderr, "E: %s", message[result]); - putc('\n', stderr); - if (!*ret) - *ret = 1; + fprintf(stdout, "%lu:%lu:", badLine+1, badCol); + fprintf(stdout, "E: %s", msg ? msg : "(unknown message)"); + putc('\n', stdout); + arg->result = 1; } + else + arg->result = 0; } - int main(int argc, char **argv) { - int i; + int i = 1; int ret = 0; - if (argc == 1) { - fprintf(stderr, "usage: %s filename ...\n", argv[0]); + struct ProcessFileArg arg; + + arg.entityType = documentEntity; + + if (i < argc && strcmp(argv[i], "-g") == 0) { + i++; + arg.entityType = generalTextEntity; + } + if (i < argc && strcmp(argv[i], "--") == 0) + i++; + if (i == argc) { + fprintf(stderr, "usage: %s [-g] filename ...\n", argv[0]); return 1; } - for (i = 1; i < argc; i++) { - if (!filemap(argv[i], processFile, &ret)) + for (; i < argc; i++) { + if (!filemap(argv[i], processFile, &arg)) ret = 2; + else if (arg.result && !ret) + ret = 1; } return ret; }