diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index b8ec1e91..57906d42 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -1350,7 +1350,7 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) XML_Encoding info; int i; for (i = 0; i < 256; i++) - info.map[i] = 0; + info.map[i] = -1; info.convert = 0; info.data = 0; info.release = 0; diff --git a/expat/xmlparse/xmlparse.h b/expat/xmlparse/xmlparse.h index edd16c9e..680f8141 100755 --- a/expat/xmlparse/xmlparse.h +++ b/expat/xmlparse/xmlparse.h @@ -111,9 +111,9 @@ typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, const XML_Char *publicId); typedef struct { - unsigned short map[256]; + int map[256]; void *data; - unsigned short (*convert)(void *data, const char *s); + int (*convert)(void *data, const char *s); void (*release)(void *data); } XML_Encoding; diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index 94eaa109..2c7129e3 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -962,7 +962,7 @@ int XmlUtf16Encode(int charNum, unsigned short *buf) struct unknown_encoding { struct normal_encoding normal; - unsigned short (*convert)(void *userData, const char *p); + int (*convert)(void *userData, const char *p); void *userData; unsigned short utf16[256]; unsigned char utf8[256][4]; @@ -976,24 +976,29 @@ int XmlSizeOfUnknownEncoding() static int unknown_isName(const ENCODING *enc, const char *p) { - unsigned short c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + int c = ((const struct unknown_encoding *)enc) + ->convert(((const struct unknown_encoding *)enc)->userData, p); + if (c & ~0xFFFF) + return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); } static int unknown_isNmstrt(const ENCODING *enc, const char *p) { - unsigned short c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + int c = ((const struct unknown_encoding *)enc) + ->convert(((const struct unknown_encoding *)enc)->userData, p); + if (c & ~0xFFFF) + return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); } static int unknown_isInvalid(const ENCODING *enc, const char *p) { - return ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p) == 0; + int c = ((const struct unknown_encoding *)enc) + ->convert(((const struct unknown_encoding *)enc)->userData, p); + return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } static @@ -1010,9 +1015,8 @@ void unknown_toUtf8(const ENCODING *enc, utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { - unsigned short c - = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); + int c = ((const struct unknown_encoding *)enc) + ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) break; @@ -1040,7 +1044,7 @@ void unknown_toUtf16(const ENCODING *enc, unsigned short c = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; if (c == 0) { - c = ((const struct unknown_encoding *)enc) + c = (unsigned short)((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2); @@ -1053,8 +1057,8 @@ void unknown_toUtf16(const ENCODING *enc, ENCODING * XmlInitUnknownEncoding(void *mem, - unsigned short *table, - unsigned short (*convert)(void *userData, const char *p), + int *table, + int (*convert)(void *userData, const char *p), void *userData) { int i; @@ -1067,23 +1071,25 @@ XmlInitUnknownEncoding(void *mem, && table[i] != i) return 0; for (i = 0; i < 256; i++) { - unsigned short c = table[i]; - if (c < 0x80) { + int c = table[i]; + if (c == -1) + c = 0xFFFF; + if (c < 0) { + if (c < -4) + return 0; + e->normal.type[i] = BT_LEAD2 - (c + 2); + e->utf8[i][0] = 0; + e->utf16[i] = 0; + } + else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER && latin1_encoding.type[c] != BT_NONXML && c != i) - return 0; - if (c >= 2 && c <= 4) { - e->normal.type[i] = BT_LEAD2 + (c - 2); - e->utf8[i][0] = 0; - e->utf16[i] = 0; - } - else { - e->normal.type[i] = latin1_encoding.type[c]; - e->utf8[i][0] = 1; - e->utf8[i][1] = (char)c; - e->utf16[i] = c == 0 ? 0xFFFF : c; - } + return 0; + e->normal.type[i] = latin1_encoding.type[c]; + e->utf8[i][0] = 1; + e->utf8[i][1] = (char)c; + e->utf16[i] = c == 0 ? 0xFFFF : c; } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; @@ -1092,6 +1098,8 @@ XmlInitUnknownEncoding(void *mem, e->utf8[i][1] = 0; } else { + if (c > 0xFFFF) + return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) e->normal.type[i] = BT_NMSTRT; else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index f28414a9..6d0b91df 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -265,8 +265,8 @@ int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf); int XMLTOKAPI XmlSizeOfUnknownEncoding(); ENCODING XMLTOKAPI * XmlInitUnknownEncoding(void *mem, - unsigned short *table, - unsigned short (*convert)(void *userData, const char *p), + int *table, + int (*convert)(void *userData, const char *p), void *userData); #ifdef __cplusplus diff --git a/expat/xmlwf/codepage.c b/expat/xmlwf/codepage.c index 8144e954..d4791c36 100755 --- a/expat/xmlwf/codepage.c +++ b/expat/xmlwf/codepage.c @@ -23,14 +23,14 @@ Contributor(s): #ifdef WIN32 #include -int codepageMap(int cp, unsigned short *map) +int codepageMap(int cp, int *map) { int i; CPINFO info; if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2) return 0; for (i = 0; i < 256; i++) - map[i] = 0; + map[i] = -1; if (info.MaxCharSize > 1) { for (i = 0; i < MAX_LEADBYTES; i++) { int j, lim; @@ -38,27 +38,28 @@ int codepageMap(int cp, unsigned short *map) break; lim = info.LeadByte[i + 1]; for (j = info.LeadByte[i]; j < lim; j++) - map[j] = 2; + map[j] = -2; } } for (i = 0; i < 256; i++) { - if (map[i] == 0) { + if (map[i] == -1) { char c = i; + unsigned short n; if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, - &c, 1, map + i, 1) == 0) - map[i] = 0; + &c, 1, &n, 1) == 1) + map[i] = n; } } return 1; } -unsigned short codepageConvert(int cp, const char *p) +int codepageConvert(int cp, const char *p) { unsigned short c; if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, p, 2, &c, 1) == 1) return c; - return 0; + return -1; } #else /* not WIN32 */ @@ -70,7 +71,7 @@ int codepageMap(int cp, unsigned short *map) unsigned short codepageConvert(int cp, const char *p) { - return 0; + return -1; } #endif /* not WIN32 */ diff --git a/expat/xmlwf/codepage.h b/expat/xmlwf/codepage.h index a8563276..94c66f56 100755 --- a/expat/xmlwf/codepage.h +++ b/expat/xmlwf/codepage.h @@ -18,5 +18,5 @@ James Clark. All Rights Reserved. Contributor(s): */ -int codepageMap(int cp, unsigned short *map); -unsigned short codepageConvert(int cp, const char *p); +int codepageMap(int cp, int *map); +int codepageConvert(int cp, const char *p); diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index 80a69d78..72ce770a 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -405,7 +405,7 @@ int externalEntityRefStream(XML_Parser parser, } static -unsigned short unknownEncodingConvert(void *data, const char *p) +int unknownEncodingConvert(void *data, const char *p) { return codepageConvert(*(int *)data, p); }