Use ints rather than unsigned shorts in unknown encoding support

This commit is contained in:
James Clark 1998-06-03 07:52:49 +00:00
parent d38e459410
commit 0e864d50fb
7 changed files with 53 additions and 44 deletions

View file

@ -1350,7 +1350,7 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
XML_Encoding info;
int i;
for (i = 0; i < 256; i++)
info.map[i] = 0;
info.map[i] = -1;
info.convert = 0;
info.data = 0;
info.release = 0;

View file

@ -111,9 +111,9 @@ typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
const XML_Char *publicId);
/* Description of an encoding handed to the unknown-encoding support.
   map has one entry per possible byte value (256 entries), convert is a
   callback that receives data together with a pointer into the input, and
   release is a callback that receives data.  The map and convert members
   are each listed twice: first in their unsigned short form, then in the
   int form that replaces it.
   NOTE(review): the full meaning of individual map values is defined by
   XmlInitUnknownEncoding, which is only partly visible here -- confirm
   against that function before relying on specific values. */
typedef struct {
unsigned short map[256];
int map[256];
void *data;
unsigned short (*convert)(void *data, const char *s);
int (*convert)(void *data, const char *s);
void (*release)(void *data);
} XML_Encoding;

View file

@ -962,7 +962,7 @@ int XmlUtf16Encode(int charNum, unsigned short *buf)
struct unknown_encoding {
struct normal_encoding normal;
unsigned short (*convert)(void *userData, const char *p);
int (*convert)(void *userData, const char *p);
void *userData;
unsigned short utf16[256];
unsigned char utf8[256][4];
@ -976,24 +976,29 @@ int XmlSizeOfUnknownEncoding()
/* Name-character test for an unknown encoding: the character at p is decoded
   through the encoding's convert callback (called with the encoding's
   userData) and the result is looked up in namePages.
   The two declarations of c are the unsigned short form followed by the int
   form that replaces it.  With an int result, any value that has bits set
   outside 0xFFFF is reported as "not a name character" (return 0) before the
   table lookup is attempted. */
static
int unknown_isName(const ENCODING *enc, const char *p)
{
unsigned short c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
/* After this guard c is within 0..0xFFFF, so c >> 8 is within 0..255. */
if (c & ~0xFFFF)
return 0;
return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
}
/* Name-start-character test for an unknown encoding: the character at p is
   decoded through the encoding's convert callback (called with the
   encoding's userData) and the result is looked up in nmstrtPages.
   The two declarations of c are the unsigned short form followed by the int
   form that replaces it.  With an int result, any value that has bits set
   outside 0xFFFF yields 0 before the table lookup is attempted. */
static
int unknown_isNmstrt(const ENCODING *enc, const char *p)
{
unsigned short c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
/* After this guard c is within 0..0xFFFF, so c >> 8 is within 0..255. */
if (c & ~0xFFFF)
return 0;
return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
}
/* Reports whether the character at p is invalid in this unknown encoding,
   based on what the encoding's convert callback returns for it.
   Two forms of the body are listed.  The first return statement is the
   earlier form, in which a convert result of 0 meant "invalid".  The lines
   after it are the int-based form: the character is invalid when the result
   has bits set outside 0xFFFF, or when checkCharRefNumber returns a negative
   value for it. */
static
int unknown_isInvalid(const ENCODING *enc, const char *p)
{
return ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p) == 0;
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
static
@ -1010,9 +1015,8 @@ void unknown_toUtf8(const ENCODING *enc,
utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
unsigned short c
= ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
break;
@ -1040,7 +1044,7 @@ void unknown_toUtf16(const ENCODING *enc,
unsigned short c
= ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
if (c == 0) {
c = ((const struct unknown_encoding *)enc)
c = (unsigned short)((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2);
@ -1053,8 +1057,8 @@ void unknown_toUtf16(const ENCODING *enc,
ENCODING *
XmlInitUnknownEncoding(void *mem,
unsigned short *table,
unsigned short (*convert)(void *userData, const char *p),
int *table,
int (*convert)(void *userData, const char *p),
void *userData)
{
int i;
@ -1067,23 +1071,25 @@ XmlInitUnknownEncoding(void *mem,
&& table[i] != i)
return 0;
for (i = 0; i < 256; i++) {
unsigned short c = table[i];
if (c < 0x80) {
int c = table[i];
if (c == -1)
c = 0xFFFF;
if (c < 0) {
if (c < -4)
return 0;
e->normal.type[i] = BT_LEAD2 - (c + 2);
e->utf8[i][0] = 0;
e->utf16[i] = 0;
}
else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
return 0;
if (c >= 2 && c <= 4) {
e->normal.type[i] = BT_LEAD2 + (c - 2);
e->utf8[i][0] = 0;
e->utf16[i] = 0;
}
else {
e->normal.type[i] = latin1_encoding.type[c];
e->utf8[i][0] = 1;
e->utf8[i][1] = (char)c;
e->utf16[i] = c == 0 ? 0xFFFF : c;
}
return 0;
e->normal.type[i] = latin1_encoding.type[c];
e->utf8[i][0] = 1;
e->utf8[i][1] = (char)c;
e->utf16[i] = c == 0 ? 0xFFFF : c;
}
else if (checkCharRefNumber(c) < 0) {
e->normal.type[i] = BT_NONXML;
@ -1092,6 +1098,8 @@ XmlInitUnknownEncoding(void *mem,
e->utf8[i][1] = 0;
}
else {
if (c > 0xFFFF)
return 0;
if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
e->normal.type[i] = BT_NMSTRT;
else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))

View file

@ -265,8 +265,8 @@ int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf);
/* Size query that accompanies XmlInitUnknownEncoding.
   NOTE(review): presumably the number of bytes the caller must provide as
   mem below -- confirm against the definition. */
int XMLTOKAPI XmlSizeOfUnknownEncoding();
/* Builds an ENCODING for an unknown encoding in the caller-supplied memory
   mem.  table is read at indices 0..255 by the definition, and convert is
   invoked with userData plus a pointer into the input.  The definition
   returns 0 when it rejects an entry of table.
   The table and convert parameters are each listed twice: first in their
   unsigned short form, then in the int form that replaces it. */
ENCODING XMLTOKAPI *
XmlInitUnknownEncoding(void *mem,
unsigned short *table,
unsigned short (*convert)(void *userData, const char *p),
int *table,
int (*convert)(void *userData, const char *p),
void *userData);
#ifdef __cplusplus

View file

@ -23,14 +23,14 @@ Contributor(s):
#ifdef WIN32
#include <windows.h>
int codepageMap(int cp, unsigned short *map)
int codepageMap(int cp, int *map)
{
int i;
CPINFO info;
if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2)
return 0;
for (i = 0; i < 256; i++)
map[i] = 0;
map[i] = -1;
if (info.MaxCharSize > 1) {
for (i = 0; i < MAX_LEADBYTES; i++) {
int j, lim;
@ -38,27 +38,28 @@ int codepageMap(int cp, unsigned short *map)
break;
lim = info.LeadByte[i + 1];
for (j = info.LeadByte[i]; j < lim; j++)
map[j] = 2;
map[j] = -2;
}
}
for (i = 0; i < 256; i++) {
if (map[i] == 0) {
if (map[i] == -1) {
char c = i;
unsigned short n;
if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
&c, 1, map + i, 1) == 0)
map[i] = 0;
&c, 1, &n, 1) == 1)
map[i] = n;
}
}
return 1;
}
/* WIN32 implementation: asks MultiByteToWideChar to convert the two bytes at
   p, interpreted in code page cp, into a single 16-bit unit, and returns that
   unit when exactly one was produced.
   The signature and the failure return are each listed twice: first in the
   unsigned short / 0 form, then in the int / -1 form that replaces it. */
unsigned short codepageConvert(int cp, const char *p)
int codepageConvert(int cp, const char *p)
{
unsigned short c;
/* MB_ERR_INVALID_CHARS is passed so that invalid input is reported as a
   failure by the API; any result other than 1 falls through to the failure
   return below. */
if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
p, 2, &c, 1) == 1)
return c;
return 0;
return -1;
}
#else /* not WIN32 */
@ -70,7 +71,7 @@ int codepageMap(int cp, unsigned short *map)
/* Fallback used when WIN32 is not defined: no code page conversion is
   available, so every lookup reports failure.
   The result type is int so that this definition agrees with the prototype
   `int codepageConvert(int cp, const char *p);`, and the failure value is -1,
   the same value the WIN32 implementation returns when MultiByteToWideChar
   cannot convert the bytes.  Returning -1 through an unsigned short would
   hand callers a positive value instead of a negative one.
   cp and p are accepted only to match the prototype; neither is examined. */
int codepageConvert(int cp, const char *p)
{
  return -1;
}
#endif /* not WIN32 */

View file

@ -18,5 +18,5 @@ James Clark. All Rights Reserved.
Contributor(s):
*/
/* Code page helpers.  Each prototype is listed twice: first in its unsigned
   short form, then in the int form that replaces it. */
/* codepageMap fills map, indexed by byte value 0..255, for code page cp.
   In the WIN32 implementation it returns 0 when GetCPInfo fails or the code
   page needs more than two bytes per character, and 1 otherwise. */
int codepageMap(int cp, unsigned short *map);
/* codepageConvert converts the bytes at p using code page cp; in the int
   form a failed conversion is reported as -1. */
unsigned short codepageConvert(int cp, const char *p);
int codepageMap(int cp, int *map);
int codepageConvert(int cp, const char *p);

View file

@ -405,7 +405,7 @@ int externalEntityRefStream(XML_Parser parser,
}
/* Adapter with the (data, p) callback shape: data is read as a pointer to an
   int holding a code page number, and that number is forwarded together with
   p to codepageConvert, whose result is returned unchanged.
   The signature is listed twice: first with an unsigned short result, then
   with the int result that replaces it. */
static
unsigned short unknownEncodingConvert(void *data, const char *p)
int unknownEncodingConvert(void *data, const char *p)
{
return codepageConvert(*(int *)data, p);
}