Allow MINBPC to depend on enc.

Separate out ns dependent duplicate code into xmltok_ns.c
This commit is contained in:
James Clark 1998-11-17 09:51:18 +00:00
parent 73b6649f5d
commit 947cd0feac

View file

@ -156,7 +156,7 @@ static int checkCharRefNumber(int);
#include "xmltok_impl.h"
/* minimum bytes per character */
#define MINBPC 1
#define MINBPC(enc) 1
#define BYTE_TYPE(enc, p) \
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#define BYTE_TO_ASCII(enc, p) (*p)
@ -483,7 +483,7 @@ void PREFIX(toUtf16)(const ENCODING *enc, \
}
#define PREFIX(ident) little2_ ## ident
#define MINBPC 2
#define MINBPC(enc) 2
#define BYTE_TYPE(enc, p) \
((p)[1] == 0 \
? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
@ -583,7 +583,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC 2
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) \
((p)[0] == 0 \
@ -701,60 +701,6 @@ int streqci(const char *s1, const char *s2)
return 1;
}
static
int initScan(const ENCODING *enc, int state, const char *ptr, const char *end,
const char **nextTokPtr)
{
const ENCODING **encPtr;
if (ptr == end)
return XML_TOK_NONE;
encPtr = ((const INIT_ENCODING *)enc)->encPtr;
if (ptr + 1 == end) {
switch ((unsigned char)*ptr) {
case 0xFE:
case 0xFF:
case 0x00:
case 0x3C:
return XML_TOK_PARTIAL;
}
}
else {
switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
case 0x003C:
*encPtr = &big2_encoding.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFEFF:
*nextTokPtr = ptr + 2;
*encPtr = &big2_encoding.enc;
return XML_TOK_BOM;
case 0x3C00:
*encPtr = &little2_encoding.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFFFE:
*nextTokPtr = ptr + 2;
*encPtr = &little2_encoding.enc;
return XML_TOK_BOM;
}
}
*encPtr = &utf8_encoding.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
static
int initScanProlog(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
}
static
int initScanContent(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
}
static
void initUpdatePosition(const ENCODING *enc, const char *ptr,
const char *end, POSITION *pos)
@ -762,50 +708,6 @@ void initUpdatePosition(const ENCODING *enc, const char *ptr,
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}
const ENCODING *XmlGetUtf8InternalEncoding()
{
return &internal_utf8_encoding.enc;
}
const ENCODING *XmlGetUtf16InternalEncoding()
{
#if BYTE_ORDER == 12
return &internal_little2_encoding.enc;
#elif BYTE_ORDER == 21
return &internal_big2_encoding.enc;
#else
const short n = 1;
return *(const char *)&n ? &internal_little2_encoding.enc : &internal_big2_encoding.enc;
#endif
}
int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{
if (name) {
if (streqci(name, "ISO-8859-1")) {
*encPtr = &latin1_encoding.enc;
return 1;
}
if (streqci(name, "UTF-8")) {
*encPtr = &utf8_encoding.enc;
return 1;
}
if (streqci(name, "US-ASCII")) {
*encPtr = &ascii_encoding.enc;
return 1;
}
if (!streqci(name, "UTF-16"))
return 0;
}
p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog;
p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent;
p->initEnc.updatePosition = initUpdatePosition;
p->initEnc.minBytesPerChar = 1;
p->encPtr = encPtr;
*encPtr = &(p->initEnc);
return 1;
}
static
int toAscii(const ENCODING *enc, const char *ptr, const char *end)
{
@ -914,36 +816,6 @@ int parsePseudoAttribute(const ENCODING *enc,
return 1;
}
static
const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
char buf[ENCODING_MAX];
char *p = buf;
int i;
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
if (ptr != end)
return 0;
*p = 0;
for (i = 0; buf[i]; i++) {
if ('a' <= buf[i] && buf[i] <= 'z')
buf[i] += 'A' - 'a';
}
if (streqci(buf, "UTF-8"))
return &utf8_encoding.enc;
if (streqci(buf, "ISO-8859-1"))
return &latin1_encoding.enc;
if (streqci(buf, "US-ASCII"))
return &ascii_encoding.enc;
if (streqci(buf, "UTF-16")) {
static const unsigned short n = 1;
if (enc->minBytesPerChar == 2)
return enc;
return &big2_encoding.enc;
}
return 0;
}
static
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
@ -1024,19 +896,6 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
return 1;
}
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
return doParseXmlDecl(findEncoding, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone);
}
static
int checkCharRefNumber(int result)
{
@ -1283,151 +1142,21 @@ XmlInitUnknownEncoding(void *mem,
return &(e->normal.enc);
}
#define NS(x) x
#define ns(x) x
#include "xmltok_ns.c"
#undef NS
#undef ns
#ifdef XMLNS
const ENCODING *XmlGetUtf8InternalEncodingNS()
{
return &internal_utf8_encoding_ns.enc;
}
#define NS(x) x ## NS
#define ns(x) x ## _ns
const ENCODING *XmlGetUtf16InternalEncodingNS()
{
#if BYTE_ORDER == 12
return &internal_little2_encoding_ns.enc;
#elif BYTE_ORDER == 21
return &internal_big2_encoding_ns.enc;
#else
const short n = 1;
return *(const char *)&n ? &internal_little2_encoding_ns.enc : &internal_big2_encoding_ns.enc;
#endif
}
#include "xmltok_ns.c"
static
int initScanNS(const ENCODING *enc, int state, const char *ptr, const char *end,
const char **nextTokPtr)
{
const ENCODING **encPtr;
if (ptr == end)
return XML_TOK_NONE;
encPtr = ((const INIT_ENCODING *)enc)->encPtr;
if (ptr + 1 == end) {
switch ((unsigned char)*ptr) {
case 0xFE:
case 0xFF:
case 0x00:
case 0x3C:
return XML_TOK_PARTIAL;
}
}
else {
switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
case 0x003C:
*encPtr = &big2_encoding_ns.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFEFF:
*nextTokPtr = ptr + 2;
*encPtr = &big2_encoding_ns.enc;
return XML_TOK_BOM;
case 0x3C00:
*encPtr = &little2_encoding_ns.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFFFE:
*nextTokPtr = ptr + 2;
*encPtr = &little2_encoding_ns.enc;
return XML_TOK_BOM;
}
}
*encPtr = &utf8_encoding_ns.enc;
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
static
int initScanPrologNS(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScanNS(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
}
static
int initScanContentNS(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScanNS(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
}
int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{
if (name) {
if (streqci(name, "ISO-8859-1")) {
*encPtr = &latin1_encoding_ns.enc;
return 1;
}
if (streqci(name, "UTF-8")) {
*encPtr = &utf8_encoding_ns.enc;
return 1;
}
if (streqci(name, "US-ASCII")) {
*encPtr = &ascii_encoding_ns.enc;
return 1;
}
if (!streqci(name, "UTF-16"))
return 0;
}
p->initEnc.scanners[XML_PROLOG_STATE] = initScanPrologNS;
p->initEnc.scanners[XML_CONTENT_STATE] = initScanContentNS;
p->initEnc.updatePosition = initUpdatePosition;
p->initEnc.minBytesPerChar = 1;
p->encPtr = encPtr;
*encPtr = &(p->initEnc);
return 1;
}
static
const ENCODING *findEncodingNS(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
char buf[ENCODING_MAX];
char *p = buf;
int i;
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
if (ptr != end)
return 0;
*p = 0;
for (i = 0; buf[i]; i++) {
if ('a' <= buf[i] && buf[i] <= 'z')
buf[i] += 'A' - 'a';
}
if (streqci(buf, "UTF-8"))
return &utf8_encoding_ns.enc;
if (streqci(buf, "ISO-8859-1"))
return &latin1_encoding_ns.enc;
if (streqci(buf, "US-ASCII"))
return &ascii_encoding_ns.enc;
if (streqci(buf, "UTF-16")) {
static const unsigned short n = 1;
if (enc->minBytesPerChar == 2)
return enc;
return &big2_encoding_ns.enc;
}
return 0;
}
int XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
return doParseXmlDecl(findEncodingNS, isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone);
}
#undef NS
#undef ns
ENCODING *
XmlInitUnknownEncodingNS(void *mem,