ICU-3346 support DBCS-only and other delta (extension-only) .cnv files

X-SVN-Rev: 13637
This commit is contained in:
Markus Scherer 2003-11-07 23:57:24 +00:00
parent 558442a420
commit 693cbae3a7
5 changed files with 321 additions and 152 deletions

View file

@ -43,6 +43,23 @@ conversion {
toUnicode {
Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
Cases {
// DBCS-only extensions
{
"ibm-16684",
:bin{ 430e4395ecc1404042e1 },
"\ufffd\u30C8\u30C8\u309A\u3000\u20ac",
:intvector{ 0, 2, 4, 4, 6, 8 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"ibm-1399",
:bin{ 430e4395ecc140400fe1 },
"\uff62\u30C8\u30C8\u309A\u3000\u20ac",
:intvector{ 0, 2, 4, 4, 6, 9 },
:int{1}, :int{0}, "", "?", :bin{""}
}
// extensions
{
"ibm-1390",
@ -144,6 +161,31 @@ conversion {
fromUnicode {
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
Cases {
// DBCS-only extensions
{
"ibm-1390,swaplfnl",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ 430e4395ecc140400fc1e115 },
:intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
:int{1}, :int{0}, "", "?", ""
}
{
"ibm-16684",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ fefe4395ecc14040fefe42e1fefe },
:intvector{ 0, 0, 1, 1, 2, 2, 4, 4, 5, 5, 6, 6, 7, 7 },
:int{1}, :int{0}, "", "?", ""
}
{
"ibm-1399",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ 440e4395ecc140400fc1e125 },
:intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
:int{1}, :int{0}, "", "?", ""
}
// <subchar1> from |2 mappings
{
"ibm-1390",
@ -296,6 +338,16 @@ conversion {
// which - numeric UConverterUnicodeSet value
Headers { "charset", "map", "mapnot", "which" }
Cases {
// DBCS-only
{
"ibm-16684",
"[\xa0\xa1\xa4\xa6-\xab\xad-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"
"{\u0254\u0300}{\u0254\u0301}{\u304b\u309a}{\u30ad\u309a}{\u30af\u309a}]",
"[\x00-0x9f\xa2\xa3\xa5\xac\u0200-\u024f\U00010000-\U0001ffff\U0002a61b-\U0002a6b1]",
:int{0}
}
// extensions
{
"ibm-1390",
"[\x00-\x0d\x10-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"

View file

@ -29,7 +29,7 @@
#include "uoptions.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "ucmpwrit.h"
#include "uparse.h"
#include "ucm.h"
#include "makeconv.h"
#include "genmbcs.h"
@ -305,18 +305,7 @@ int main(int argc, char* argv[])
const char *basename;
/* find the last file separator */
basename = uprv_strrchr(arg, U_FILE_SEP_CHAR);
if (basename == NULL) {
basename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
if (basename == NULL) {
basename = arg;
} else {
++basename;
}
} else {
++basename;
}
basename = findBasename(arg);
uprv_strcpy(outBasename, basename);
}
else
@ -593,53 +582,6 @@ readHeader(ConvData *data,
}
}
/*
 * Read one mapping table from convFile into data->ucm, one line at a time,
 * until the "END CHARMAP" line is seen.
 * Trailing CR/LF characters are removed from each line; empty lines and
 * lines starting with '#' are skipped; every other line is passed to
 * ucm_addMappingFromLine() together with forBase and baseStates.
 *
 * Does nothing if *pErrorCode already indicates a failure.
 * Sets *pErrorCode to U_INVALID_TABLE_FORMAT if the input ends before
 * "END CHARMAP" or if ucm_addMappingFromLine() returned FALSE for any line.
 */
static void
readTable(ConvData *data, FileStream* convFile,
          UBool forBase, UCMStates *baseStates,
          UErrorCode *pErrorCode) {
    char buffer[500];
    char *tail;
    UBool allGood=TRUE, finished=FALSE;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    while(!finished) {
        if(!T_FileStream_readLine(convFile, buffer, sizeof(buffer))) {
            /* ran out of input before the terminator line */
            fprintf(stderr, "incomplete charmap section\n");
            allGood=FALSE;
            finished=TRUE;
        } else {
            /* cut off trailing CR and LF characters */
            tail=uprv_strchr(buffer, 0);
            while(buffer<tail && (tail[-1]=='\r' || tail[-1]=='\n')) {
                --tail;
            }
            *tail=0;

            if(buffer[0]==0 || buffer[0]=='#') {
                /* nothing to parse on empty and comment lines */
            } else if(0==uprv_strcmp(buffer, "END CHARMAP")) {
                /* the terminator line ends the mapping table */
                finished=TRUE;
            } else if(!ucm_addMappingFromLine(data->ucm, buffer, forBase, baseStates)) {
                /* remember the failure but keep reading the rest of the table */
                allGood=FALSE;
            }
        }
    }

    if(!allGood) {
        *pErrorCode=U_INVALID_TABLE_FORMAT;
    }
}
/* return TRUE if a base table was read, FALSE for an extension table */
static UBool
readFile(ConvData *data, const char* converterName,
@ -647,6 +589,8 @@ readFile(ConvData *data, const char* converterName,
char line[200];
char *end;
FileStream *convFile;
UCMStates *baseStates;
UBool dataIsBase;
if(U_FAILURE(*pErrorCode)) {
@ -668,37 +612,39 @@ readFile(ConvData *data, const char* converterName,
if(data->ucm->baseName[0]==0) {
dataIsBase=TRUE;
ucm_processStates(&data->ucm->states);
/* read the base table */
readTable(data, convFile, TRUE, &data->ucm->states, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return FALSE;
}
/* read an extension table if there is one */
while(T_FileStream_readLine(convFile, line, sizeof(line))) {
end=uprv_strchr(line, 0);
while(line<end &&
(*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
--end;
}
*end=0;
if(0==uprv_strcmp(line, "CHARMAP")) {
/* read the extension table */
readTable(data, convFile, FALSE, &data->ucm->states, pErrorCode);
break;
}
}
baseStates=&data->ucm->states;
ucm_processStates(baseStates);
} else {
/* read only the extension table */
dataIsBase=FALSE;
readTable(data, convFile, FALSE, NULL, pErrorCode);
baseStates=NULL;
}
/* ### TODO enable extension-only tables, Jitterbug 3346 */
fprintf(stderr, "error: delta/extension-only conversion tables are not yet supported\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
/* read the first table: the base table, or the only table of an extension-only file */
ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return FALSE;
}
/* read an extension table if there is one */
while(T_FileStream_readLine(convFile, line, sizeof(line))) {
end=uprv_strchr(line, 0);
while(line<end &&
(*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
--end;
}
*end=0;
if(line[0]=='#' || u_skipWhitespace(line)==end) {
continue; /* ignore empty and comment lines */
}
if(0==uprv_strcmp(line, "CHARMAP")) {
/* read the extension table */
ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
} else {
fprintf(stderr, "unexpected text after the base mapping table\n");
}
break;
}
T_FileStream_close(convFile);
@ -712,7 +658,7 @@ readFile(ConvData *data, const char* converterName,
}
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode) {
createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
ConvData baseData;
UBool dataIsBase;
@ -722,17 +668,11 @@ createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
initConvData(data);
/* ### TODO if there is an extension table:
1. the base table must use precision flags
2. check base vs. extension for mappings overlap
*/
dataIsBase=readFile(data, converterName, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
initConvData(&baseData);
if(dataIsBase) {
data->cnvData=MBCSOpen(data->ucm);
if(data->cnvData==NULL) {
@ -751,7 +691,7 @@ createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
} else if(
!ucm_checkBaseExt(&data->ucm->states, data->ucm->base, data->ucm->ext, TRUE) ||
!ucm_checkBaseExt(&data->ucm->states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
@ -765,20 +705,41 @@ createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
} else {
/* ### TODO assemble a path/filename for data->ucm->states.baseName */
/* must be TRUE */readFile(&baseData, ""/*extConverterName*/, pErrorCode);
/* ### TODO read extension table */
/* ### TODO - actually write the mappings into genmbcs or into ext */
char baseFilename[500];
char *basename;
if( !ucm_checkValidity(data->ucm->ext, &baseData.ucm->states) ||
!ucm_checkBaseExt(&baseData.ucm->states, baseData.ucm->base, data->ucm->ext, FALSE) ||
!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
) {
initConvData(&baseData);
/* assemble a path/filename for data->ucm->baseName */
uprv_strcpy(baseFilename, converterName);
basename=(char *)findBasename(baseFilename);
uprv_strcpy(basename, data->ucm->baseName);
uprv_strcat(basename, ".ucm");
/* read the base table */
dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
} else if(!dataIsBase) {
fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
} else {
/* prepare the extension table */
data->extData=CnvExtOpen(data->ucm);
if(data->extData==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
cleanupConvData(&baseData);
} else if(
!ucm_checkValidity(data->ucm->ext, &baseData.ucm->states) ||
!ucm_checkBaseExt(&baseData.ucm->states, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
cleanupConvData(&baseData);
}
}
/*

View file

@ -59,18 +59,19 @@ getLongPathname(const char *pathname) {
/*
 * Return a pointer to the basename portion of filename, that is, to the
 * character after the last file separator, or filename itself if it
 * contains no separator. The result points into filename.
 *
 * The last U_FILE_SEP_CHAR is looked for first. Only if there is none, and
 * only if U_FILE_ALT_SEP_CHAR differs from U_FILE_SEP_CHAR, the last
 * alternate separator is used instead.
 */
U_CAPI const char * U_EXPORT2
findBasename(const char *filename) {
    const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR);

#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
    if(basename==NULL) {
        /* Use lenient matching on Windows, which can accept either \ or /
           This is useful for environments like Win32+CygWin which have both.
        */
        basename=uprv_strrchr(filename, U_FILE_ALT_SEP_CHAR);
    }
#endif

    if(basename!=NULL) {
        /* skip the separator itself */
        return basename+1;
    } else {
        /* no separator at all: the whole string is the basename */
        return filename;
    }
}

View file

@ -26,8 +26,10 @@
#include "unicode/ustring.h"
#include "cstring.h"
#include "cmemory.h"
#include "filestrm.h"
#include "uarrsort.h"
#include "ucnvmbcs.h"
#include "ucnv_bld.h"
#include "ucnv_ext.h"
#include "uparse.h"
#include "ucm.h"
@ -217,6 +219,10 @@ ucm_sortTable(UCMTable *t) {
UErrorCode errorCode;
int32_t i;
if(t->isSorted) {
return;
}
errorCode=U_ZERO_ERROR;
/* 1. sort by Unicode first */
@ -252,17 +258,18 @@ ucm_sortTable(UCMTable *t) {
u_errorName(errorCode));
exit(errorCode);
}
t->isSorted=TRUE;
}
enum {
MOVE_TO_EXT=0x10,
REMOVE_MAPPING=0x20,
MOVE_ANY=0x30
MOVE_TO_EXT=1,
REMOVE_MAPPING=2
};
/*
* move mappings with MOVE_ANY ored into their flags from the base table
* to the extension table
* move mappings with their move flag set from the base table
* and optionally to the extension table
*
* works only with explicit precision flags because it uses some of the
* flags bits
@ -276,10 +283,10 @@ moveMappings(UCMTable *base, UCMTable *ext) {
mbLimit=mb+base->mappingsLength;
while(mb<mbLimit) {
flag=mb->f;
if(flag&MOVE_ANY) {
/* restore the original flag value */
mb->f=flag&~MOVE_ANY;
flag=mb->moveFlag;
if(flag!=0) {
/* reset the move flag */
mb->moveFlag=0;
if(ext!=NULL && (flag&MOVE_TO_EXT)) {
/* add the mapping to the extension table */
@ -292,6 +299,7 @@ moveMappings(UCMTable *base, UCMTable *ext) {
}
--mbLimit;
--base->mappingsLength;
base->isSorted=FALSE;
} else {
++mb;
}
@ -304,10 +312,12 @@ enum {
};
static uint8_t
checkBaseExtUnicode(UCMTable *base, UCMTable *ext, UBool moveToExt) {
checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
UBool moveToExt, UBool intersectBase) {
UCMapping *mb, *me, *mbLimit, *meLimit;
int32_t cmp;
uint8_t result;
UBool isSISO;
mb=base->mappings;
mbLimit=mb+base->mappingsLength;
@ -317,6 +327,8 @@ checkBaseExtUnicode(UCMTable *base, UCMTable *ext, UBool moveToExt) {
result=0;
isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO);
for(;;) {
/* skip irrelevant mappings on both sides */
for(;;) {
@ -346,21 +358,32 @@ checkBaseExtUnicode(UCMTable *base, UCMTable *ext, UBool moveToExt) {
/* compare the base and extension mappings */
cmp=compareUnicode(base, mb, ext, me);
if(cmp<0) {
if(intersectBase && (!(isSISO && intersectBase==2) || mb->bLen>1)) {
/*
* mapping in base but not in ext, move it
*
* if base is EBCDIC_STATEFUL and ext is DBCS, move DBCS mappings here
* and check SBCS ones for Unicode prefix below
*/
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
/* does mb map from an input sequence that is a prefix of me's? */
if( mb->uLen<me->uLen &&
} else if( mb->uLen<me->uLen &&
0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen)
) {
if(moveToExt) {
/* mark this mapping to be moved to the extension table */
mb->f|=MOVE_TO_EXT;
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
"ucm error: the base table contains a mapping whose input sequence\n"
" is a prefix of the input sequence of an extension mapping\n");
ucm_printMapping(base, mb, stderr);
ucm_printMapping(ext, me, stderr);
result|=HAS_ERRORS;
}
result|=NEEDS_MOVE;
}
++mb;
@ -372,7 +395,11 @@ checkBaseExtUnicode(UCMTable *base, UCMTable *ext, UBool moveToExt) {
if( mb->f==me->f && mb->bLen==me->bLen &&
0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen)
) {
me->f|=REMOVE_MAPPING;
me->moveFlag|=REMOVE_MAPPING;
result|=NEEDS_MOVE;
} else if(intersectBase) {
/* mapping in base but not in ext, move it */
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
@ -392,7 +419,8 @@ checkBaseExtUnicode(UCMTable *base, UCMTable *ext, UBool moveToExt) {
}
static uint8_t
checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool moveToExt) {
checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
UBool moveToExt, UBool intersectBase) {
UCMapping *mb, *me;
int32_t *baseMap, *extMap;
int32_t b, e, bLimit, eLimit, cmp;
@ -412,17 +440,23 @@ checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool mo
for(;;) {
/* skip irrelevant mappings on both sides */
for(;;) {
for(;; ++b) {
if(b==bLimit) {
return result;
}
mb=base->mappings+baseMap[b];
if(isSISO && intersectBase==2 && mb->bLen==1) {
/*
* comparing an EBCDIC_STATEFUL base against a DBCS extension:
* leave SBCS base mappings alone
*/
continue;
}
if(mb->f==0 || mb->f==3) {
break;
}
++b;
}
for(;;) {
@ -441,18 +475,23 @@ checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool mo
/* compare the base and extension mappings */
cmp=compareBytes(base, mb, ext, me, TRUE);
if(cmp<0) {
if(intersectBase) {
/* mapping in base but not in ext, move it */
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
/*
* does mb map from an input sequence that is a prefix of me's?
* for SI/SO tables, a single byte is never a prefix because it
* occurs in a separate single-byte state
*/
if( mb->bLen<me->bLen &&
} else if( mb->bLen<me->bLen &&
(!isSISO || mb->bLen>1) &&
0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen)
) {
if(moveToExt) {
/* mark this mapping to be moved to the extension table */
mb->f|=MOVE_TO_EXT;
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
@ -473,7 +512,11 @@ checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool mo
if( mb->f==me->f && mb->uLen==me->uLen &&
0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen)
) {
me->f|=REMOVE_MAPPING;
me->moveFlag|=REMOVE_MAPPING;
result|=NEEDS_MOVE;
} else if(intersectBase) {
/* mapping in base but not in ext, move it */
mb->moveFlag|=MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
@ -515,12 +558,18 @@ ucm_checkValidity(UCMTable *table, UCMStates *baseStates) {
}
U_CAPI UBool U_EXPORT2
ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool moveToExt) {
ucm_checkBaseExt(UCMStates *baseStates,
UCMTable *base, UCMTable *ext, UCMTable *moveTarget,
UBool intersectBase) {
uint8_t result;
/* if we have an extension table, we must always use precision flags */
if(base->flagsType!=UCM_FLAGS_EXPLICIT || ext->flagsType!=UCM_FLAGS_EXPLICIT) {
fprintf(stderr, "ucm error: the base or extension table contains mappings without precision flags\n");
if(base->flagsType&UCM_FLAGS_IMPLICIT) {
fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n");
return FALSE;
}
if(ext->flagsType&UCM_FLAGS_IMPLICIT) {
fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n");
return FALSE;
}
@ -530,8 +579,8 @@ ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool mov
/* check */
result=
checkBaseExtUnicode(base, ext, moveToExt)|
checkBaseExtBytes(baseStates, base, ext, moveToExt);
checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)|
checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase);
if(result&HAS_ERRORS) {
return FALSE;
@ -539,9 +588,12 @@ ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool mov
if(result&NEEDS_MOVE) {
moveMappings(ext, NULL);
moveMappings(base, ext);
moveMappings(base, moveTarget);
ucm_sortTable(base);
ucm_sortTable(ext);
if(moveTarget!=NULL) {
ucm_sortTable(moveTarget);
}
}
return TRUE;
@ -640,6 +692,8 @@ ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable,
++toUMapping;
++toUIndex;
}
fromUTable->isSorted=FALSE;
}
/* separate extension mappings out of base table for rptp2ucm --------------- */
@ -662,7 +716,7 @@ ucm_separateMappings(UCMFile *ucm, UBool isSISO) {
if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) {
fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n");
ucm_printMapping(table, m, stderr);
m->f|=REMOVE_MAPPING;
m->moveFlag|=REMOVE_MAPPING;
needsMove=TRUE;
continue;
}
@ -675,7 +729,7 @@ ucm_separateMappings(UCMFile *ucm, UBool isSISO) {
printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr);
isOK=FALSE;
} else if(type>0) {
m->f|=MOVE_TO_EXT;
m->moveFlag|=MOVE_TO_EXT;
needsMove=TRUE;
}
}
@ -685,7 +739,7 @@ ucm_separateMappings(UCMFile *ucm, UBool isSISO) {
}
if(needsMove) {
moveMappings(ucm->base, ucm->ext);
return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, TRUE);
return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE);
} else {
ucm_sortTable(ucm->base);
return TRUE;
@ -852,6 +906,17 @@ ucm_closeTable(UCMTable *table) {
}
}
/*
 * Reset a table to its empty state: all length counters, the flags type
 * and the Unicode mask are set to 0, and the table is marked as not sorted.
 * Nothing is freed. A NULL table is ignored.
 */
U_CAPI void U_EXPORT2
ucm_resetTable(UCMTable *table) {
    if(table==NULL) {
        return;
    }

    table->isSorted=FALSE;
    table->codePointsLength=0;
    table->bytesLength=0;
    table->unicodeMask=0;
    table->flagsType=0;
    table->mappingsLength=0;
}
U_CAPI void U_EXPORT2
ucm_addMapping(UCMTable *table,
UCMapping *m,
@ -946,6 +1011,8 @@ ucm_addMapping(UCMTable *table,
tm=table->mappings+table->mappingsLength++;
uprv_memcpy(tm, m, sizeof(UCMapping));
table->isSorted=FALSE;
}
U_CAPI UCMFile * U_EXPORT2
@ -1051,7 +1118,61 @@ ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates
UChar32 codePoints[UCNV_EXT_MAX_UCHARS];
uint8_t bytes[UCNV_EXT_MAX_BYTES];
const char *s;
/* ignore empty and comment lines */
if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') {
return TRUE;
}
return
ucm_parseMappingLine(&m, codePoints, bytes, line) &&
ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes);
}
/*
 * Read one mapping table from convFile into ucm, one line at a time,
 * until the "END CHARMAP" line is seen.
 * Trailing CR/LF characters are removed from each line; empty lines and
 * lines starting with '#' are skipped; every other line is passed to
 * ucm_addMappingFromLine() together with forBase and baseStates.
 *
 * Does nothing if *pErrorCode already indicates a failure.
 * Sets *pErrorCode to U_INVALID_TABLE_FORMAT if the input ends before
 * "END CHARMAP" or if ucm_addMappingFromLine() returned FALSE for any line.
 */
U_CAPI void U_EXPORT2
ucm_readTable(UCMFile *ucm, FileStream* convFile,
              UBool forBase, UCMStates *baseStates,
              UErrorCode *pErrorCode) {
    char text[500];
    char *limit;
    UBool tableOK=TRUE, atEnd=FALSE;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    do {
        if(!T_FileStream_readLine(convFile, text, sizeof(text))) {
            /* the file ended without an END CHARMAP line */
            fprintf(stderr, "incomplete charmap section\n");
            tableOK=FALSE;
            atEnd=TRUE;
        } else {
            /* trim the line terminator (any run of CR and LF) */
            limit=uprv_strchr(text, 0);
            while(text<limit && (limit[-1]=='\r' || limit[-1]=='\n')) {
                --limit;
            }
            *limit=0;

            if(text[0]==0 || text[0]=='#') {
                /* empty and comment lines carry no mapping */
            } else if(0==uprv_strcmp(text, "END CHARMAP")) {
                /* end of the mapping table */
                atEnd=TRUE;
            } else if(!ucm_addMappingFromLine(ucm, text, forBase, baseStates)) {
                /* note the failure and continue with the remaining lines */
                tableOK=FALSE;
            }
        }
    } while(!atEnd);

    if(!tableOK) {
        *pErrorCode=U_INVALID_TABLE_FORMAT;
    }
}

View file

@ -22,6 +22,7 @@
#include "unicode/utypes.h"
#include "ucnvmbcs.h"
#include "ucnv_ext.h"
#include "filestrm.h"
#include <stdio.h>
U_CDECL_BEGIN
@ -46,7 +47,7 @@ typedef struct UCMapping {
uint32_t index;
uint8_t bytes[4];
} b;
int8_t uLen, bLen, f;
int8_t uLen, bLen, f, moveFlag;
} UCMapping;
enum {
@ -71,6 +72,7 @@ typedef struct UCMTable {
uint8_t unicodeMask;
int8_t flagsType; /* UCM_FLAGS_INITIAL etc. */
UBool isSorted;
} UCMTable;
enum {
@ -140,9 +142,21 @@ ucm_openTable(void);
U_CAPI void U_EXPORT2
ucm_closeTable(UCMTable *table);
U_CAPI void U_EXPORT2
ucm_resetTable(UCMTable *table);
U_CAPI void U_EXPORT2
ucm_sortTable(UCMTable *t);
/**
 * Read a table from a .ucm file, starting after the CHARMAP line and
 * continuing up to and including the END CHARMAP line.
 */
U_CAPI void U_EXPORT2
ucm_readTable(UCMFile *ucm, FileStream* convFile,
UBool forBase, UCMStates *baseStates,
UErrorCode *pErrorCode);
/**
* Check the validity of mappings against a base table's states;
* necessary for extension-only tables that were read before their base tables.
@ -152,9 +166,22 @@ ucm_checkValidity(UCMTable *ext, UCMStates *baseStates);
/**
* Check a base table against an extension table.
* Set moveToExt=TRUE for where base and extension tables are parsed
* from a single file,
* and moveToExt=FALSE for where the extension table is in a separate file.
* Set the moveTarget!=NULL if it is possible to move mappings from the base.
* This is the case where base and extension tables are parsed from a single file
* (moveTarget==ext)
* or when delta file mappings are subtracted from a base table.
*
* When a base table cannot be modified because a delta file is parsed in makeconv,
* then set moveTarget=NULL.
*
* if(intersectBase) then mappings that exist in the base table but not in
* the extension table are moved to moveTarget instead of showing an error.
*
* Special mode: If the base table is an SISO table (indicated in the baseStates)
* and intersectBase==2 for a DBCS extension table, then SBCS mappings are
* not moved out of the base unless their Unicode input requires it.
* This helps ucmkbase generate base tables for where the dbcsonly converter
* option will be employed.
*
* For both tables in the same file, the extension table is automatically
* built.
@ -164,6 +191,12 @@ ucm_checkValidity(UCMTable *ext, UCMStates *baseStates);
*
* Sort both tables, and then for each mapping direction:
*
* If intersectBase is TRUE and the base table contains a mapping
* that does not exist in the extension table, then this mapping is moved
* to moveTarget.
*
* - otherwise -
*
* If the base table contains a mapping for which the input sequence is
* the same as the extension input, then
* - if the output is the same: remove the extension mapping
@ -171,13 +204,14 @@ ucm_checkValidity(UCMTable *ext, UCMStates *baseStates);
*
* If the base table contains a mapping for which the input sequence is
* a prefix of the extension input, then
* - if moveToExt: move the base mapping to the extension table
* - if moveTarget!=NULL: move the base mapping to the moveTarget table
* - else: error
*
* @return FALSE in case of an irreparable error
*/
U_CAPI UBool U_EXPORT2
ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, UBool moveToExt);
ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
UCMTable *moveTarget, UBool intersectBase);
U_CAPI void U_EXPORT2
ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode);