ICU-3346 support DBCS-only and other delta (extension-only) .cnv files

X-SVN-Rev: 13638
This commit is contained in:
Markus Scherer 2003-11-08 00:09:50 +00:00
parent 693cbae3a7
commit b72a1b75cc
9 changed files with 487 additions and 187 deletions

View file

@ -957,23 +957,23 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
int outputType)
{
const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
uint32_t stage2Entry;
uint32_t myValue=0;
const uint8_t *p;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c<0x10000 || (sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
/* get the bytes and the length for the output */
if(outputType==MBCS_OUTPUT_2){
myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(myValue<=0xff) {
*length=1;
} else {
*length=2;
}
}else if(outputType==MBCS_OUTPUT_3){
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
if(myValue<=0xff) {
*length=1;
@ -1016,13 +1016,13 @@ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
const uint16_t *table;
int32_t value;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
value= -1;
}
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
table=sharedData->table->mbcs.fromUnicodeTable;
table=sharedData->mbcs.fromUnicodeTable;
/* get the byte for the output */
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
/* is this code point assigned, or do we use fallbacks? */
if(useFallback ? value>=0x800 : value>=0xc00) {
value &=0xff;

View file

@ -226,12 +226,16 @@ ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
/* copy initial values from the static structure for this type */
uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
#if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */
/*
* It would be much more efficient if the table were a direct member, not a pointer.
* However, that would add to the size of all UConverterSharedData objects
* even if they do not use this table (especially algorithmic ones).
* If this changes, then the static templates from converterData[type]
* need more entries.
*
* In principle, it would be cleaner if the load() function below
* allocated the table.
*/
data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
if(data->table == NULL) {
@ -240,7 +244,8 @@ ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
return NULL;
}
uprv_memset(data->table, 0, sizeof(UConverterTable));
#endif
data->staticData = source;
data->sharedDataCached = FALSE;
@ -285,6 +290,13 @@ static UConverterSharedData *createConverterFromFile(const char* pkg, const char
return NULL;
}
/*
* TODO Store pkg in a field in the shared data so that delta-only converters
* can load base converters from the same package.
* If the pkg name is longer than the field, then either do not load the converter
* in the first place, or just set the pkg field to "".
*/
return sharedData;
}
@ -464,6 +476,66 @@ ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
return TRUE;
}
/**
 * Load the shared data for a non-algorithmic converter.
 *
 * If pkg is NULL or an empty string, the converter cache is consulted (and
 * may be modified), so this function must then be called inside
 * umtx_lock(&cnvCacheMutex).
 * If pkg is a non-empty package name, the converter is created from file
 * and is not cached.
 *
 * @param pkg      package name, or NULL/"" to use the converter cache
 * @param realName converter name, used for the cache lookup and the file load
 * @param err      in/out error code; nothing is done if it is NULL or
 *                 already indicates a failure
 * @return the shared data; NULL if err is NULL or a failure on entry, or if
 *         creating an uncached converter from file failed
 */
UConverterSharedData *
ucnv_load(const char *pkg, const char *realName, UErrorCode *err) {
    UConverterSharedData *shared;

    if(err == NULL || U_FAILURE(*err)) {
        return NULL;
    }

    if(pkg != NULL && *pkg != 0) {
        /* application-provided converters are not currently cached */
        return createConverterFromFile(pkg, realName, err);
    }

    shared = ucnv_getSharedConverterData(realName);
    if(shared != NULL) {
        /* already in the cache: register one more client */
        shared->referenceCounter++;
        return shared;
    }

    /* not cached, stream it in from file */
    shared = createConverterFromFile(NULL, realName, err);
    if(U_FAILURE(*err) || shared == NULL) {
        return NULL;
    }

    /* share it with other library clients */
    ucnv_shareConverterData(shared);
    return shared;
}
/**
 * Unload a non-algorithmic converter.
 *
 * Decrements the reference counter (if it is above 0) and deletes the shared
 * data once the counter has reached 0 and the data is not held in the cache.
 * A NULL sharedData is ignored.
 *
 * The caller must ensure sharedData->referenceCounter != ~0
 * and must call this function inside umtx_lock(&cnvCacheMutex).
 */
void
ucnv_unload(UConverterSharedData *sharedData) {
    if(sharedData == NULL) {
        return;
    }

    if(sharedData->referenceCounter > 0) {
        sharedData->referenceCounter--;
    }

    /* keep the data while it still has clients or is owned by the cache */
    if(sharedData->referenceCounter > 0 || sharedData->sharedDataCached != FALSE) {
        return;
    }

    ucnv_deleteSharedConverterData(sharedData);
}
void
ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
{
@ -471,15 +543,12 @@ ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
/*
Double checking doesn't work on some platforms.
Don't check referenceCounter outside of a mutex block.
TODO We should be able to check for ~0 outside of the mutex,
improving performance for opening and closing of algorithmic converters.
*/
if (sharedData->referenceCounter != ~0) {
if (sharedData->referenceCounter > 0) {
sharedData->referenceCounter--;
}
if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
ucnv_deleteSharedConverterData(sharedData);
}
ucnv_unload(sharedData);
}
umtx_unlock(&cnvCacheMutex);
}
@ -635,29 +704,12 @@ ucnv_createConverter(UConverter *myUConverter, const char *converterName, UError
/* to prevent other threads from modifying the cache during the */
/* process. */
umtx_lock(&cnvCacheMutex);
mySharedConverterData = ucnv_getSharedConverterData(realName);
if (mySharedConverterData == NULL)
{
/*Not cached, we need to stream it in from file */
mySharedConverterData = createConverterFromFile(NULL, realName, err);
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
{
umtx_unlock(&cnvCacheMutex);
return NULL;
}
else
{
/* share it with other library clients */
ucnv_shareConverterData(mySharedConverterData);
}
}
else
{
/* The data for this converter was already in the cache. */
/* Update the reference counter on the shared data: one more client */
mySharedConverterData->referenceCounter++;
}
mySharedConverterData = ucnv_load(NULL, realName, err);
umtx_unlock(&cnvCacheMutex);
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
{
return NULL;
}
}
myUConverter = ucnv_createConverterFromSharedData(myUConverter, mySharedConverterData, realName, locale, options, err);
@ -798,10 +850,11 @@ U_CAPI int32_t U_EXPORT2
ucnv_flushCache ()
{
UConverterSharedData *mySharedData = NULL;
int32_t pos = -1;
int32_t pos;
int32_t tableDeletedNum = 0;
const UHashElement *e;
UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;
int32_t i, remaining;
/* Close the default converter without creating a new one so that everything will be flushed. */
ucnv_close(u_getDefaultConverter(&status));
@ -824,21 +877,34 @@ ucnv_flushCache ()
* is protected by cnvCacheMutex.
*/
umtx_lock(&cnvCacheMutex);
while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
{
mySharedData = (UConverterSharedData *) e->value.pointer;
/*deletes only if reference counter == 0 */
if (mySharedData->referenceCounter == 0)
/*
* double loop: A delta/extension-only converter has a pointer to its base table's
* shared data; the first iteration of the outer loop may see the delta converter
* before the base converter, and unloading the delta converter may get the base
* converter's reference counter down to 0.
*/
i = 0;
do {
remaining = 0;
pos = -1;
while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
{
tableDeletedNum++;
mySharedData = (UConverterSharedData *) e->value.pointer;
/*deletes only if reference counter == 0 */
if (mySharedData->referenceCounter == 0)
{
tableDeletedNum++;
UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
uhash_removeElement(SHARED_DATA_HASHTABLE, e);
mySharedData->sharedDataCached = FALSE;
ucnv_deleteSharedConverterData (mySharedData);
uhash_removeElement(SHARED_DATA_HASHTABLE, e);
mySharedData->sharedDataCached = FALSE;
ucnv_deleteSharedConverterData (mySharedData);
} else {
++remaining;
}
}
}
} while(++i == 1 && remaining > 0);
umtx_unlock(&cnvCacheMutex);
ucnv_io_flushAvailableConverterCache();

View file

@ -20,6 +20,8 @@
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "ucnvmbcs.h"
#include "ucnv_ext.h"
#include "udataswp.h"
@ -42,7 +44,10 @@ U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
work.
*/
union UConverterTable;
/* Conversion table data; the MBCS table is the only member of this union. */
union UConverterTable {
UConverterMBCSTable mbcs;
};
typedef union UConverterTable UConverterTable;
struct UConverterImpl;
@ -86,7 +91,7 @@ struct UConverterSharedData {
uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */
const void *dataMemory; /* from udata_openChoice() - for cleanup */
UConverterTable *table; /* Pointer to conversion data */
void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
@ -97,9 +102,23 @@ struct UConverterSharedData {
/*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
};
typedef struct UConverterSharedData UConverterSharedData;
/*
* Shared data structures currently come in two flavors:
* - readonly for built-in algorithmic converters
* - allocated for MBCS, with a pointer to an allocated UConverterTable
* which always has a UConverterMBCSTable
*
* To eliminate one allocation, I am making the UConverterMBCSTable
* a member of the shared data. It is the last member so that static
* definitions of UConverterSharedData work as before.
* The table field above also remains to avoid updating all static
* definitions, but is now unused.
*
* markus 2003-nov-07
*/
UConverterMBCSTable mbcs;
};
/* Defines a UConverter, the lightweight mutable part the user sees */
@ -208,6 +227,21 @@ UConverterDataLMBCS;
#define CONVERTER_FILE_EXTENSION ".cnv"
/**
* Load a non-algorithmic converter.
* If pkg is NULL or empty, the converter cache is used and this function must be called inside umtx_lock(&cnvCacheMutex).
*/
UConverterSharedData *
ucnv_load(const char *pkg, const char *name, UErrorCode *err);
/**
* Unload a non-algorithmic converter.
* The caller must ensure sharedData->referenceCounter != ~0 (i.e., this is not static shared data)
* and must call this function inside umtx_lock(&cnvCacheMutex).
*/
void
ucnv_unload(UConverterSharedData *sharedData);
/**
* Swap ICU .cnv conversion tables. See udataswp.h.
* @internal

View file

@ -20,6 +20,7 @@
#include "unicode/ucnv.h"
#include "unicode/uset.h"
#include "ucnv_cnv.h"
#include "ucnv_bld.h"
#include "cmemory.h"
U_CFUNC void

View file

@ -21,14 +21,6 @@
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
#include "ucnvmbcs.h"
union UConverterTable
{
UConverterMBCSTable mbcs;
};
U_CDECL_BEGIN
@ -43,7 +35,11 @@ U_CDECL_BEGIN
* U+ffff "illegal"
*/
/** Forward declaration, see ucnv_bld.h */
struct UConverterSharedData;
typedef struct UConverterSharedData UConverterSharedData;
/* function types for UConverterImpl ---------------------------------------- */
typedef void (*UConverterLoad) (UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
typedef void (*UConverterUnload) (UConverterSharedData *sharedData);

View file

@ -264,13 +264,15 @@ ucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
/*
* get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
* or 1 for DBCS-only,
* or -1 if the converter is not SI/SO stateful
*
* Note: For SI/SO stateful converters getting here,
* cnv->mode==0 is equivalent to firstLength==1.
*/
#define UCNV_SISO_STATE(cnv) \
((cnv)->sharedData->table->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : -1)
((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \
(cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1)
/*
* target<targetLimit; set error code for overflow
@ -376,7 +378,7 @@ ucnv_extContinueMatchToU(UConverter *cnv,
uint32_t value;
int32_t match, length;
match=ucnv_extMatchToU(cnv->sharedData->table->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
cnv->preToU, cnv->preToULength,
pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
&value,
@ -394,7 +396,7 @@ ucnv_extContinueMatchToU(UConverter *cnv,
}
/* write result */
ucnv_extWriteToU(cnv, cnv->sharedData->table->mbcs.extIndexes,
ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
value,
&pArgs->target, pArgs->targetLimit,
&pArgs->offsets, srcIndex,
@ -674,7 +676,7 @@ ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
const uint8_t *result;
int32_t length, prevLength;
length=(int32_t)UCNV_EXT_FROM_U_GET_LENGTH(value);
length=UCNV_EXT_FROM_U_GET_LENGTH(value);
value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
/* output the result */
@ -756,7 +758,12 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
*src, (int32_t)(srcLimit-*src),
&value,
cnv->useFallback, flush);
if(match>=2) {
/* reject a match if the result is a single byte for DBCS-only */
if( match>=2 &&
!(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 &&
cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
) {
/* advance src pointer for the consumed input */
*src+=match-2; /* remove 2 for the initial code point */
@ -815,7 +822,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
/* write result for simple, single-character conversion */
int32_t length;
length=(int32_t)UCNV_EXT_FROM_U_GET_LENGTH(value);
length=UCNV_EXT_FROM_U_GET_LENGTH(value);
value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
@ -856,7 +863,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
uint32_t value;
int32_t match;
match=ucnv_extMatchFromU(cnv->sharedData->table->mbcs.extIndexes,
match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
cnv->preFromUFirstCP,
cnv->preFromU, cnv->preFromULength,
pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
@ -880,7 +887,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
cnv->preFromUFirstCP=U_SENTINEL;
/* write result */
ucnv_extWriteFromU(cnv, cnv->sharedData->table->mbcs.extIndexes,
ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
value,
&pArgs->target, pArgs->targetLimit,
&pArgs->offsets, srcIndex,
@ -939,6 +946,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
const int32_t *cx,
USet *set,
UConverterUnicodeSet which,
int32_t minLength,
UChar32 c,
UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
int32_t sectionIndex,
@ -958,7 +966,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
if( value!=0 &&
UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
if(c>=0) {
/* add the initial code point */
@ -978,13 +986,13 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
ucnv_extGetUnicodeSetString(
cnv, cx, set, which,
cnv, cx, set, which, minLength,
U_SENTINEL, s, length+1,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
} else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
uset_addString(set, s, length+1);
}
@ -1001,13 +1009,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
const uint32_t *stage3b;
uint32_t value;
int32_t st1, stage1Length, st2, st3;
int32_t st1, stage1Length, st2, st3, minLength;
UChar s[UCNV_EXT_MAX_UCHARS];
UChar32 c;
int32_t length;
cx=cnv->sharedData->table->mbcs.extIndexes;
cx=cnv->sharedData->mbcs.extIndexes;
if(cx==NULL) {
return;
}
@ -1021,6 +1029,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
/* enumerate the from-Unicode trie table */
c=0; /* keep track of the current code point while enumerating */
if(cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* DBCS-only, ignore single-byte results */
minLength=2;
} else {
minLength=1;
}
/*
* the trie enumeration is almost the same as
* in _MBCSGetUnicodeSet() for MBCS_OUTPUT_1
@ -1048,13 +1063,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
length=0;
U16_APPEND_UNSAFE(s, length, c);
ucnv_extGetUnicodeSetString(
cnv, cx, set, which,
cnv, cx, set, which, minLength,
c, s, length,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
} else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
uset_add(set, c);
}

View file

@ -439,7 +439,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
/* use after masking off the roundtrip flag */
#define UCNV_EXT_FROM_U_GET_LENGTH(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
/* get bytes or bytes index */
#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)

View file

@ -442,7 +442,7 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
}
/* enumerate the from-Unicode trie table */
mbcsTable=&cnv->sharedData->table->mbcs;
mbcsTable=&cnv->sharedData->mbcs;
table=mbcsTable->fromUnicodeTable;
if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
maxStage1=0x440;
@ -486,6 +486,48 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
c+=1024; /* empty stage 2 block */
}
}
} else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* ignore single-byte results */
const uint32_t *stage2;
const uint16_t *stage3, *results;
results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
for(st1=0; st1<maxStage1; ++st1) {
st2=table[st1];
if(st2>(maxStage1>>1)) {
stage2=(const uint32_t *)table+st2;
for(st2=0; st2<64; ++st2) {
if((st3=stage2[st2])!=0) {
/* read the stage 3 block */
stage3=results+16*(uint32_t)(uint16_t)st3;
/* get the roundtrip flags for the stage 3 block */
st3>>=16;
/*
* Add code points for which the roundtrip flag is set.
* Once we get a set for fallback mappings, we have to check
* non-roundtrip stage 3 results for whether they are 0.
* See _MBCSFromUnicodeWithOffsets() for details.
*
* Ignore single-byte results (<0x100).
*/
do {
if((st3&1)!=0 && *stage3>=0x100) {
uset_add(set, c);
}
st3>>=1;
++stage3;
} while((++c&0xf)!=0);
} else {
c+=16; /* empty stage 3 block */
}
}
} else {
c+=1024; /* empty stage 2 block */
}
}
} else {
const uint32_t *stage2;
@ -552,7 +594,7 @@ _extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
cnv->useSubChar1=FALSE;
if( (cx=sharedData->table->mbcs.extIndexes)!=NULL &&
if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
ucnv_extInitialMatchFromU(
cnv, cx,
cp, source, sourceLimit,
@ -617,7 +659,7 @@ _extToU(UConverter *cnv, const UConverterSharedData *sharedData,
UErrorCode *pErrorCode) {
const int32_t *cx;
if( (cx=sharedData->table->mbcs.extIndexes)!=NULL &&
if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
ucnv_extInitialMatchToU(
cnv, cx,
length, source, sourceLimit,
@ -708,7 +750,7 @@ _EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
uint32_t stage2Entry;
uint32_t size, sizeofFromUBytes;
mbcsTable=&sharedData->table->mbcs;
mbcsTable=&sharedData->mbcs;
table=mbcsTable->fromUnicodeTable;
bytes=mbcsTable->fromUnicodeBytes;
@ -824,7 +866,7 @@ _MBCSLoad(UConverterSharedData *sharedData,
const uint8_t *raw,
UErrorCode *pErrorCode) {
UDataInfo info;
UConverterMBCSTable *mbcsTable=&sharedData->table->mbcs;
UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
_MBCSHeader *header=(_MBCSHeader *)raw;
uint32_t offset;
@ -833,15 +875,6 @@ _MBCSLoad(UConverterSharedData *sharedData,
return;
}
mbcsTable->countStates=(uint8_t)header->countStates;
mbcsTable->countToUFallbacks=header->countToUFallbacks;
mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
mbcsTable->fromUBytesLength=header->fromUBytesLength;
mbcsTable->outputType=(uint8_t)header->flags;
/* extension data, header version 4.2 and higher */
@ -850,22 +883,106 @@ _MBCSLoad(UConverterSharedData *sharedData,
mbcsTable->extIndexes=(const int32_t *)(raw+offset);
}
/* make sure that the output type is known */
switch(mbcsTable->outputType) {
case MBCS_OUTPUT_1:
case MBCS_OUTPUT_2:
case MBCS_OUTPUT_3:
case MBCS_OUTPUT_4:
case MBCS_OUTPUT_3_EUC:
case MBCS_OUTPUT_4_EUC:
case MBCS_OUTPUT_2_SISO:
/* OK */
break;
case MBCS_OUTPUT_EXT_ONLY:
/* ### TODO */
default:
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
UConverterSharedData *baseSharedData;
const int32_t *extIndexes;
const char *baseName;
/* extension-only file, load the base table and set values appropriately */
if((extIndexes=mbcsTable->extIndexes)==NULL) {
/* extension-only file without extension */
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
/* load the base table */
baseName=(const char *)(header+1);
if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
/* forbid loading this same extension-only file */
/* TODO better prevention of loading another extension table */
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
/* TODO pass package name, same as current converter (see ucnv_bld.c) and/or parse out of prefix of base name */
baseSharedData=ucnv_load(NULL, baseName, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
baseSharedData->mbcs.baseSharedData!=NULL
) {
ucnv_unload(baseSharedData);
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
/* copy the base table data */
uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
/* overwrite values with relevant ones for the extension converter */
mbcsTable->baseSharedData=baseSharedData;
mbcsTable->extIndexes=extIndexes;
/*
* It would be possible to share the swapLFNL data with a base converter,
* but the generated name would have to be different, and the memory
* would have to be free'd only once.
* It is easier to just create the data for the extension converter
* separately when it is requested.
*/
mbcsTable->swapLFNLStateTable=NULL;
mbcsTable->swapLFNLFromUnicodeBytes=NULL;
mbcsTable->swapLFNLName=NULL;
/*
* Set a special, runtime-only outputType if the extension converter
* is a DBCS version of an SI/SO-stateful base converter.
*/
if( baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO &&
(sharedData->staticData->conversionType==UCNV_DBCS ||
(sharedData->staticData->conversionType==UCNV_MBCS &&
sharedData->staticData->minBytesPerChar>=2))
) {
int32_t entry;
/* get the dbcs state from the state table entry for SO=0x0e */
entry=mbcsTable->stateTable[0][0xe];
if( MBCS_ENTRY_IS_FINAL(entry) &&
MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
MBCS_ENTRY_FINAL_STATE(entry)!=0
) {
mbcsTable->dbcsOnlyState=MBCS_ENTRY_FINAL_STATE(entry);
mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
}
}
} else {
/* make sure that the output type is known */
switch(mbcsTable->outputType) {
case MBCS_OUTPUT_1:
case MBCS_OUTPUT_2:
case MBCS_OUTPUT_3:
case MBCS_OUTPUT_4:
case MBCS_OUTPUT_3_EUC:
case MBCS_OUTPUT_4_EUC:
case MBCS_OUTPUT_2_SISO:
/* OK */
break;
default:
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
mbcsTable->countStates=(uint8_t)header->countStates;
mbcsTable->countToUFallbacks=header->countToUFallbacks;
mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
mbcsTable->fromUBytesLength=header->fromUBytesLength;
}
/*
@ -885,7 +1002,7 @@ _MBCSLoad(UConverterSharedData *sharedData,
static void
_MBCSUnload(UConverterSharedData *sharedData) {
UConverterMBCSTable *mbcsTable=&sharedData->table->mbcs;
UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
if(mbcsTable->swapLFNLStateTable!=NULL) {
uprv_free(mbcsTable->swapLFNLStateTable);
@ -898,22 +1015,31 @@ _MBCSOpen(UConverter *cnv,
const char *locale,
uint32_t options,
UErrorCode *pErrorCode) {
UConverterMBCSTable *mbcsTable;
const int32_t *extIndexes;
uint8_t outputType;
int8_t maxBytesPerUChar;
mbcsTable=&cnv->sharedData->mbcs;
outputType=mbcsTable->outputType;
if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* the swaplfnl option does not apply, remove it */
cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
}
if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
/* do this because double-checked locking is broken */
UBool isCached;
umtx_lock(NULL);
isCached=cnv->sharedData->table->mbcs.swapLFNLStateTable!=NULL;
isCached=mbcsTable->swapLFNLStateTable!=NULL;
umtx_unlock(NULL);
if(!isCached) {
if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
/* the option does not apply, remove it */
cnv->options&=~UCNV_OPTION_SWAP_LFNL;
cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
}
}
}
@ -926,12 +1052,11 @@ _MBCSOpen(UConverter *cnv,
}
/* fix maxBytesPerUChar depending on outputType and options etc. */
outputType=cnv->sharedData->table->mbcs.outputType;
if(outputType==MBCS_OUTPUT_2_SISO) {
cnv->maxBytesPerUChar=3; /* SO+DBCS */
}
extIndexes=cnv->sharedData->table->mbcs.extIndexes;
extIndexes=mbcsTable->extIndexes;
if(extIndexes!=NULL) {
maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
if(outputType==MBCS_OUTPUT_2_SISO) {
@ -962,8 +1087,8 @@ _MBCSOpen(UConverter *cnv,
static const char *
_MBCSGetName(const UConverter *cnv) {
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->table->mbcs.swapLFNLName!=NULL) {
return cnv->sharedData->table->mbcs.swapLFNLName;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
return cnv->sharedData->mbcs.swapLFNLName;
} else {
return cnv->sharedData->staticData->name;
}
@ -1026,9 +1151,9 @@ _MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
offsets=pArgs->offsets;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
} else {
stateTable=cnv->sharedData->table->mbcs.stateTable;
stateTable=cnv->sharedData->mbcs.stateTable;
}
/* sourceIndex=-1 if the current character began in the previous buffer */
@ -1177,9 +1302,9 @@ _MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
offsets=pArgs->offsets;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
} else {
stateTable=cnv->sharedData->table->mbcs.stateTable;
stateTable=cnv->sharedData->mbcs.stateTable;
}
/* sourceIndex=-1 if the current character began in the previous buffer */
@ -1411,8 +1536,8 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
}
}
if(cnv->sharedData->table->mbcs.countStates==1) {
if(!(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
if(cnv->sharedData->mbcs.countStates==1) {
if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
} else {
_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
@ -1428,18 +1553,26 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
offsets=pArgs->offsets;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
} else {
stateTable=cnv->sharedData->table->mbcs.stateTable;
stateTable=cnv->sharedData->mbcs.stateTable;
}
unicodeCodeUnits=cnv->sharedData->table->mbcs.unicodeCodeUnits;
unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
/* get the converter state from UConverter */
offset=cnv->toUnicodeStatus;
state=(uint8_t)(cnv->mode);
byteIndex=cnv->toULength;
bytes=cnv->toUBytes;
/*
* if we are in the SBCS state for a DBCS-only converter,
* then load the DBCS state from the MBCS data
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
*/
if((state=(uint8_t)(cnv->mode))==0) {
state=cnv->sharedData->mbcs.dbcsOnlyState;
}
/* sourceIndex=-1 if the current character began in the previous buffer */
sourceIndex=byteIndex==0 ? 0 : -1;
nextSourceIndex=0;
@ -1569,6 +1702,9 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
continue;
}
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
cnv->mode=state;
/* set the next state early so that we can reuse the entry variable */
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
@ -1588,7 +1724,7 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
}
byteIndex=0;
} else if(c==0xfffe) {
if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset))!=0xfffe) {
if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
/* output fallback BMP code point */
*target++=(UChar)entry;
if(offsets!=NULL) {
@ -1682,7 +1818,15 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
* The 21 unused bits may later be used for more sophisticated
* state transitions.
*/
byteIndex=0;
if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
byteIndex=0;
} else {
/* SI/SO are illegal for DBCS-only conversion */
state=(uint8_t)(cnv->mode); /* restore the previous state */
/* callback(illegal) */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
if(UCNV_TO_U_USE_FALLBACK(cnv)) {
/* output BMP code point */
@ -1713,10 +1857,6 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
} else /* unassigned sequences indicated with byteIndex>0 */ {
/* try an extension mapping */
pArgs->source=(const char *)source;
/* save the state for proper extension mapping with SI/SO-stateful converters */
cnv->mode=state;
byteIndex=_extToU(cnv, cnv->sharedData,
byteIndex, (const char **)&source, (const char *)sourceLimit,
&target, targetLimit,
@ -1762,9 +1902,9 @@ _MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
source=(const uint8_t *)pArgs->source;
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
} else {
stateTable=cnv->sharedData->table->mbcs.stateTable;
stateTable=cnv->sharedData->mbcs.stateTable;
}
/* conversion loop */
@ -1856,14 +1996,14 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
}
if(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
/*
* Using the generic ucnv_getNextUChar() code lets us deal correctly
* with the rare case of a codepage that maps single surrogates
* without adding the complexity to this already complicated function here.
*/
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
} else if(cnv->sharedData->table->mbcs.countStates==1) {
} else if(cnv->sharedData->mbcs.countStates==1) {
return _MBCSSingleGetNextUChar(pArgs, pErrorCode);
}
@ -1872,15 +2012,23 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
} else {
stateTable=cnv->sharedData->table->mbcs.stateTable;
stateTable=cnv->sharedData->mbcs.stateTable;
}
unicodeCodeUnits=cnv->sharedData->table->mbcs.unicodeCodeUnits;
unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
/* get the converter state from UConverter */
offset=cnv->toUnicodeStatus;
state=(uint8_t)(cnv->mode);
/*
* if we are in the SBCS state for a DBCS-only converter,
* then load the DBCS state from the MBCS data
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
*/
if((state=(uint8_t)(cnv->mode))==0) {
state=cnv->sharedData->mbcs.dbcsOnlyState;
}
/* conversion loop */
c=U_SENTINEL;
@ -1902,6 +2050,9 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
break;
}
} else {
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
cnv->mode=state;
/* set the next state early so that we can reuse the entry variable */
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
@ -1921,7 +2072,7 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
/* output BMP code point */
break;
} else if(c==0xfffe) {
if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset))!=0xfffe) {
if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
break;
}
} else {
@ -1960,6 +2111,13 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
* The 21 unused bits may later be used for more sophisticated
* state transitions.
*/
if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
/* SI/SO are illegal for DBCS-only conversion */
state=(uint8_t)(cnv->mode); /* restore the previous state */
/* callback(illegal) */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
if(UCNV_TO_U_USE_FALLBACK(cnv)) {
/* output BMP code point */
@ -2037,7 +2195,7 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
int32_t entry;
uint8_t action;
entry=sharedData->table->mbcs.stateTable[0][b];
entry=sharedData->mbcs.stateTable[0][b];
/* MBCS_ENTRY_IS_FINAL(entry) */
if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
@ -2115,21 +2273,21 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
/*
* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
* TODO In future releases, verify that this function is never called for SBCS
* conversions, i.e., that sharedData->table->mbcs.countStates==1 is still true.
* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
* Removal improves code coverage.
*/
/* use optimized function if possible */
if(sharedData->table->mbcs.countStates==1) {
if(sharedData->mbcs.countStates==1) {
return _MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)(*(*pSource)++), useFallback);
}
#endif
stateTable=sharedData->table->mbcs.stateTable;
unicodeCodeUnits=sharedData->table->mbcs.unicodeCodeUnits;
stateTable=sharedData->mbcs.stateTable;
unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
/* converter state */
offset=0;
state=0;
state=sharedData->mbcs.dbcsOnlyState;
/* conversion loop */
do {
@ -2151,7 +2309,7 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
if(entry!=0xfffe) {
return (UChar32)entry;
} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
return _MBCSGetFallback(&sharedData->table->mbcs, offset);
return _MBCSGetFallback(&sharedData->mbcs, offset);
} else {
return 0xfffe;
}
@ -2198,6 +2356,10 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
* The 21 unused bits may later be used for more sophisticated
* state transitions.
*/
if(sharedData->mbcs.dbcsOnlyState!=0) {
/* SI/SO are illegal for DBCS-only conversion */
return 0xffff;
}
if(source==(const uint8_t *)sourceLimit) {
/* if there are only state changes, then return "unassigned" */
return 0xfffe;
@ -2246,7 +2408,7 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
/* use optimized function if possible */
cnv=pArgs->converter;
unicodeMask=cnv->sharedData->table->mbcs.unicodeMask;
unicodeMask=cnv->sharedData->mbcs.unicodeMask;
/* set up the local pointers */
source=pArgs->source;
@ -2255,11 +2417,11 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
table=cnv->sharedData->mbcs.fromUnicodeTable;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
bytes=cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
} else {
bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
}
/* get the converter state from UConverter */
@ -2461,11 +2623,11 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
table=cnv->sharedData->mbcs.fromUnicodeTable;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
results=(uint16_t *)cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
} else {
results=(uint16_t *)cnv->sharedData->table->mbcs.fromUnicodeBytes;
results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
}
if(cnv->useFallback) {
@ -2475,7 +2637,7 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
/* use only roundtrips and fallbacks from private-use characters */
minValue=0xc00;
}
hasSupplementary=(UBool)(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from UConverter */
c=cnv->fromUChar32;
@ -2631,11 +2793,11 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
table=cnv->sharedData->mbcs.fromUnicodeTable;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
results=(uint16_t *)cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
} else {
results=(uint16_t *)cnv->sharedData->table->mbcs.fromUnicodeBytes;
results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
}
if(cnv->useFallback) {
@ -2881,8 +3043,8 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
}
/* use optimized function if possible */
outputType=cnv->sharedData->table->mbcs.outputType;
unicodeMask=cnv->sharedData->table->mbcs.unicodeMask;
outputType=cnv->sharedData->mbcs.outputType;
unicodeMask=cnv->sharedData->mbcs.unicodeMask;
if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
@ -2902,12 +3064,12 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
table=cnv->sharedData->mbcs.fromUnicodeTable;
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
bytes=cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
} else {
bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
}
/* get the converter state from UConverter */
@ -3085,6 +3247,17 @@ getTrail:
}
}
break;
case MBCS_OUTPUT_DBCS_ONLY:
/* 1/2-byte stateful table but only DBCS mappings used */
value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
if(value<=0xff) {
/* no mapping or SBCS result, not taken for DBCS-only */
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
length=0;
} else {
length=2;
}
break;
case MBCS_OUTPUT_3:
p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
@ -3150,7 +3323,7 @@ getTrail:
* Not having a default branch also causes warnings with
* some compilers.
*/
value=0;
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
length=0;
break;
}
@ -3367,7 +3540,7 @@ U_CFUNC int32_t
_MBCSFromUChar32(UConverterSharedData *sharedData,
UChar32 c, uint32_t *pValue,
UBool useFallback) {
const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
const uint8_t *p;
uint32_t stage2Entry;
uint32_t value;
@ -3376,13 +3549,13 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
/* ### TODO extension mapping */
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
return 0;
}
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
if(sharedData->table->mbcs.outputType==MBCS_OUTPUT_1) {
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
/* is this code point assigned, or do we use fallbacks? */
if(useFallback ? value>=0x800 : value>=0xc00) {
*pValue=value&0xff;
@ -3395,17 +3568,28 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
/* get the bytes and the length for the output */
switch(sharedData->table->mbcs.outputType) {
switch(sharedData->mbcs.outputType) {
case MBCS_OUTPUT_2:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else {
length=2;
}
break;
case MBCS_OUTPUT_DBCS_ONLY:
/* 1/2-byte stateful table but only DBCS mappings used */
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
/* no mapping or SBCS result, not taken for DBCS-only */
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
length=0;
} else {
length=2;
}
break;
case MBCS_OUTPUT_3:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
if(value<=0xff) {
length=1;
@ -3416,7 +3600,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
}
break;
case MBCS_OUTPUT_4:
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
@ -3428,7 +3612,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
}
break;
case MBCS_OUTPUT_3_EUC:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
/* EUC 16-bit fixed-length representation */
if(value<=0xff) {
length=1;
@ -3443,7 +3627,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
}
break;
case MBCS_OUTPUT_4_EUC:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
/* EUC 16-bit fixed-length representation applied to the first two bytes */
if(value<=0xff) {
@ -3505,15 +3689,15 @@ _MBCSSingleFromUChar32(UConverterSharedData *sharedData,
int32_t value;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
return -1;
}
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
table=sharedData->table->mbcs.fromUnicodeTable;
table=sharedData->mbcs.fromUnicodeTable;
/* get the byte for the output */
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
/* is this code point assigned, or do we use fallbacks? */
if(useFallback ? value>=0x800 : value>=0xc00) {
return value&0xff;
@ -3529,9 +3713,10 @@ static void
_MBCSGetStarters(const UConverter* cnv,
UBool starters[256],
UErrorCode *pErrorCode) {
const int32_t *state0=cnv->sharedData->table->mbcs.stateTable[0];
const int32_t *state0;
int i;
state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
for(i=0; i<256; ++i) {
/* all bytes that cause a state transition from state 0 are lead bytes */
starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
@ -3544,7 +3729,7 @@ _MBCSGetStarters(const UConverter* cnv,
*/
U_CFUNC UBool
_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
/*
 * Returns the (UBool) result of MBCS_ENTRY_IS_TRANSITION for the entry that
 * the state table holds for this byte in state 0.
 * The byte is cast to uint8_t before it is used as an index so that a
 * negative char value cannot yield a negative index into the 256-entry row.
 * NOTE(review): this always consults state 0, whereas _MBCSGetStarters()
 * indexes stateTable with mbcs.dbcsOnlyState - confirm that state 0 is the
 * intended row for DBCS-only converters as well.
 */
return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->table->mbcs.stateTable[0][(uint8_t)byte]);
return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
}
static void
@ -3558,7 +3743,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
/* first, select between subChar and subChar1 */
if( cnv->subChar1!=0 &&
(cnv->sharedData->table->mbcs.extIndexes!=NULL ?
(cnv->sharedData->mbcs.extIndexes!=NULL ?
cnv->useSubChar1 :
(cnv->invalidUCharBuffer[0]<=0xff))
) {
@ -3574,7 +3759,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
/* reset the selector for the next code point */
cnv->useSubChar1=FALSE;
switch(cnv->sharedData->table->mbcs.outputType) {
switch(cnv->sharedData->mbcs.outputType) {
case MBCS_OUTPUT_2_SISO:
p=buffer;
@ -3616,9 +3801,9 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
U_CFUNC UConverterType
_MBCSGetType(const UConverter* converter) {
/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
if(converter->sharedData->table->mbcs.countStates==1) {
if(converter->sharedData->mbcs.countStates==1) {
return (UConverterType)UCNV_SBCS;
} else if((converter->sharedData->table->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
return (UConverterType)UCNV_EBCDIC_STATEFUL;
} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
return (UConverterType)UCNV_DBCS;

View file

@ -19,7 +19,7 @@
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
/**
* ICU conversion (.cnv) data file structure, following the usual UDataInfo
@ -201,7 +201,9 @@ enum {
MBCS_OUTPUT_EXT_ONLY, /* e */
MBCS_OUTPUT_COUNT
MBCS_OUTPUT_COUNT,
MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */
};
/**
@ -219,7 +221,7 @@ typedef struct {
*/
typedef struct UConverterMBCSTable {
/* toUnicode */
uint8_t countStates;
uint8_t countStates, dbcsOnlyState;
uint32_t countToUFallbacks;
const int32_t (*stateTable)/*[countStates]*/[256];
@ -238,6 +240,7 @@ typedef struct UConverterMBCSTable {
char *swapLFNLName;
/* extension data */
struct UConverterSharedData *baseSharedData;
const int32_t *extIndexes;
} UConverterMBCSTable;
@ -288,7 +291,7 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
* returns fallback values.
*/
#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \
(UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->table->mbcs.stateTable[0][(uint8_t)(b)])
(UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)])
/**
* This is an internal function that allows other converter implementations
@ -299,7 +302,7 @@ _MBCSIsLeadByte(UConverterSharedData *sharedData, char byte);
/**
 * This is a macro version of _MBCSIsLeadByte().
 * It yields the (UBool) result of MBCS_ENTRY_IS_TRANSITION for the state 0
 * entry of the given byte; the byte is cast to uint8_t before it is used as
 * the table index. Both macro arguments are parenthesized in the expansion.
 */
#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \
(UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->table->mbcs.stateTable[0][(uint8_t)(byte)])
(UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
/**
* This is another simple conversion function for internal use by other