mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-3346 support DBCS-only and other delta (extension-only) .cnv files
X-SVN-Rev: 13638
This commit is contained in:
parent
693cbae3a7
commit
b72a1b75cc
9 changed files with 487 additions and 187 deletions
|
@ -957,23 +957,23 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
|
|||
int outputType)
|
||||
{
|
||||
|
||||
const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;
|
||||
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
|
||||
uint32_t stage2Entry;
|
||||
uint32_t myValue=0;
|
||||
const uint8_t *p;
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if(c<0x10000 || (sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
|
||||
/* get the bytes and the length for the output */
|
||||
if(outputType==MBCS_OUTPUT_2){
|
||||
myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if(myValue<=0xff) {
|
||||
*length=1;
|
||||
} else {
|
||||
*length=2;
|
||||
}
|
||||
}else if(outputType==MBCS_OUTPUT_3){
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
|
||||
if(myValue<=0xff) {
|
||||
*length=1;
|
||||
|
@ -1016,13 +1016,13 @@ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
|
|||
const uint16_t *table;
|
||||
int32_t value;
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
value= -1;
|
||||
}
|
||||
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
|
||||
table=sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=sharedData->mbcs.fromUnicodeTable;
|
||||
/* get the byte for the output */
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
|
||||
/* is this code point assigned, or do we use fallbacks? */
|
||||
if(useFallback ? value>=0x800 : value>=0xc00) {
|
||||
value &=0xff;
|
||||
|
|
|
@ -226,12 +226,16 @@ ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
|
|||
/* copy initial values from the static structure for this type */
|
||||
uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
|
||||
|
||||
#if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */
|
||||
/*
|
||||
* It would be much more efficient if the table were a direct member, not a pointer.
|
||||
* However, that would add to the size of all UConverterSharedData objects
|
||||
* even if they do not use this table (especially algorithmic ones).
|
||||
* If this changes, then the static templates from converterData[type]
|
||||
* need more entries.
|
||||
*
|
||||
* In principle, it would be cleaner if the load() function below
|
||||
* allocated the table.
|
||||
*/
|
||||
data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
|
||||
if(data->table == NULL) {
|
||||
|
@ -240,7 +244,8 @@ ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
|
|||
return NULL;
|
||||
}
|
||||
uprv_memset(data->table, 0, sizeof(UConverterTable));
|
||||
|
||||
#endif
|
||||
|
||||
data->staticData = source;
|
||||
|
||||
data->sharedDataCached = FALSE;
|
||||
|
@ -285,6 +290,13 @@ static UConverterSharedData *createConverterFromFile(const char* pkg, const char
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO Store pkg in a field in the shared data so that delta-only converters
|
||||
* can load base converters from the same package.
|
||||
* If the pkg name is longer than the field, then either do not load the converter
|
||||
* in the first place, or just set the pkg field to "".
|
||||
*/
|
||||
|
||||
return sharedData;
|
||||
}
|
||||
|
||||
|
@ -464,6 +476,66 @@ ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a non-algorithmic converter.
|
||||
* If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
|
||||
*/
|
||||
UConverterSharedData *
|
||||
ucnv_load(const char *pkg, const char *realName, UErrorCode *err) {
|
||||
UConverterSharedData *mySharedConverterData;
|
||||
|
||||
if(err == NULL || U_FAILURE(*err)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(pkg != NULL && *pkg != 0) {
|
||||
/* application-provided converters are not currently cached */
|
||||
return createConverterFromFile(pkg, realName, err);
|
||||
}
|
||||
|
||||
mySharedConverterData = ucnv_getSharedConverterData(realName);
|
||||
if (mySharedConverterData == NULL)
|
||||
{
|
||||
/*Not cached, we need to stream it in from file */
|
||||
mySharedConverterData = createConverterFromFile(NULL, realName, err);
|
||||
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* share it with other library clients */
|
||||
ucnv_shareConverterData(mySharedConverterData);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The data for this converter was already in the cache. */
|
||||
/* Update the reference counter on the shared data: one more client */
|
||||
mySharedConverterData->referenceCounter++;
|
||||
}
|
||||
|
||||
return mySharedConverterData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unload a non-algorithmic converter.
|
||||
* It must be sharedData->referenceCounter != ~0
|
||||
* and this function must be called inside umtx_lock(&cnvCacheMutex).
|
||||
*/
|
||||
void
|
||||
ucnv_unload(UConverterSharedData *sharedData) {
|
||||
if(sharedData != NULL) {
|
||||
if (sharedData->referenceCounter > 0) {
|
||||
sharedData->referenceCounter--;
|
||||
}
|
||||
|
||||
if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
|
||||
ucnv_deleteSharedConverterData(sharedData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
|
||||
{
|
||||
|
@ -471,15 +543,12 @@ ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
|
|||
/*
|
||||
Double checking doesn't work on some platforms.
|
||||
Don't check referenceCounter outside of a mutex block.
|
||||
|
||||
TODO We should be able to check for ~0 outside of the mutex,
|
||||
improving performance for opening and closing of algorithmic converters.
|
||||
*/
|
||||
if (sharedData->referenceCounter != ~0) {
|
||||
if (sharedData->referenceCounter > 0) {
|
||||
sharedData->referenceCounter--;
|
||||
}
|
||||
|
||||
if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
|
||||
ucnv_deleteSharedConverterData(sharedData);
|
||||
}
|
||||
ucnv_unload(sharedData);
|
||||
}
|
||||
umtx_unlock(&cnvCacheMutex);
|
||||
}
|
||||
|
@ -635,29 +704,12 @@ ucnv_createConverter(UConverter *myUConverter, const char *converterName, UError
|
|||
/* to prevent other threads from modifying the cache during the */
|
||||
/* process. */
|
||||
umtx_lock(&cnvCacheMutex);
|
||||
mySharedConverterData = ucnv_getSharedConverterData(realName);
|
||||
if (mySharedConverterData == NULL)
|
||||
{
|
||||
/*Not cached, we need to stream it in from file */
|
||||
mySharedConverterData = createConverterFromFile(NULL, realName, err);
|
||||
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
|
||||
{
|
||||
umtx_unlock(&cnvCacheMutex);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* share it with other library clients */
|
||||
ucnv_shareConverterData(mySharedConverterData);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The data for this converter was already in the cache. */
|
||||
/* Update the reference counter on the shared data: one more client */
|
||||
mySharedConverterData->referenceCounter++;
|
||||
}
|
||||
mySharedConverterData = ucnv_load(NULL, realName, err);
|
||||
umtx_unlock(&cnvCacheMutex);
|
||||
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
myUConverter = ucnv_createConverterFromSharedData(myUConverter, mySharedConverterData, realName, locale, options, err);
|
||||
|
@ -798,10 +850,11 @@ U_CAPI int32_t U_EXPORT2
|
|||
ucnv_flushCache ()
|
||||
{
|
||||
UConverterSharedData *mySharedData = NULL;
|
||||
int32_t pos = -1;
|
||||
int32_t pos;
|
||||
int32_t tableDeletedNum = 0;
|
||||
const UHashElement *e;
|
||||
UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
int32_t i, remaining;
|
||||
|
||||
/* Close the default converter without creating a new one so that everything will be flushed. */
|
||||
ucnv_close(u_getDefaultConverter(&status));
|
||||
|
@ -824,21 +877,34 @@ ucnv_flushCache ()
|
|||
* is protected by cnvCacheMutex.
|
||||
*/
|
||||
umtx_lock(&cnvCacheMutex);
|
||||
while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
|
||||
{
|
||||
mySharedData = (UConverterSharedData *) e->value.pointer;
|
||||
/*deletes only if reference counter == 0 */
|
||||
if (mySharedData->referenceCounter == 0)
|
||||
/*
|
||||
* double loop: A delta/extension-only converter has a pointer to its base table's
|
||||
* shared data; the first iteration of the outer loop may see the delta converter
|
||||
* before the base converter, and unloading the delta converter may get the base
|
||||
* converter's reference counter down to 0.
|
||||
*/
|
||||
i = 0;
|
||||
do {
|
||||
remaining = 0;
|
||||
pos = -1;
|
||||
while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
|
||||
{
|
||||
tableDeletedNum++;
|
||||
mySharedData = (UConverterSharedData *) e->value.pointer;
|
||||
/*deletes only if reference counter == 0 */
|
||||
if (mySharedData->referenceCounter == 0)
|
||||
{
|
||||
tableDeletedNum++;
|
||||
|
||||
UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
|
||||
UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
|
||||
|
||||
uhash_removeElement(SHARED_DATA_HASHTABLE, e);
|
||||
mySharedData->sharedDataCached = FALSE;
|
||||
ucnv_deleteSharedConverterData (mySharedData);
|
||||
uhash_removeElement(SHARED_DATA_HASHTABLE, e);
|
||||
mySharedData->sharedDataCached = FALSE;
|
||||
ucnv_deleteSharedConverterData (mySharedData);
|
||||
} else {
|
||||
++remaining;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(++i == 1 && remaining > 0);
|
||||
umtx_unlock(&cnvCacheMutex);
|
||||
|
||||
ucnv_io_flushAvailableConverterCache();
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnvmbcs.h"
|
||||
#include "ucnv_ext.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
|
@ -42,7 +44,10 @@ U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
|
|||
work.
|
||||
*/
|
||||
|
||||
union UConverterTable;
|
||||
union UConverterTable {
|
||||
UConverterMBCSTable mbcs;
|
||||
};
|
||||
|
||||
typedef union UConverterTable UConverterTable;
|
||||
|
||||
struct UConverterImpl;
|
||||
|
@ -86,7 +91,7 @@ struct UConverterSharedData {
|
|||
uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */
|
||||
|
||||
const void *dataMemory; /* from udata_openChoice() - for cleanup */
|
||||
UConverterTable *table; /* Pointer to conversion data */
|
||||
void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
|
||||
|
||||
const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
|
||||
|
||||
|
@ -97,9 +102,23 @@ struct UConverterSharedData {
|
|||
|
||||
/*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
};
|
||||
|
||||
typedef struct UConverterSharedData UConverterSharedData;
|
||||
/*
|
||||
* Shared data structures currently come in two flavors:
|
||||
* - readonly for built-in algorithmic converters
|
||||
* - allocated for MBCS, with a pointer to an allocated UConverterTable
|
||||
* which always has a UConverterMBCSTable
|
||||
*
|
||||
* To eliminate one allocation, I am making the UConverterMBCSTable
|
||||
* a member of the shared data. It is the last member so that static
|
||||
* definitions of UConverterSharedData work as before.
|
||||
* The table field above also remains to avoid updating all static
|
||||
* definitions, but is now unused.
|
||||
*
|
||||
* markus 2003-nov-07
|
||||
*/
|
||||
UConverterMBCSTable mbcs;
|
||||
};
|
||||
|
||||
/* Defines a UConverter, the lightweight mutable part the user sees */
|
||||
|
||||
|
@ -208,6 +227,21 @@ UConverterDataLMBCS;
|
|||
|
||||
#define CONVERTER_FILE_EXTENSION ".cnv"
|
||||
|
||||
/**
|
||||
* Load a non-algorithmic converter.
|
||||
* If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
|
||||
*/
|
||||
UConverterSharedData *
|
||||
ucnv_load(const char *pkg, const char *name, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Unload a non-algorithmic converter.
|
||||
* It must be sharedData->referenceCounter != ~0
|
||||
* and this function must be called inside umtx_lock(&cnvCacheMutex).
|
||||
*/
|
||||
void
|
||||
ucnv_unload(UConverterSharedData *sharedData);
|
||||
|
||||
/**
|
||||
* Swap ICU .cnv conversion tables. See udataswp.h.
|
||||
* @internal
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_CFUNC void
|
||||
|
|
|
@ -21,14 +21,6 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnvmbcs.h"
|
||||
|
||||
union UConverterTable
|
||||
{
|
||||
UConverterMBCSTable mbcs;
|
||||
};
|
||||
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
|
@ -43,7 +35,11 @@ U_CDECL_BEGIN
|
|||
* U+ffff "illegal"
|
||||
*/
|
||||
|
||||
/** Forward declaration, see ucnv_bld.h */
|
||||
struct UConverterSharedData;
|
||||
typedef struct UConverterSharedData UConverterSharedData;
|
||||
|
||||
/* function types for UConverterImpl ---------------------------------------- */
|
||||
|
||||
typedef void (*UConverterLoad) (UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
|
||||
typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
|
||||
|
|
|
@ -264,13 +264,15 @@ ucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
|
|||
|
||||
/*
|
||||
* get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
|
||||
* or 1 for DBCS-only,
|
||||
* or -1 if the converter is not SI/SO stateful
|
||||
*
|
||||
* Note: For SI/SO stateful converters getting here,
|
||||
* cnv->mode==0 is equivalent to firstLength==1.
|
||||
*/
|
||||
#define UCNV_SISO_STATE(cnv) \
|
||||
((cnv)->sharedData->table->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : -1)
|
||||
((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \
|
||||
(cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1)
|
||||
|
||||
/*
|
||||
* target<targetLimit; set error code for overflow
|
||||
|
@ -376,7 +378,7 @@ ucnv_extContinueMatchToU(UConverter *cnv,
|
|||
uint32_t value;
|
||||
int32_t match, length;
|
||||
|
||||
match=ucnv_extMatchToU(cnv->sharedData->table->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
|
||||
match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
|
||||
cnv->preToU, cnv->preToULength,
|
||||
pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
|
||||
&value,
|
||||
|
@ -394,7 +396,7 @@ ucnv_extContinueMatchToU(UConverter *cnv,
|
|||
}
|
||||
|
||||
/* write result */
|
||||
ucnv_extWriteToU(cnv, cnv->sharedData->table->mbcs.extIndexes,
|
||||
ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
|
||||
value,
|
||||
&pArgs->target, pArgs->targetLimit,
|
||||
&pArgs->offsets, srcIndex,
|
||||
|
@ -674,7 +676,7 @@ ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
|
|||
const uint8_t *result;
|
||||
int32_t length, prevLength;
|
||||
|
||||
length=(int32_t)UCNV_EXT_FROM_U_GET_LENGTH(value);
|
||||
length=UCNV_EXT_FROM_U_GET_LENGTH(value);
|
||||
value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
|
||||
|
||||
/* output the result */
|
||||
|
@ -756,7 +758,12 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
|
|||
*src, (int32_t)(srcLimit-*src),
|
||||
&value,
|
||||
cnv->useFallback, flush);
|
||||
if(match>=2) {
|
||||
|
||||
/* reject a match if the result is a single byte for DBCS-only */
|
||||
if( match>=2 &&
|
||||
!(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 &&
|
||||
cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
|
||||
) {
|
||||
/* advance src pointer for the consumed input */
|
||||
*src+=match-2; /* remove 2 for the initial code point */
|
||||
|
||||
|
@ -815,7 +822,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
|
|||
/* write result for simple, single-character conversion */
|
||||
int32_t length;
|
||||
|
||||
length=(int32_t)UCNV_EXT_FROM_U_GET_LENGTH(value);
|
||||
length=UCNV_EXT_FROM_U_GET_LENGTH(value);
|
||||
value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
|
||||
|
||||
if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
|
||||
|
@ -856,7 +863,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
|
|||
uint32_t value;
|
||||
int32_t match;
|
||||
|
||||
match=ucnv_extMatchFromU(cnv->sharedData->table->mbcs.extIndexes,
|
||||
match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
|
||||
cnv->preFromUFirstCP,
|
||||
cnv->preFromU, cnv->preFromULength,
|
||||
pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
|
||||
|
@ -880,7 +887,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
|
|||
cnv->preFromUFirstCP=U_SENTINEL;
|
||||
|
||||
/* write result */
|
||||
ucnv_extWriteFromU(cnv, cnv->sharedData->table->mbcs.extIndexes,
|
||||
ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
|
||||
value,
|
||||
&pArgs->target, pArgs->targetLimit,
|
||||
&pArgs->offsets, srcIndex,
|
||||
|
@ -939,6 +946,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
|
|||
const int32_t *cx,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
int32_t minLength,
|
||||
UChar32 c,
|
||||
UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
|
||||
int32_t sectionIndex,
|
||||
|
@ -958,7 +966,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
|
|||
|
||||
if( value!=0 &&
|
||||
UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
|
||||
) {
|
||||
if(c>=0) {
|
||||
/* add the initial code point */
|
||||
|
@ -978,13 +986,13 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
|
|||
/* no mapping, do nothing */
|
||||
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
|
||||
ucnv_extGetUnicodeSetString(
|
||||
cnv, cx, set, which,
|
||||
cnv, cx, set, which, minLength,
|
||||
U_SENTINEL, s, length+1,
|
||||
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
|
||||
pErrorCode);
|
||||
} else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
|
||||
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
|
||||
) {
|
||||
uset_addString(set, s, length+1);
|
||||
}
|
||||
|
@ -1001,13 +1009,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
|
|||
const uint32_t *stage3b;
|
||||
|
||||
uint32_t value;
|
||||
int32_t st1, stage1Length, st2, st3;
|
||||
int32_t st1, stage1Length, st2, st3, minLength;
|
||||
|
||||
UChar s[UCNV_EXT_MAX_UCHARS];
|
||||
UChar32 c;
|
||||
int32_t length;
|
||||
|
||||
cx=cnv->sharedData->table->mbcs.extIndexes;
|
||||
cx=cnv->sharedData->mbcs.extIndexes;
|
||||
if(cx==NULL) {
|
||||
return;
|
||||
}
|
||||
|
@ -1021,6 +1029,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
|
|||
/* enumerate the from-Unicode trie table */
|
||||
c=0; /* keep track of the current code point while enumerating */
|
||||
|
||||
if(cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
|
||||
/* DBCS-only, ignore single-byte results */
|
||||
minLength=2;
|
||||
} else {
|
||||
minLength=1;
|
||||
}
|
||||
|
||||
/*
|
||||
* the trie enumeration is almost the same as
|
||||
* in _MBCSGetUnicodeSet() for MBCS_OUTPUT_1
|
||||
|
@ -1048,13 +1063,13 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
|
|||
length=0;
|
||||
U16_APPEND_UNSAFE(s, length, c);
|
||||
ucnv_extGetUnicodeSetString(
|
||||
cnv, cx, set, which,
|
||||
cnv, cx, set, which, minLength,
|
||||
c, s, length,
|
||||
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
|
||||
pErrorCode);
|
||||
} else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
|
||||
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>0
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
|
||||
) {
|
||||
uset_add(set, c);
|
||||
}
|
||||
|
|
|
@ -439,7 +439,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
|
|||
#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
|
||||
|
||||
/* use after masking off the roundtrip flag */
|
||||
#define UCNV_EXT_FROM_U_GET_LENGTH(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
|
||||
#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
|
||||
|
||||
/* get bytes or bytes index */
|
||||
#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)
|
||||
|
|
|
@ -442,7 +442,7 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
|
|||
}
|
||||
|
||||
/* enumerate the from-Unicode trie table */
|
||||
mbcsTable=&cnv->sharedData->table->mbcs;
|
||||
mbcsTable=&cnv->sharedData->mbcs;
|
||||
table=mbcsTable->fromUnicodeTable;
|
||||
if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
|
||||
maxStage1=0x440;
|
||||
|
@ -486,6 +486,48 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
|
|||
c+=1024; /* empty stage 2 block */
|
||||
}
|
||||
}
|
||||
} else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) {
|
||||
/* ignore single-byte results */
|
||||
const uint32_t *stage2;
|
||||
const uint16_t *stage3, *results;
|
||||
|
||||
results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
|
||||
|
||||
for(st1=0; st1<maxStage1; ++st1) {
|
||||
st2=table[st1];
|
||||
if(st2>(maxStage1>>1)) {
|
||||
stage2=(const uint32_t *)table+st2;
|
||||
for(st2=0; st2<64; ++st2) {
|
||||
if((st3=stage2[st2])!=0) {
|
||||
/* read the stage 3 block */
|
||||
stage3=results+16*(uint32_t)(uint16_t)st3;
|
||||
|
||||
/* get the roundtrip flags for the stage 3 block */
|
||||
st3>>=16;
|
||||
|
||||
/*
|
||||
* Add code points for which the roundtrip flag is set.
|
||||
* Once we get a set for fallback mappings, we have to check
|
||||
* non-roundtrip stage 3 results for whether they are 0.
|
||||
* See _MBCSFromUnicodeWithOffsets() for details.
|
||||
*
|
||||
* Ignore single-byte results (<0x100).
|
||||
*/
|
||||
do {
|
||||
if((st3&1)!=0 && *stage3>=0x100) {
|
||||
uset_add(set, c);
|
||||
}
|
||||
st3>>=1;
|
||||
++stage3;
|
||||
} while((++c&0xf)!=0);
|
||||
} else {
|
||||
c+=16; /* empty stage 3 block */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
c+=1024; /* empty stage 2 block */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const uint32_t *stage2;
|
||||
|
||||
|
@ -552,7 +594,7 @@ _extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
|
|||
|
||||
cnv->useSubChar1=FALSE;
|
||||
|
||||
if( (cx=sharedData->table->mbcs.extIndexes)!=NULL &&
|
||||
if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
|
||||
ucnv_extInitialMatchFromU(
|
||||
cnv, cx,
|
||||
cp, source, sourceLimit,
|
||||
|
@ -617,7 +659,7 @@ _extToU(UConverter *cnv, const UConverterSharedData *sharedData,
|
|||
UErrorCode *pErrorCode) {
|
||||
const int32_t *cx;
|
||||
|
||||
if( (cx=sharedData->table->mbcs.extIndexes)!=NULL &&
|
||||
if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
|
||||
ucnv_extInitialMatchToU(
|
||||
cnv, cx,
|
||||
length, source, sourceLimit,
|
||||
|
@ -708,7 +750,7 @@ _EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
|
|||
uint32_t stage2Entry;
|
||||
uint32_t size, sizeofFromUBytes;
|
||||
|
||||
mbcsTable=&sharedData->table->mbcs;
|
||||
mbcsTable=&sharedData->mbcs;
|
||||
|
||||
table=mbcsTable->fromUnicodeTable;
|
||||
bytes=mbcsTable->fromUnicodeBytes;
|
||||
|
@ -824,7 +866,7 @@ _MBCSLoad(UConverterSharedData *sharedData,
|
|||
const uint8_t *raw,
|
||||
UErrorCode *pErrorCode) {
|
||||
UDataInfo info;
|
||||
UConverterMBCSTable *mbcsTable=&sharedData->table->mbcs;
|
||||
UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
|
||||
_MBCSHeader *header=(_MBCSHeader *)raw;
|
||||
uint32_t offset;
|
||||
|
||||
|
@ -833,15 +875,6 @@ _MBCSLoad(UConverterSharedData *sharedData,
|
|||
return;
|
||||
}
|
||||
|
||||
mbcsTable->countStates=(uint8_t)header->countStates;
|
||||
mbcsTable->countToUFallbacks=header->countToUFallbacks;
|
||||
mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
|
||||
mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
|
||||
mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
|
||||
|
||||
mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
|
||||
mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
|
||||
mbcsTable->fromUBytesLength=header->fromUBytesLength;
|
||||
mbcsTable->outputType=(uint8_t)header->flags;
|
||||
|
||||
/* extension data, header version 4.2 and higher */
|
||||
|
@ -850,22 +883,106 @@ _MBCSLoad(UConverterSharedData *sharedData,
|
|||
mbcsTable->extIndexes=(const int32_t *)(raw+offset);
|
||||
}
|
||||
|
||||
/* make sure that the output type is known */
|
||||
switch(mbcsTable->outputType) {
|
||||
case MBCS_OUTPUT_1:
|
||||
case MBCS_OUTPUT_2:
|
||||
case MBCS_OUTPUT_3:
|
||||
case MBCS_OUTPUT_4:
|
||||
case MBCS_OUTPUT_3_EUC:
|
||||
case MBCS_OUTPUT_4_EUC:
|
||||
case MBCS_OUTPUT_2_SISO:
|
||||
/* OK */
|
||||
break;
|
||||
case MBCS_OUTPUT_EXT_ONLY:
|
||||
/* ### TODO */
|
||||
default:
|
||||
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
||||
return;
|
||||
if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
|
||||
UConverterSharedData *baseSharedData;
|
||||
const int32_t *extIndexes;
|
||||
const char *baseName;
|
||||
|
||||
/* extension-only file, load the base table and set values appropriately */
|
||||
if((extIndexes=mbcsTable->extIndexes)==NULL) {
|
||||
/* extension-only file without extension */
|
||||
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* load the base table */
|
||||
baseName=(const char *)(header+1);
|
||||
if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
|
||||
/* forbid loading this same extension-only file */
|
||||
/* TODO better prevention of loading another extension table */
|
||||
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* TODO pass package name, same as current converter (see ucnv_bld.c) and/or parse out of prefix of base name */
|
||||
baseSharedData=ucnv_load(NULL, baseName, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
|
||||
baseSharedData->mbcs.baseSharedData!=NULL
|
||||
) {
|
||||
ucnv_unload(baseSharedData);
|
||||
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* copy the base table data */
|
||||
uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
|
||||
|
||||
/* overwrite values with relevant ones for the extension converter */
|
||||
mbcsTable->baseSharedData=baseSharedData;
|
||||
mbcsTable->extIndexes=extIndexes;
|
||||
|
||||
/*
|
||||
* It would be possible to share the swapLFNL data with a base converter,
|
||||
* but the generated name would have to be different, and the memory
|
||||
* would have to be free'd only once.
|
||||
* It is easier to just create the data for the extension converter
|
||||
* separately when it is requested.
|
||||
*/
|
||||
mbcsTable->swapLFNLStateTable=NULL;
|
||||
mbcsTable->swapLFNLFromUnicodeBytes=NULL;
|
||||
mbcsTable->swapLFNLName=NULL;
|
||||
|
||||
/*
|
||||
* Set a special, runtime-only outputType if the extension converter
|
||||
* is a DBCS version of an SI/SO-stateful base converter.
|
||||
*/
|
||||
if( baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO &&
|
||||
(sharedData->staticData->conversionType==UCNV_DBCS ||
|
||||
(sharedData->staticData->conversionType==UCNV_MBCS &&
|
||||
sharedData->staticData->minBytesPerChar>=2))
|
||||
) {
|
||||
int32_t entry;
|
||||
|
||||
/* get the dbcs state from the state table entry for SO=0x0e */
|
||||
entry=mbcsTable->stateTable[0][0xe];
|
||||
if( MBCS_ENTRY_IS_FINAL(entry) &&
|
||||
MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
|
||||
MBCS_ENTRY_FINAL_STATE(entry)!=0
|
||||
) {
|
||||
mbcsTable->dbcsOnlyState=MBCS_ENTRY_FINAL_STATE(entry);
|
||||
|
||||
mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* make sure that the output type is known */
|
||||
switch(mbcsTable->outputType) {
|
||||
case MBCS_OUTPUT_1:
|
||||
case MBCS_OUTPUT_2:
|
||||
case MBCS_OUTPUT_3:
|
||||
case MBCS_OUTPUT_4:
|
||||
case MBCS_OUTPUT_3_EUC:
|
||||
case MBCS_OUTPUT_4_EUC:
|
||||
case MBCS_OUTPUT_2_SISO:
|
||||
/* OK */
|
||||
break;
|
||||
default:
|
||||
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
||||
return;
|
||||
}
|
||||
|
||||
mbcsTable->countStates=(uint8_t)header->countStates;
|
||||
mbcsTable->countToUFallbacks=header->countToUFallbacks;
|
||||
mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
|
||||
mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
|
||||
mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
|
||||
|
||||
mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
|
||||
mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
|
||||
mbcsTable->fromUBytesLength=header->fromUBytesLength;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -885,7 +1002,7 @@ _MBCSLoad(UConverterSharedData *sharedData,
|
|||
|
||||
static void
|
||||
_MBCSUnload(UConverterSharedData *sharedData) {
|
||||
UConverterMBCSTable *mbcsTable=&sharedData->table->mbcs;
|
||||
UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
|
||||
|
||||
if(mbcsTable->swapLFNLStateTable!=NULL) {
|
||||
uprv_free(mbcsTable->swapLFNLStateTable);
|
||||
|
@ -898,22 +1015,31 @@ _MBCSOpen(UConverter *cnv,
|
|||
const char *locale,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
UConverterMBCSTable *mbcsTable;
|
||||
const int32_t *extIndexes;
|
||||
uint8_t outputType;
|
||||
int8_t maxBytesPerUChar;
|
||||
|
||||
mbcsTable=&cnv->sharedData->mbcs;
|
||||
outputType=mbcsTable->outputType;
|
||||
|
||||
if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
|
||||
/* the swaplfnl option does not apply, remove it */
|
||||
cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
|
||||
}
|
||||
|
||||
if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
/* do this because double-checked locking is broken */
|
||||
UBool isCached;
|
||||
|
||||
umtx_lock(NULL);
|
||||
isCached=cnv->sharedData->table->mbcs.swapLFNLStateTable!=NULL;
|
||||
isCached=mbcsTable->swapLFNLStateTable!=NULL;
|
||||
umtx_unlock(NULL);
|
||||
|
||||
if(!isCached) {
|
||||
if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
|
||||
/* the option does not apply, remove it */
|
||||
cnv->options&=~UCNV_OPTION_SWAP_LFNL;
|
||||
cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -926,12 +1052,11 @@ _MBCSOpen(UConverter *cnv,
|
|||
}
|
||||
|
||||
/* fix maxBytesPerUChar depending on outputType and options etc. */
|
||||
outputType=cnv->sharedData->table->mbcs.outputType;
|
||||
if(outputType==MBCS_OUTPUT_2_SISO) {
|
||||
cnv->maxBytesPerUChar=3; /* SO+DBCS */
|
||||
}
|
||||
|
||||
extIndexes=cnv->sharedData->table->mbcs.extIndexes;
|
||||
extIndexes=mbcsTable->extIndexes;
|
||||
if(extIndexes!=NULL) {
|
||||
maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
|
||||
if(outputType==MBCS_OUTPUT_2_SISO) {
|
||||
|
@ -962,8 +1087,8 @@ _MBCSOpen(UConverter *cnv,
|
|||
|
||||
static const char *
|
||||
_MBCSGetName(const UConverter *cnv) {
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->table->mbcs.swapLFNLName!=NULL) {
|
||||
return cnv->sharedData->table->mbcs.swapLFNLName;
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
|
||||
return cnv->sharedData->mbcs.swapLFNLName;
|
||||
} else {
|
||||
return cnv->sharedData->staticData->name;
|
||||
}
|
||||
|
@ -1026,9 +1151,9 @@ _MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
offsets=pArgs->offsets;
|
||||
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable=cnv->sharedData->table->mbcs.stateTable;
|
||||
stateTable=cnv->sharedData->mbcs.stateTable;
|
||||
}
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
|
@ -1177,9 +1302,9 @@ _MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
offsets=pArgs->offsets;
|
||||
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable=cnv->sharedData->table->mbcs.stateTable;
|
||||
stateTable=cnv->sharedData->mbcs.stateTable;
|
||||
}
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
|
@ -1411,8 +1536,8 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
}
|
||||
|
||||
if(cnv->sharedData->table->mbcs.countStates==1) {
|
||||
if(!(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
if(cnv->sharedData->mbcs.countStates==1) {
|
||||
if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
|
||||
} else {
|
||||
_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
|
||||
|
@ -1428,18 +1553,26 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
offsets=pArgs->offsets;
|
||||
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable=cnv->sharedData->table->mbcs.stateTable;
|
||||
stateTable=cnv->sharedData->mbcs.stateTable;
|
||||
}
|
||||
unicodeCodeUnits=cnv->sharedData->table->mbcs.unicodeCodeUnits;
|
||||
unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
offset=cnv->toUnicodeStatus;
|
||||
state=(uint8_t)(cnv->mode);
|
||||
byteIndex=cnv->toULength;
|
||||
bytes=cnv->toUBytes;
|
||||
|
||||
/*
|
||||
* if we are in the SBCS state for a DBCS-only converter,
|
||||
* then load the DBCS state from the MBCS data
|
||||
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
|
||||
*/
|
||||
if((state=(uint8_t)(cnv->mode))==0) {
|
||||
state=cnv->sharedData->mbcs.dbcsOnlyState;
|
||||
}
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex=byteIndex==0 ? 0 : -1;
|
||||
nextSourceIndex=0;
|
||||
|
@ -1569,6 +1702,9 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
|
||||
cnv->mode=state;
|
||||
|
||||
/* set the next state early so that we can reuse the entry variable */
|
||||
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
|
||||
|
@ -1588,7 +1724,7 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
byteIndex=0;
|
||||
} else if(c==0xfffe) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset))!=0xfffe) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
|
||||
/* output fallback BMP code point */
|
||||
*target++=(UChar)entry;
|
||||
if(offsets!=NULL) {
|
||||
|
@ -1682,7 +1818,15 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
* The 21 unused bits may later be used for more sophisticated
|
||||
* state transitions.
|
||||
*/
|
||||
byteIndex=0;
|
||||
if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
|
||||
byteIndex=0;
|
||||
} else {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
state=(uint8_t)(cnv->mode); /* restore the previous state */
|
||||
|
||||
/* callback(illegal) */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv)) {
|
||||
/* output BMP code point */
|
||||
|
@ -1713,10 +1857,6 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
} else /* unassigned sequences indicated with byteIndex>0 */ {
|
||||
/* try an extension mapping */
|
||||
pArgs->source=(const char *)source;
|
||||
|
||||
/* save the state for proper extension mapping with SI/SO-stateful converters */
|
||||
cnv->mode=state;
|
||||
|
||||
byteIndex=_extToU(cnv, cnv->sharedData,
|
||||
byteIndex, (const char **)&source, (const char *)sourceLimit,
|
||||
&target, targetLimit,
|
||||
|
@ -1762,9 +1902,9 @@ _MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
source=(const uint8_t *)pArgs->source;
|
||||
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable=cnv->sharedData->table->mbcs.stateTable;
|
||||
stateTable=cnv->sharedData->mbcs.stateTable;
|
||||
}
|
||||
|
||||
/* conversion loop */
|
||||
|
@ -1856,14 +1996,14 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
|
||||
}
|
||||
|
||||
if(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
|
||||
if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
|
||||
/*
|
||||
* Using the generic ucnv_getNextUChar() code lets us deal correctly
|
||||
* with the rare case of a codepage that maps single surrogates
|
||||
* without adding the complexity to this already complicated function here.
|
||||
*/
|
||||
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
|
||||
} else if(cnv->sharedData->table->mbcs.countStates==1) {
|
||||
} else if(cnv->sharedData->mbcs.countStates==1) {
|
||||
return _MBCSSingleGetNextUChar(pArgs, pErrorCode);
|
||||
}
|
||||
|
||||
|
@ -1872,15 +2012,23 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
|
||||
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->table->mbcs.swapLFNLStateTable;
|
||||
stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable=cnv->sharedData->table->mbcs.stateTable;
|
||||
stateTable=cnv->sharedData->mbcs.stateTable;
|
||||
}
|
||||
unicodeCodeUnits=cnv->sharedData->table->mbcs.unicodeCodeUnits;
|
||||
unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
offset=cnv->toUnicodeStatus;
|
||||
state=(uint8_t)(cnv->mode);
|
||||
|
||||
/*
|
||||
* if we are in the SBCS state for a DBCS-only converter,
|
||||
* then load the DBCS state from the MBCS data
|
||||
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
|
||||
*/
|
||||
if((state=(uint8_t)(cnv->mode))==0) {
|
||||
state=cnv->sharedData->mbcs.dbcsOnlyState;
|
||||
}
|
||||
|
||||
/* conversion loop */
|
||||
c=U_SENTINEL;
|
||||
|
@ -1902,6 +2050,9 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
|
||||
cnv->mode=state;
|
||||
|
||||
/* set the next state early so that we can reuse the entry variable */
|
||||
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
|
||||
|
@ -1921,7 +2072,7 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
/* output BMP code point */
|
||||
break;
|
||||
} else if(c==0xfffe) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset))!=0xfffe) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -1960,6 +2111,13 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
* The 21 unused bits may later be used for more sophisticated
|
||||
* state transitions.
|
||||
*/
|
||||
if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
state=(uint8_t)(cnv->mode); /* restore the previous state */
|
||||
|
||||
/* callback(illegal) */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if(UCNV_TO_U_USE_FALLBACK(cnv)) {
|
||||
/* output BMP code point */
|
||||
|
@ -2037,7 +2195,7 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
int32_t entry;
|
||||
uint8_t action;
|
||||
|
||||
entry=sharedData->table->mbcs.stateTable[0][b];
|
||||
entry=sharedData->mbcs.stateTable[0][b];
|
||||
/* MBCS_ENTRY_IS_FINAL(entry) */
|
||||
|
||||
if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
|
||||
|
@ -2115,21 +2273,21 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
/*
|
||||
* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
|
||||
* TODO In future releases, verify that this function is never called for SBCS
|
||||
* conversions, i.e., that sharedData->table->mbcs.countStates==1 is still true.
|
||||
* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
|
||||
* Removal improves code coverage.
|
||||
*/
|
||||
/* use optimized function if possible */
|
||||
if(sharedData->table->mbcs.countStates==1) {
|
||||
if(sharedData->mbcs.countStates==1) {
|
||||
return _MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)(*(*pSource)++), useFallback);
|
||||
}
|
||||
#endif
|
||||
|
||||
stateTable=sharedData->table->mbcs.stateTable;
|
||||
unicodeCodeUnits=sharedData->table->mbcs.unicodeCodeUnits;
|
||||
stateTable=sharedData->mbcs.stateTable;
|
||||
unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
|
||||
|
||||
/* converter state */
|
||||
offset=0;
|
||||
state=0;
|
||||
state=sharedData->mbcs.dbcsOnlyState;
|
||||
|
||||
/* conversion loop */
|
||||
do {
|
||||
|
@ -2151,7 +2309,7 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
if(entry!=0xfffe) {
|
||||
return (UChar32)entry;
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
|
||||
return _MBCSGetFallback(&sharedData->table->mbcs, offset);
|
||||
return _MBCSGetFallback(&sharedData->mbcs, offset);
|
||||
} else {
|
||||
return 0xfffe;
|
||||
}
|
||||
|
@ -2198,6 +2356,10 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
* The 21 unused bits may later be used for more sophisticated
|
||||
* state transitions.
|
||||
*/
|
||||
if(sharedData->mbcs.dbcsOnlyState!=0) {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
return 0xffff;
|
||||
}
|
||||
if(source==(const uint8_t *)sourceLimit) {
|
||||
/* if there are only state changes, then return "unassigned" */
|
||||
return 0xfffe;
|
||||
|
@ -2246,7 +2408,7 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
/* use optimized function if possible */
|
||||
cnv=pArgs->converter;
|
||||
unicodeMask=cnv->sharedData->table->mbcs.unicodeMask;
|
||||
unicodeMask=cnv->sharedData->mbcs.unicodeMask;
|
||||
|
||||
/* set up the local pointers */
|
||||
source=pArgs->source;
|
||||
|
@ -2255,11 +2417,11 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=cnv->sharedData->mbcs.fromUnicodeTable;
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
bytes=cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
|
||||
bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
|
||||
} else {
|
||||
bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
|
||||
bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
|
||||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
|
@ -2461,11 +2623,11 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=cnv->sharedData->mbcs.fromUnicodeTable;
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
results=(uint16_t *)cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
|
||||
results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
|
||||
} else {
|
||||
results=(uint16_t *)cnv->sharedData->table->mbcs.fromUnicodeBytes;
|
||||
results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
|
||||
}
|
||||
|
||||
if(cnv->useFallback) {
|
||||
|
@ -2475,7 +2637,7 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
/* use only roundtrips and fallbacks from private-use characters */
|
||||
minValue=0xc00;
|
||||
}
|
||||
hasSupplementary=(UBool)(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUChar32;
|
||||
|
@ -2631,11 +2793,11 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=cnv->sharedData->mbcs.fromUnicodeTable;
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
results=(uint16_t *)cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
|
||||
results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
|
||||
} else {
|
||||
results=(uint16_t *)cnv->sharedData->table->mbcs.fromUnicodeBytes;
|
||||
results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
|
||||
}
|
||||
|
||||
if(cnv->useFallback) {
|
||||
|
@ -2881,8 +3043,8 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* use optimized function if possible */
|
||||
outputType=cnv->sharedData->table->mbcs.outputType;
|
||||
unicodeMask=cnv->sharedData->table->mbcs.unicodeMask;
|
||||
outputType=cnv->sharedData->mbcs.outputType;
|
||||
unicodeMask=cnv->sharedData->mbcs.unicodeMask;
|
||||
if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
|
||||
if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
|
||||
|
@ -2902,12 +3064,12 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
table=cnv->sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=cnv->sharedData->mbcs.fromUnicodeTable;
|
||||
|
||||
if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
bytes=cnv->sharedData->table->mbcs.swapLFNLFromUnicodeBytes;
|
||||
bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
|
||||
} else {
|
||||
bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
|
||||
bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
|
||||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
|
@ -3085,6 +3247,17 @@ getTrail:
|
|||
}
|
||||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_DBCS_ONLY:
|
||||
/* 1/2-byte stateful table but only DBCS mappings used */
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
/* no mapping or SBCS result, not taken for DBCS-only */
|
||||
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
|
||||
length=0;
|
||||
} else {
|
||||
length=2;
|
||||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_3:
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
|
||||
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
|
||||
|
@ -3150,7 +3323,7 @@ getTrail:
|
|||
* Not having a default branch also causes warnings with
|
||||
* some compilers.
|
||||
*/
|
||||
value=0;
|
||||
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
|
||||
length=0;
|
||||
break;
|
||||
}
|
||||
|
@ -3367,7 +3540,7 @@ U_CFUNC int32_t
|
|||
_MBCSFromUChar32(UConverterSharedData *sharedData,
|
||||
UChar32 c, uint32_t *pValue,
|
||||
UBool useFallback) {
|
||||
const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;
|
||||
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
|
||||
const uint8_t *p;
|
||||
uint32_t stage2Entry;
|
||||
uint32_t value;
|
||||
|
@ -3376,13 +3549,13 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
/* ### TODO extension mapping */
|
||||
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
|
||||
if(sharedData->table->mbcs.outputType==MBCS_OUTPUT_1) {
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
|
||||
if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
|
||||
/* is this code point assigned, or do we use fallbacks? */
|
||||
if(useFallback ? value>=0x800 : value>=0xc00) {
|
||||
*pValue=value&0xff;
|
||||
|
@ -3395,17 +3568,28 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
|
||||
|
||||
/* get the bytes and the length for the output */
|
||||
switch(sharedData->table->mbcs.outputType) {
|
||||
switch(sharedData->mbcs.outputType) {
|
||||
case MBCS_OUTPUT_2:
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
length=1;
|
||||
} else {
|
||||
length=2;
|
||||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_DBCS_ONLY:
|
||||
/* 1/2-byte stateful table but only DBCS mappings used */
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
/* no mapping or SBCS result, not taken for DBCS-only */
|
||||
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
|
||||
length=0;
|
||||
} else {
|
||||
length=2;
|
||||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_3:
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
|
||||
if(value<=0xff) {
|
||||
length=1;
|
||||
|
@ -3416,7 +3600,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_4:
|
||||
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
length=1;
|
||||
} else if(value<=0xffff) {
|
||||
|
@ -3428,7 +3612,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_3_EUC:
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
/* EUC 16-bit fixed-length representation */
|
||||
if(value<=0xff) {
|
||||
length=1;
|
||||
|
@ -3443,7 +3627,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_4_EUC:
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
|
||||
/* EUC 16-bit fixed-length representation applied to the first two bytes */
|
||||
if(value<=0xff) {
|
||||
|
@ -3505,15 +3689,15 @@ _MBCSSingleFromUChar32(UConverterSharedData *sharedData,
|
|||
int32_t value;
|
||||
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
|
||||
table=sharedData->table->mbcs.fromUnicodeTable;
|
||||
table=sharedData->mbcs.fromUnicodeTable;
|
||||
|
||||
/* get the byte for the output */
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);
|
||||
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
|
||||
/* is this code point assigned, or do we use fallbacks? */
|
||||
if(useFallback ? value>=0x800 : value>=0xc00) {
|
||||
return value&0xff;
|
||||
|
@ -3529,9 +3713,10 @@ static void
|
|||
_MBCSGetStarters(const UConverter* cnv,
|
||||
UBool starters[256],
|
||||
UErrorCode *pErrorCode) {
|
||||
const int32_t *state0=cnv->sharedData->table->mbcs.stateTable[0];
|
||||
const int32_t *state0;
|
||||
int i;
|
||||
|
||||
state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
|
||||
for(i=0; i<256; ++i) {
|
||||
/* all bytes that cause a state transition from state 0 are lead bytes */
|
||||
starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
|
||||
|
@ -3544,7 +3729,7 @@ _MBCSGetStarters(const UConverter* cnv,
|
|||
*/
|
||||
U_CFUNC UBool
|
||||
_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
|
||||
return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->table->mbcs.stateTable[0][(uint8_t)byte]);
|
||||
return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3558,7 +3743,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
/* first, select between subChar and subChar1 */
|
||||
if( cnv->subChar1!=0 &&
|
||||
(cnv->sharedData->table->mbcs.extIndexes!=NULL ?
|
||||
(cnv->sharedData->mbcs.extIndexes!=NULL ?
|
||||
cnv->useSubChar1 :
|
||||
(cnv->invalidUCharBuffer[0]<=0xff))
|
||||
) {
|
||||
|
@ -3574,7 +3759,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
|
|||
/* reset the selector for the next code point */
|
||||
cnv->useSubChar1=FALSE;
|
||||
|
||||
switch(cnv->sharedData->table->mbcs.outputType) {
|
||||
switch(cnv->sharedData->mbcs.outputType) {
|
||||
case MBCS_OUTPUT_2_SISO:
|
||||
p=buffer;
|
||||
|
||||
|
@ -3616,9 +3801,9 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
|
|||
U_CFUNC UConverterType
|
||||
_MBCSGetType(const UConverter* converter) {
|
||||
/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
|
||||
if(converter->sharedData->table->mbcs.countStates==1) {
|
||||
if(converter->sharedData->mbcs.countStates==1) {
|
||||
return (UConverterType)UCNV_SBCS;
|
||||
} else if((converter->sharedData->table->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
|
||||
} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
|
||||
return (UConverterType)UCNV_EBCDIC_STATEFUL;
|
||||
} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
|
||||
return (UConverterType)UCNV_DBCS;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
/**
|
||||
* ICU conversion (.cnv) data file structure, following the usual UDataInfo
|
||||
|
@ -201,7 +201,9 @@ enum {
|
|||
|
||||
MBCS_OUTPUT_EXT_ONLY, /* e */
|
||||
|
||||
MBCS_OUTPUT_COUNT
|
||||
MBCS_OUTPUT_COUNT,
|
||||
|
||||
MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -219,7 +221,7 @@ typedef struct {
|
|||
*/
|
||||
typedef struct UConverterMBCSTable {
|
||||
/* toUnicode */
|
||||
uint8_t countStates;
|
||||
uint8_t countStates, dbcsOnlyState;
|
||||
uint32_t countToUFallbacks;
|
||||
|
||||
const int32_t (*stateTable)/*[countStates]*/[256];
|
||||
|
@ -238,6 +240,7 @@ typedef struct UConverterMBCSTable {
|
|||
char *swapLFNLName;
|
||||
|
||||
/* extension data */
|
||||
struct UConverterSharedData *baseSharedData;
|
||||
const int32_t *extIndexes;
|
||||
} UConverterMBCSTable;
|
||||
|
||||
|
@ -288,7 +291,7 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
* returns fallback values.
|
||||
*/
|
||||
#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \
|
||||
(UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->table->mbcs.stateTable[0][(uint8_t)(b)])
|
||||
(UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)])
|
||||
|
||||
/**
|
||||
* This is an internal function that allows other converter implementations
|
||||
|
@ -299,7 +302,7 @@ _MBCSIsLeadByte(UConverterSharedData *sharedData, char byte);
|
|||
|
||||
/** This is a macro version of _MBCSIsLeadByte(). */
|
||||
#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \
|
||||
(UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->table->mbcs.stateTable[0][(uint8_t)(byte)])
|
||||
(UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
|
||||
|
||||
/**
|
||||
* This is another simple conversion function for internal use by other
|
||||
|
|
Loading…
Add table
Reference in a new issue