mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-3346 support stateless DBCS-only and simple (single-character) conversions
X-SVN-Rev: 13655
This commit is contained in:
parent
31a8625180
commit
506bc1495f
4 changed files with 162 additions and 112 deletions
|
@ -26,12 +26,6 @@
|
|||
#include "ucnv_ext.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*
|
||||
* ### TODO: probably need pointer to baseTableSharedData
|
||||
* and also copy the base table's pointers for the base table arrays etc.
|
||||
* into this sharedData
|
||||
*/
|
||||
|
||||
/* to Unicode --------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
|
@ -331,25 +325,24 @@ ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* ### TODO */
|
||||
|
||||
U_CFUNC UChar32
|
||||
ucnv_extSimpleMatchToU(const int32_t *cx,
|
||||
UChar32 cp,
|
||||
UBool useFallback,
|
||||
UErrorCode *pErrorCode) {
|
||||
const char *source, int32_t length,
|
||||
UBool useFallback) {
|
||||
uint32_t value;
|
||||
int32_t match;
|
||||
|
||||
if(length<=0) {
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* try to match */
|
||||
match=ucnv_extMatchToU(cx, -1,
|
||||
cp,
|
||||
NULL, 0,
|
||||
source, length,
|
||||
NULL, 0,
|
||||
&value,
|
||||
useFallback, TRUE);
|
||||
if(match>0) {
|
||||
if(match==length) {
|
||||
/* write result for simple, single-character conversion */
|
||||
if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
|
||||
return UCNV_EXT_TO_U_GET_CODE_POINT(value);
|
||||
|
@ -359,14 +352,13 @@ ucnv_extSimpleMatchToU(const int32_t *cx,
|
|||
/*
|
||||
* return no match because
|
||||
* - match>0 && value points to string: simple conversion cannot handle multiple code points
|
||||
* - match>0 && match!=length: not all input consumed, forbidden for this function
|
||||
* - match==0: no match found in the first place
|
||||
* - match<0: partial match, not supported for simple conversion (and flush==TRUE)
|
||||
*/
|
||||
return 0;
|
||||
return 0xfffe;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* continue partial match with new input
|
||||
* never called for simple, single-character conversion
|
||||
|
@ -800,14 +792,10 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* ### TODO */
|
||||
|
||||
U_CFUNC int32_t
|
||||
ucnv_extSimpleMatchFromU(const int32_t *cx,
|
||||
UChar32 cp, uint32_t *pValue,
|
||||
UBool useFallback,
|
||||
UErrorCode *pErrorCode) {
|
||||
UBool useFallback) {
|
||||
uint32_t value;
|
||||
int32_t match;
|
||||
|
||||
|
@ -828,6 +816,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
|
|||
if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
|
||||
*pValue=value;
|
||||
return length;
|
||||
#if 0 /* not currently used */
|
||||
} else if(length==4) {
|
||||
/* de-serialize a 4-byte result */
|
||||
const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
|
||||
|
@ -837,6 +826,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
|
|||
((uint32_t)result[2]<<8)|
|
||||
result[3];
|
||||
return 4;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -850,8 +840,6 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* continue partial match with new input, requires cnv->preFromUFirstCP>=0
|
||||
* never called for simple, single-character conversion
|
||||
|
|
|
@ -342,6 +342,11 @@ ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
|
|||
UBool flush,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC UChar32
|
||||
ucnv_extSimpleMatchToU(const int32_t *cx,
|
||||
const char *source, int32_t length,
|
||||
UBool useFallback);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_extContinueMatchToU(UConverter *cnv,
|
||||
UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
|
||||
|
@ -360,8 +365,7 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
|
|||
U_CFUNC int32_t
|
||||
ucnv_extSimpleMatchFromU(const int32_t *cx,
|
||||
UChar32 cp, uint32_t *pValue,
|
||||
UBool useFallback,
|
||||
UErrorCode *pErrorCode);
|
||||
UBool useFallback);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_extContinueMatchFromU(UConverter *cnv,
|
||||
|
|
|
@ -937,22 +937,64 @@ _MBCSLoad(UConverterSharedData *sharedData,
|
|||
|
||||
/*
|
||||
* Set a special, runtime-only outputType if the extension converter
|
||||
* is a DBCS version of an SI/SO-stateful base converter.
|
||||
* is a DBCS version of a base converter that also maps single bytes.
|
||||
*/
|
||||
if( baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO &&
|
||||
(sharedData->staticData->conversionType==UCNV_DBCS ||
|
||||
if( sharedData->staticData->conversionType==UCNV_DBCS ||
|
||||
(sharedData->staticData->conversionType==UCNV_MBCS &&
|
||||
sharedData->staticData->minBytesPerChar>=2))
|
||||
sharedData->staticData->minBytesPerChar>=2)
|
||||
) {
|
||||
int32_t entry;
|
||||
if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
|
||||
/* the base converter is SI/SO-stateful */
|
||||
int32_t entry;
|
||||
|
||||
/* get the dbcs state from the state table entry for SO=0x0e */
|
||||
entry=mbcsTable->stateTable[0][0xe];
|
||||
if( MBCS_ENTRY_IS_FINAL(entry) &&
|
||||
MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
|
||||
MBCS_ENTRY_FINAL_STATE(entry)!=0
|
||||
/* get the dbcs state from the state table entry for SO=0x0e */
|
||||
entry=mbcsTable->stateTable[0][0xe];
|
||||
if( MBCS_ENTRY_IS_FINAL(entry) &&
|
||||
MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
|
||||
MBCS_ENTRY_FINAL_STATE(entry)!=0
|
||||
) {
|
||||
mbcsTable->dbcsOnlyState=MBCS_ENTRY_FINAL_STATE(entry);
|
||||
|
||||
mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
|
||||
}
|
||||
} else if(
|
||||
baseSharedData->staticData->conversionType==UCNV_MBCS &&
|
||||
baseSharedData->staticData->minBytesPerChar==1 &&
|
||||
baseSharedData->staticData->maxBytesPerChar==2 &&
|
||||
mbcsTable->countStates<=127
|
||||
) {
|
||||
mbcsTable->dbcsOnlyState=MBCS_ENTRY_FINAL_STATE(entry);
|
||||
/* non-stateful base converter, need to modify the state table */
|
||||
int32_t (*newStateTable)[256];
|
||||
int32_t *state;
|
||||
int32_t i, count;
|
||||
|
||||
/* allocate a new state table and copy the base state table contents */
|
||||
count=mbcsTable->countStates;
|
||||
newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024);
|
||||
if(newStateTable==NULL) {
|
||||
ucnv_unload(baseSharedData);
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
|
||||
|
||||
/* change all final single-byte entries to go to a new all-illegal state */
|
||||
state=newStateTable[0];
|
||||
for(i=0; i<256; ++i) {
|
||||
if(MBCS_ENTRY_IS_FINAL(state[i])) {
|
||||
state[i]=MBCS_ENTRY_TRANSITION(count, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* build the new all-illegal state */
|
||||
state=newStateTable[count];
|
||||
for(i=0; i<256; ++i) {
|
||||
state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
|
||||
}
|
||||
mbcsTable->stateTable=newStateTable;
|
||||
mbcsTable->countStates=(uint8_t)(count+1);
|
||||
mbcsTable->stateTableOwned=TRUE;
|
||||
|
||||
mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
|
||||
}
|
||||
|
@ -1007,6 +1049,12 @@ _MBCSUnload(UConverterSharedData *sharedData) {
|
|||
if(mbcsTable->swapLFNLStateTable!=NULL) {
|
||||
uprv_free(mbcsTable->swapLFNLStateTable);
|
||||
}
|
||||
if(mbcsTable->stateTableOwned) {
|
||||
uprv_free((void *)mbcsTable->stateTable);
|
||||
}
|
||||
if(mbcsTable->baseSharedData!=NULL) {
|
||||
ucnv_unload(mbcsTable->baseSharedData);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2235,11 +2283,12 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
#endif
|
||||
|
||||
/*
|
||||
* This is a simple version of getNextUChar() that is used
|
||||
* This is a simple version of _MBCSGetNextUChar() that is used
|
||||
* by other converter implementations.
|
||||
* It only returns an "assigned" result if it consumes the entire input.
|
||||
* It does not use state from the converter, nor error codes.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It does not handle conversion extensions (_extToU()).
|
||||
* It handles conversion extensions but not GB 18030.
|
||||
*
|
||||
* Return value:
|
||||
* U+fffe unassigned
|
||||
|
@ -2248,27 +2297,22 @@ _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
*/
|
||||
U_CFUNC UChar32
|
||||
_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
||||
const char **pSource, const char *sourceLimit,
|
||||
const char *source, int32_t length,
|
||||
UBool useFallback) {
|
||||
const uint8_t *source;
|
||||
|
||||
const int32_t (*stateTable)[256];
|
||||
const uint16_t *unicodeCodeUnits;
|
||||
|
||||
uint32_t offset;
|
||||
uint8_t state, action;
|
||||
|
||||
int32_t entry;
|
||||
UChar32 c;
|
||||
int32_t i, entry;
|
||||
|
||||
/* set up the local pointers */
|
||||
source=(const uint8_t *)*pSource;
|
||||
if(source>=(const uint8_t *)sourceLimit) {
|
||||
if(length<=0) {
|
||||
/* no input at all: "illegal" */
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* ### TODO extension */
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
|
||||
|
@ -2278,10 +2322,15 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
*/
|
||||
/* use optimized function if possible */
|
||||
if(sharedData->mbcs.countStates==1) {
|
||||
return _MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)(*(*pSource)++), useFallback);
|
||||
if(length==1) {
|
||||
return _MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
|
||||
} else {
|
||||
return 0xffff; /* illegal: more than a single byte for an SBCS converter */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set up the local pointers */
|
||||
stateTable=sharedData->mbcs.stateTable;
|
||||
unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
|
||||
|
||||
|
@ -2290,14 +2339,16 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
state=sharedData->mbcs.dbcsOnlyState;
|
||||
|
||||
/* conversion loop */
|
||||
do {
|
||||
entry=stateTable[state][*source++];
|
||||
for(i=0;;) {
|
||||
entry=stateTable[state][(uint8_t)source[i++]];
|
||||
if(MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
} else {
|
||||
*pSource=(const char *)source;
|
||||
|
||||
if(i==length) {
|
||||
return 0xffff; /* truncated character */
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* An if-else-if chain provides more reliable performance for
|
||||
* the most common cases compared to a switch.
|
||||
|
@ -2305,81 +2356,82 @@ _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
|||
action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
|
||||
if(action==MBCS_STATE_VALID_16) {
|
||||
offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
entry=unicodeCodeUnits[offset];
|
||||
if(entry!=0xfffe) {
|
||||
return (UChar32)entry;
|
||||
c=unicodeCodeUnits[offset];
|
||||
if(c!=0xfffe) {
|
||||
/* done */
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
|
||||
return _MBCSGetFallback(&sharedData->mbcs, offset);
|
||||
} else {
|
||||
return 0xfffe;
|
||||
c=_MBCSGetFallback(&sharedData->mbcs, offset);
|
||||
/* else done with 0xfffe */
|
||||
}
|
||||
break;
|
||||
} else if(action==MBCS_STATE_VALID_DIRECT_16) {
|
||||
/* output BMP code point */
|
||||
return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_VALID_16_PAIR) {
|
||||
offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
entry=unicodeCodeUnits[offset++];
|
||||
if(entry<0xd800) {
|
||||
c=unicodeCodeUnits[offset++];
|
||||
if(c<0xd800) {
|
||||
/* output BMP code point below 0xd800 */
|
||||
return (UChar32)entry;
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? entry<=0xdfff : entry<=0xdbff) {
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
|
||||
/* output roundtrip or fallback supplementary code point */
|
||||
return (UChar32)(((entry&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (entry&0xfffe)==0xe000 : entry==0xe000) {
|
||||
c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
|
||||
} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
|
||||
/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
|
||||
return unicodeCodeUnits[offset];
|
||||
} else if(entry==0xffff) {
|
||||
c=unicodeCodeUnits[offset];
|
||||
} else if(c==0xffff) {
|
||||
return 0xffff;
|
||||
} else {
|
||||
return 0xfffe;
|
||||
c=0xfffe;
|
||||
}
|
||||
break;
|
||||
} else if(action==MBCS_STATE_VALID_DIRECT_20) {
|
||||
/* output supplementary code point */
|
||||
return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if(!TO_U_USE_FALLBACK(useFallback)) {
|
||||
return 0xfffe;
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
/* output BMP code point */
|
||||
return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
|
||||
if(!TO_U_USE_FALLBACK(useFallback)) {
|
||||
return 0xfffe;
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
/* output supplementary code point */
|
||||
return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
} else if(action==MBCS_STATE_CHANGE_ONLY) {
|
||||
/*
|
||||
* This serves as a state change without any output.
|
||||
* It is useful for reading simple stateful encodings,
|
||||
* for example using just Shift-In/Shift-Out codes.
|
||||
* The 21 unused bits may later be used for more sophisticated
|
||||
* state transitions.
|
||||
*/
|
||||
if(sharedData->mbcs.dbcsOnlyState!=0) {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
return 0xffff;
|
||||
}
|
||||
if(source==(const uint8_t *)sourceLimit) {
|
||||
/* if there are only state changes, then return "unassigned" */
|
||||
return 0xfffe;
|
||||
}
|
||||
c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_UNASSIGNED) {
|
||||
return 0xfffe;
|
||||
} else if(action==MBCS_STATE_ILLEGAL) {
|
||||
return 0xffff;
|
||||
} else {
|
||||
/* reserved, must never occur */
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
|
||||
/* state change only - prepare for a new character */
|
||||
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
offset=0;
|
||||
/*
|
||||
* forbid MBCS_STATE_CHANGE_ONLY for this function,
|
||||
* and MBCS_STATE_ILLEGAL and reserved action codes
|
||||
*/
|
||||
return 0xffff;
|
||||
}
|
||||
} while(source<(const uint8_t *)sourceLimit);
|
||||
}
|
||||
|
||||
*pSource=(const char *)source;
|
||||
return 0xffff;
|
||||
if(i!=length) {
|
||||
/* illegal for this function: not all input consumed */
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
if(c==0xfffe) {
|
||||
/* try an extension mapping */
|
||||
const int32_t *cx=sharedData->mbcs.extIndexes;
|
||||
if(cx!=NULL) {
|
||||
return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/* MBCS-from-Unicode conversion functions ----------------------------------- */
|
||||
|
@ -3248,7 +3300,7 @@ getTrail:
|
|||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_DBCS_ONLY:
|
||||
/* 1/2-byte stateful table but only DBCS mappings used */
|
||||
/* table with single-byte results, but only DBCS mappings used */
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
/* no mapping or SBCS result, not taken for DBCS-only */
|
||||
|
@ -3524,7 +3576,7 @@ unassigned:
|
|||
* conversion implementations.
|
||||
* It does not use the converter state nor call callbacks.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It does not handle conversion extensions (_extFromU()).
|
||||
* It handles conversion extensions but not GB 18030.
|
||||
*
|
||||
* It converts one single Unicode code point into codepage bytes, encoded
|
||||
* as one 32-bit value. The function returns the number of bytes in *pValue:
|
||||
|
@ -3546,8 +3598,6 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
uint32_t value;
|
||||
int32_t length;
|
||||
|
||||
/* ### TODO extension mapping */
|
||||
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
return 0;
|
||||
|
@ -3578,7 +3628,7 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
}
|
||||
break;
|
||||
case MBCS_OUTPUT_DBCS_ONLY:
|
||||
/* 1/2-byte stateful table but only DBCS mappings used */
|
||||
/* table with single-byte results, but only DBCS mappings used */
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
/* no mapping or SBCS result, not taken for DBCS-only */
|
||||
|
@ -3663,6 +3713,12 @@ _MBCSFromUChar32(UConverterSharedData *sharedData,
|
|||
*pValue=value;
|
||||
return length;
|
||||
} else {
|
||||
const int32_t *cx=sharedData->mbcs.extIndexes;
|
||||
if(cx!=NULL) {
|
||||
return ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
|
||||
}
|
||||
|
||||
/* unassigned */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -221,7 +221,7 @@ typedef struct {
|
|||
*/
|
||||
typedef struct UConverterMBCSTable {
|
||||
/* toUnicode */
|
||||
uint8_t countStates, dbcsOnlyState;
|
||||
uint8_t countStates, dbcsOnlyState, stateTableOwned;
|
||||
uint32_t countToUFallbacks;
|
||||
|
||||
const int32_t (*stateTable)/*[countStates]*/[256];
|
||||
|
@ -258,12 +258,13 @@ typedef struct {
|
|||
fromUBytesLength;
|
||||
} _MBCSHeader;
|
||||
|
||||
/**
|
||||
/*
|
||||
* This is a simple version of _MBCSGetNextUChar() that is used
|
||||
* by other converter implementations.
|
||||
* It only returns an "assigned" result if it consumes the entire input.
|
||||
* It does not use state from the converter, nor error codes.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It does not handle conversion extensions (_extToU()).
|
||||
* It handles conversion extensions but not GB 18030.
|
||||
*
|
||||
* Return value:
|
||||
* U+fffe unassigned
|
||||
|
@ -272,7 +273,7 @@ typedef struct {
|
|||
*/
|
||||
U_CFUNC UChar32
|
||||
_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
|
||||
const char **pSource, const char *sourceLimit,
|
||||
const char *source, int32_t length,
|
||||
UBool useFallback);
|
||||
|
||||
/**
|
||||
|
@ -304,11 +305,12 @@ _MBCSIsLeadByte(UConverterSharedData *sharedData, char byte);
|
|||
#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \
|
||||
(UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
|
||||
|
||||
/**
|
||||
/*
|
||||
* This is another simple conversion function for internal use by other
|
||||
* conversion implementations.
|
||||
* It does not use the converter state nor call callbacks.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It handles conversion extensions but not GB 18030.
|
||||
*
|
||||
* It converts one single Unicode code point into codepage bytes, encoded
|
||||
* as one 32-bit value. The function returns the number of bytes in *pValue:
|
||||
|
|
Loading…
Add table
Reference in a new issue