mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-2404 support SI/SO stateful conversion
X-SVN-Rev: 13517
This commit is contained in:
parent
1a48a8181b
commit
9f6cc41061
2 changed files with 50 additions and 17 deletions
|
@ -85,13 +85,13 @@ ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
|
|||
|
||||
if(i<=4) {
|
||||
/* linear search for the last part */
|
||||
if(word<=toUSection[start]) {
|
||||
if(word0<=toUSection[start]) {
|
||||
break;
|
||||
}
|
||||
if(++start<limit && word<=toUSection[start]) {
|
||||
if(++start<limit && word0<=toUSection[start]) {
|
||||
break;
|
||||
}
|
||||
if(++start<limit && word<=toUSection[start]) {
|
||||
if(++start<limit && word0<=toUSection[start]) {
|
||||
break;
|
||||
}
|
||||
/* always break at start==limit-1 */
|
||||
|
@ -115,6 +115,13 @@ ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* TRUE if not an SI/SO stateful converter,
|
||||
* or if the match length fits with the current converter state
|
||||
*/
|
||||
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
|
||||
((sisoState)<0 || ((sisoState)==0) == (match==1))
|
||||
|
||||
/*
|
||||
* this works like ucnv_extMatchFromU() except
|
||||
* - the first character is in pre
|
||||
|
@ -122,7 +129,7 @@ ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
|
|||
* - the returned matchLength is not offset by 2
|
||||
*/
|
||||
static int32_t
|
||||
ucnv_extMatchToU(const int32_t *cx,
|
||||
ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
|
||||
const char *pre, int32_t preLength,
|
||||
const char *src, int32_t srcLength,
|
||||
uint32_t *pMatchValue,
|
||||
|
@ -144,6 +151,20 @@ ucnv_extMatchToU(const int32_t *cx,
|
|||
matchValue=0;
|
||||
i=j=matchLength=0;
|
||||
|
||||
if(sisoState==0) {
|
||||
/* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
|
||||
if(preLength>1) {
|
||||
return 0; /* no match of a DBCS sequence in SBCS mode */
|
||||
} else if(preLength==1) {
|
||||
srcLength=0;
|
||||
} else /* preLength==0 */ {
|
||||
if(srcLength>1) {
|
||||
srcLength=1;
|
||||
}
|
||||
}
|
||||
flush=TRUE;
|
||||
}
|
||||
|
||||
/* we must not remember fallback matches when not using fallbacks */
|
||||
|
||||
/* match input units until there is a full match or the input is consumed */
|
||||
|
@ -157,7 +178,8 @@ ucnv_extMatchToU(const int32_t *cx,
|
|||
value=UCNV_EXT_TO_U_GET_VALUE(value);
|
||||
if( value!=0 &&
|
||||
(UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
|
||||
TO_U_USE_FALLBACK(useFallback))
|
||||
TO_U_USE_FALLBACK(useFallback)) &&
|
||||
UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
|
||||
) {
|
||||
/* remember longest match so far */
|
||||
matchValue=value;
|
||||
|
@ -194,8 +216,9 @@ ucnv_extMatchToU(const int32_t *cx,
|
|||
/* partial match, continue */
|
||||
index=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
|
||||
} else {
|
||||
if( UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
|
||||
TO_U_USE_FALLBACK(useFallback)
|
||||
if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
|
||||
TO_U_USE_FALLBACK(useFallback)) &&
|
||||
UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
|
||||
) {
|
||||
/* full match, stop with result */
|
||||
matchValue=value;
|
||||
|
@ -246,12 +269,14 @@ ucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
|
|||
}
|
||||
|
||||
/*
|
||||
* TRUE if not an SI/SO stateful charset,
|
||||
* or if the match length fits with the current converter state
|
||||
* get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
|
||||
* or -1 if the converter is not SI/SO stateful
|
||||
*
|
||||
* Note: For SI/SO stateful converters getting here,
|
||||
* cnv->mode==0 is equivalent to firstLength==1.
|
||||
*/
|
||||
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(cnv, match) \
|
||||
((cnv)->sharedData->table->mbcs.outputType!=MBCS_OUTPUT_2_SISO || \
|
||||
((uint8_t)(cnv->mode)==0) == (match==1))
|
||||
#define UCNV_SISO_STATE(cnv) \
|
||||
((cnv)->sharedData->table->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : -1)
|
||||
|
||||
/*
|
||||
* target<targetLimit; set error code for overflow
|
||||
|
@ -268,12 +293,12 @@ ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
|
|||
int32_t match;
|
||||
|
||||
/* try to match */
|
||||
match=ucnv_extMatchToU(cx,
|
||||
match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv),
|
||||
(const char *)cnv->toUBytes, firstLength,
|
||||
*src, (int32_t)(srcLimit-*src),
|
||||
&value,
|
||||
cnv->useFallback, flush);
|
||||
if(match>0 && UCNV_EXT_TO_U_VERIFY_SISO_MATCH(cnv, match)) {
|
||||
if(match>0) {
|
||||
/* advance src pointer for the consumed input */
|
||||
*src+=match-firstLength;
|
||||
|
||||
|
@ -322,7 +347,7 @@ ucnv_extSimpleMatchToU(const int32_t *cx,
|
|||
int32_t match;
|
||||
|
||||
/* try to match */
|
||||
match=ucnv_extMatchToU(cx,
|
||||
match=ucnv_extMatchToU(cx, -1,
|
||||
cp,
|
||||
NULL, 0,
|
||||
NULL, 0,
|
||||
|
@ -357,12 +382,12 @@ ucnv_extContinueMatchToU(UConverter *cnv,
|
|||
uint32_t value;
|
||||
int32_t match, length;
|
||||
|
||||
match=ucnv_extMatchToU(cnv->sharedData->table->mbcs.extIndexes,
|
||||
match=ucnv_extMatchToU(cnv->sharedData->table->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
|
||||
cnv->preToU, cnv->preToULength,
|
||||
pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
|
||||
&value,
|
||||
cnv->useFallback, pArgs->flush);
|
||||
if(match>0 && UCNV_EXT_TO_U_VERIFY_SISO_MATCH(cnv, match)) {
|
||||
if(match>0) {
|
||||
if(match>=cnv->preToULength) {
|
||||
/* advance src pointer for the consumed input */
|
||||
pArgs->source+=match-cnv->preToULength;
|
||||
|
|
8
icu4c/source/test/testdata/conversion.txt
vendored
8
icu4c/source/test/testdata/conversion.txt
vendored
|
@ -44,6 +44,14 @@ conversion {
|
|||
Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
|
||||
Cases {
|
||||
// extensions
|
||||
{
|
||||
"ibm-1390",
|
||||
:bin{ 43860eececec8bec8cec8d4386ecb5ecb6ecb70fec },
|
||||
"\uff63\uff76\ufffd\u31f6\u31f7\u31f8\u30ab\u304b\u309a\u304d\u309a\u304f\u309a\x1a",
|
||||
:intvector{ 0, 1, 3, 5, 7, 9, 11, 13, 13, 15, 15, 17, 17, 20 },
|
||||
:int{1}, :int{0}, "", "?", :bin{""}
|
||||
}
|
||||
|
||||
{
|
||||
"*test3",
|
||||
:bin{ 00050601020b0701020a01020c },
|
||||
|
|
Loading…
Add table
Reference in a new issue