mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-5188 add ucnv_setSubstString()
X-SVN-Rev: 19789
This commit is contained in:
parent
820ca02a9f
commit
8e5f03bcd1
13 changed files with 394 additions and 34 deletions
|
@ -249,6 +249,19 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
|
|||
uprv_memcpy(localConverter, cnv, sizeof(UConverter));
|
||||
localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
|
||||
|
||||
/* copy the substitution string */
|
||||
if (cnv->subChars == (uint8_t *)cnv->subUChars) {
|
||||
localConverter->subChars = (uint8_t *)localConverter->subUChars;
|
||||
} else {
|
||||
localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
|
||||
if (localConverter->subChars == NULL) {
|
||||
uprv_free(allocatedConverter);
|
||||
UTRACE_EXIT_STATUS(*status);
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
|
||||
}
|
||||
|
||||
/* now either call the safeclone fcn or not */
|
||||
if (cnv->sharedData->impl->safeClone != NULL) {
|
||||
/* call the custom safeClone function */
|
||||
|
@ -256,6 +269,9 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
|
|||
}
|
||||
|
||||
if(localConverter==NULL || U_FAILURE(*status)) {
|
||||
if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
|
||||
uprv_free(allocatedConverter->subChars);
|
||||
}
|
||||
uprv_free(allocatedConverter);
|
||||
UTRACE_EXIT_STATUS(*status);
|
||||
return NULL;
|
||||
|
@ -348,6 +364,10 @@ ucnv_close (UConverter * converter)
|
|||
converter->sharedData->impl->close(converter);
|
||||
}
|
||||
|
||||
if (converter->subChars != (uint8_t *)converter->subUChars) {
|
||||
uprv_free(converter->subChars);
|
||||
}
|
||||
|
||||
/*
|
||||
Checking whether it's an algorithmic converter is okay
|
||||
in multithreaded applications because the value never changes.
|
||||
|
@ -395,15 +415,19 @@ ucnv_getSubstChars (const UConverter * converter,
|
|||
if (U_FAILURE (*err))
|
||||
return;
|
||||
|
||||
if (converter->subCharLen <= 0) {
|
||||
/* Unicode string or empty string from ucnv_setSubstString(). */
|
||||
*len = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (*len < converter->subCharLen) /*not enough space in subChars */
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
|
||||
*len = converter->subCharLen; /*store # of bytes copied to buffer */
|
||||
uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
|
||||
uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
|
||||
*len = converter->subCharLen; /*store # of bytes copied to buffer */
|
||||
}
|
||||
|
||||
|
@ -424,7 +448,7 @@ ucnv_setSubstChars (UConverter * converter,
|
|||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
|
||||
uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
|
||||
converter->subCharLen = len; /*sets the new len */
|
||||
|
||||
/*
|
||||
|
@ -437,6 +461,91 @@ ucnv_setSubstChars (UConverter * converter,
|
|||
return;
|
||||
}
|
||||
|
||||
U_DRAFT void U_EXPORT2
|
||||
ucnv_setSubstString(UConverter *cnv,
|
||||
const UChar *s,
|
||||
int32_t length,
|
||||
UErrorCode *err) {
|
||||
UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
|
||||
char chars[UCNV_ERROR_BUFFER_LENGTH];
|
||||
|
||||
UConverter *clone;
|
||||
uint8_t *subChars;
|
||||
int32_t cloneSize, length8;
|
||||
|
||||
/* Let the following functions check all arguments. */
|
||||
cloneSize = sizeof(cloneBuffer);
|
||||
clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
|
||||
ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
|
||||
length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
|
||||
ucnv_close(clone);
|
||||
if (U_FAILURE(*err)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (cnv->sharedData->impl->writeSub == NULL ||
|
||||
(cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
|
||||
ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
|
||||
) {
|
||||
/* The converter is not stateful. Store the charset bytes as a fixed string. */
|
||||
subChars = (uint8_t *)chars;
|
||||
} else {
|
||||
/*
|
||||
* The converter has a non-default writeSub() function, indicating
|
||||
* that it is stateful.
|
||||
* Store the Unicode string for on-the-fly conversion for correct
|
||||
* state handling.
|
||||
*/
|
||||
if (length > UCNV_ERROR_BUFFER_LENGTH) {
|
||||
/*
|
||||
* Should not occur. The converter should output at least one byte
|
||||
* per UChar, which means that ucnv_fromUChars() should catch all
|
||||
* overflows.
|
||||
*/
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
return;
|
||||
}
|
||||
subChars = (uint8_t *)s;
|
||||
if (length < 0) {
|
||||
length = u_strlen(s);
|
||||
}
|
||||
length8 = length * U_SIZEOF_UCHAR;
|
||||
}
|
||||
|
||||
/*
|
||||
* For storing the substitution string, select either the small buffer inside
|
||||
* UConverter or allocate a subChars buffer.
|
||||
*/
|
||||
if (length8 > UCNV_MAX_SUBCHAR_LEN) {
|
||||
/* Use a separate buffer for the string. Outside UConverter to not make it too large. */
|
||||
if (cnv->subChars == (uint8_t *)cnv->subUChars) {
|
||||
/* Allocate a new buffer for the string. */
|
||||
cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
|
||||
if (cnv->subChars == NULL) {
|
||||
cnv->subChars = (uint8_t *)cnv->subUChars;
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy the substitution string into the UConverter or its subChars buffer. */
|
||||
if (length8 == 0) {
|
||||
cnv->subCharLen = 0;
|
||||
} else {
|
||||
uprv_memcpy(cnv->subChars, subChars, length8);
|
||||
if (subChars == (uint8_t *)chars) {
|
||||
cnv->subCharLen = (int8_t)length8;
|
||||
} else /* subChars == s */ {
|
||||
cnv->subCharLen = (int8_t)-length;
|
||||
}
|
||||
}
|
||||
|
||||
/* See comment in ucnv_setSubstChars(). */
|
||||
cnv->subChar1 = 0;
|
||||
}
|
||||
|
||||
/*resets the internal states of a converter
|
||||
*goal : have the same behaviour as a freshly created converter
|
||||
*/
|
||||
|
|
|
@ -470,7 +470,7 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
|
|||
}
|
||||
|
||||
uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
|
||||
uprv_memcpy(cnv->subChar, myConverterData->currentConverter->subChar, 4);
|
||||
uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
|
||||
cnv->subCharLen = myConverterData->currentConverter->subCharLen;
|
||||
}else{
|
||||
myConverterData->currentConverter=ucnv_open("ibm-949",errorCode);
|
||||
|
@ -2869,7 +2869,7 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
|
|||
char buffer[8];
|
||||
int32_t length;
|
||||
|
||||
subchar=(char *)cnv->subChar;
|
||||
subchar=(char *)cnv->subChars;
|
||||
length=cnv->subCharLen; /* assume length==1 for most variants */
|
||||
|
||||
p = buffer;
|
||||
|
@ -2924,14 +2924,24 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
|
|||
}
|
||||
break;
|
||||
} else {
|
||||
/* let the subconverter write the subchar */
|
||||
args->converter = myConverterData->currentConverter;
|
||||
uprv_memcpy(myConverterData->currentConverter->subChar, subchar, 4);
|
||||
/* save the subconverter's substitution string */
|
||||
uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
|
||||
int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
|
||||
|
||||
/* set our substitution string into the subconverter */
|
||||
myConverterData->currentConverter->subChars = (uint8_t *)subchar;
|
||||
myConverterData->currentConverter->subCharLen = (int8_t)length;
|
||||
|
||||
/* let the subconverter write the subchar, set/retrieve fromUChar32 state */
|
||||
args->converter = myConverterData->currentConverter;
|
||||
myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
|
||||
ucnv_cbFromUWriteSub(args, 0, err);
|
||||
cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
|
||||
args->converter = cnv;
|
||||
|
||||
/* restore the subconverter's substitution string */
|
||||
myConverterData->currentConverter->subChars = currentSubChars;
|
||||
myConverterData->currentConverter->subCharLen = currentSubCharLen;
|
||||
|
||||
if(*err == U_BUFFER_OVERFLOW_ERROR) {
|
||||
if(myConverterData->currentConverter->charErrorBufferLength > 0) {
|
||||
|
@ -2943,7 +2953,6 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
|
|||
cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
|
||||
myConverterData->currentConverter->charErrorBufferLength = 0;
|
||||
}
|
||||
args->converter = cnv;
|
||||
return;
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -912,7 +912,8 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter,
|
|||
myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar;
|
||||
myUConverter->subChar1 = mySharedConverterData->staticData->subChar1;
|
||||
myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
|
||||
uprv_memcpy(myUConverter->subChar, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
|
||||
myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
|
||||
uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
|
||||
|
||||
if(mySharedConverterData->impl->open != NULL) {
|
||||
mySharedConverterData->impl->open(myUConverter, realName, locale, options, err);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -157,6 +157,17 @@ struct UConverter {
|
|||
const void *fromUContext;
|
||||
const void *toUContext;
|
||||
|
||||
/*
|
||||
* Pointer to charset bytes for substitution string if subCharLen>0,
|
||||
* or pointer to Unicode string (UChar *) if subCharLen<0.
|
||||
* subCharLen==0 is equivalent to using a skip callback.
|
||||
* If the pointer is !=subUChars then it is allocated with
|
||||
* UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
|
||||
* The subUChars field is declared as UChar[] not uint8_t[] to
|
||||
* guarantee alignment for UChars.
|
||||
*/
|
||||
uint8_t *subChars;
|
||||
|
||||
UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
|
||||
|
||||
uint32_t options; /* options flags from UConverterOpen, may contain additional bits */
|
||||
|
@ -200,9 +211,9 @@ struct UConverter {
|
|||
|
||||
uint8_t subChar1; /* single-byte substitution character if different from subChar */
|
||||
UBool useSubChar1;
|
||||
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
|
||||
char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
|
||||
uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
|
||||
UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */
|
||||
|
||||
UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
|
||||
|
|
|
@ -177,10 +177,31 @@ ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
|
|||
UErrorCode * err)
|
||||
{
|
||||
UConverter *converter;
|
||||
int32_t length;
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
return;
|
||||
}
|
||||
converter = args->converter;
|
||||
length = converter->subCharLen;
|
||||
|
||||
if(length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(length < 0) {
|
||||
/*
|
||||
* Write/convert the substitution string. Its real length is -length.
|
||||
* Unlike the escape callback, we need not change the converter's
|
||||
* callback function because ucnv_setSubstString() verified that
|
||||
* the string can be converted, so we will not get a conversion error
|
||||
* and will not recurse.
|
||||
* At worst we should get a U_BUFFER_OVERFLOW_ERROR.
|
||||
*/
|
||||
const UChar *source = (const UChar *)converter->subChars;
|
||||
ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
|
||||
return;
|
||||
}
|
||||
|
||||
if(converter->sharedData->impl->writeSub!=NULL) {
|
||||
converter->sharedData->impl->writeSub(args, offsetIndex, err);
|
||||
|
@ -196,7 +217,7 @@ ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
|
|||
}
|
||||
else {
|
||||
ucnv_cbFromUWriteBytes(args,
|
||||
(const char *)converter->subChar, converter->subCharLen,
|
||||
(const char *)converter->subChars, length,
|
||||
offsetIndex, err);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -460,7 +460,7 @@ _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *e
|
|||
*p++= UCNV_CLOSE_BRACE;
|
||||
convData->isTargetUCharDBCS=FALSE;
|
||||
}
|
||||
*p++= cnv->subChar[0];
|
||||
*p++= (char)cnv->subChars[0];
|
||||
|
||||
ucnv_cbFromUWriteBytes(args,
|
||||
buffer, (int32_t)(p - buffer),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2005, International Business Machines
|
||||
* Copyright (C) 2000-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvisci.c
|
||||
|
@ -1319,7 +1319,7 @@ _ISCII_SafeClone(const UConverter *cnv,
|
|||
}
|
||||
|
||||
localClone = (struct cloneISCIIStruct *)stackBuffer;
|
||||
uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter));
|
||||
/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
|
||||
|
||||
uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
|
||||
localClone->cnv.extraInfo = &localClone->mydata;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2005, International Business Machines
|
||||
* Copyright (C) 2000-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -3877,7 +3877,7 @@ ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
|
|||
length=1;
|
||||
} else {
|
||||
/* select subChar in all other cases */
|
||||
subchar=(char *)cnv->subChar;
|
||||
subchar=(char *)cnv->subChars;
|
||||
length=cnv->subCharLen;
|
||||
}
|
||||
|
||||
|
|
|
@ -523,6 +523,8 @@ ucnv_close(UConverter * converter);
|
|||
/**
|
||||
* Fills in the output parameter, subChars, with the substitution characters
|
||||
* as multiple bytes.
|
||||
* If ucnv_setSubstString() set a Unicode string because the converter is
|
||||
* stateful, then subChars will be an empty string.
|
||||
*
|
||||
* @param converter the Unicode converter
|
||||
* @param subChars the substitution characters
|
||||
|
@ -531,6 +533,7 @@ ucnv_close(UConverter * converter);
|
|||
* @param err the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
* @see ucnv_setSubstString
|
||||
* @see ucnv_setSubstChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -543,12 +546,19 @@ ucnv_getSubstChars(const UConverter *converter,
|
|||
/**
|
||||
* Sets the substitution chars when converting from unicode to a codepage. The
|
||||
* substitution is specified as a string of 1-4 bytes, and may contain
|
||||
* <TT>NULL</TT> byte.
|
||||
* <TT>NULL</TT> bytes.
|
||||
* The subChars must represent a single character. The caller needs to know the
|
||||
* byte sequence of a valid character in the converter's charset.
|
||||
* For some converters, for example some ISO 2022 variants, only single-byte
|
||||
* substitution characters may be supported.
|
||||
* The newer ucnv_setSubstString() function relaxes these limitations.
|
||||
*
|
||||
* @param converter the Unicode converter
|
||||
* @param subChars the substitution character byte sequence we want set
|
||||
* @param len the number of bytes in subChars
|
||||
* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
|
||||
* len is bigger than the maximum number of bytes allowed in subchars
|
||||
* @see ucnv_setSubstString
|
||||
* @see ucnv_getSubstChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -558,6 +568,39 @@ ucnv_setSubstChars(UConverter *converter,
|
|||
int8_t len,
|
||||
UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Set a substitution string for converting from Unicode to a charset.
|
||||
* The caller need not know the charset byte sequence for each charset.
|
||||
*
|
||||
* Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
|
||||
* for a single character, this function takes a Unicode string with
|
||||
* zero, one or more characters, and immediately verifies that the string can be
|
||||
* converted to the charset.
|
||||
* If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
|
||||
* then the function returns with an error accordingly.
|
||||
*
|
||||
* Also unlike ucnv_setSubstChars(), this function works for stateful charsets
|
||||
* by converting on the fly at the point of substitution rather than setting
|
||||
* a fixed byte sequence.
|
||||
*
|
||||
* @param cnv The UConverter object.
|
||||
* @param s The Unicode string.
|
||||
* @param length The number of UChars in s, or -1 for a NUL-terminated string.
|
||||
* @param err Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*
|
||||
* @see ucnv_setSubstChars
|
||||
* @see ucnv_getSubstChars
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
ucnv_setSubstString(UConverter *cnv,
|
||||
const UChar *s,
|
||||
int32_t length,
|
||||
UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, errBytes, with the error characters from the
|
||||
* last failing conversion.
|
||||
|
@ -1820,7 +1863,7 @@ ucnv_usesFallback(const UConverter *cnv);
|
|||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
|
||||
* int32_t signatureLength = 0;
|
||||
* char *encoding = ucnv_detectUnicodeSignatures(input,sizeof(input),&signatureLength,&err);
|
||||
* char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
|
||||
* UConverter *conv = NULL;
|
||||
* UChar output[100];
|
||||
* UChar *target = output, *out;
|
||||
|
|
|
@ -110,6 +110,7 @@ static void TestToUCountPending(void);
|
|||
static void TestFromUCountPending(void);
|
||||
static void TestDefaultName(void);
|
||||
static void TestCompareNames(void);
|
||||
static void TestSubstString(void);
|
||||
|
||||
void addTestConvert(TestNode** root);
|
||||
|
||||
|
@ -134,6 +135,7 @@ void addTestConvert(TestNode** root)
|
|||
addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
|
||||
addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
|
||||
addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
|
||||
addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
|
||||
}
|
||||
|
||||
static void ListNames(void) {
|
||||
|
@ -3105,3 +3107,93 @@ TestCompareNames() {
|
|||
compareNames(lessMac);
|
||||
compareNames(lessUTF080);
|
||||
}
|
||||
|
||||
static void
|
||||
TestSubstString() {
|
||||
static const UChar surrogate[1]={ 0xd900 };
|
||||
char buffer[16];
|
||||
|
||||
static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
|
||||
static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
|
||||
UConverter *cnv;
|
||||
UErrorCode errorCode;
|
||||
int32_t length;
|
||||
int8_t len8;
|
||||
|
||||
/* UTF-16/32: test that the BOM is output before the sub character */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
cnv=ucnv_open("UTF-16", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
|
||||
ucnv_close(cnv);
|
||||
if(U_FAILURE(errorCode) ||
|
||||
length!=4 ||
|
||||
NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
|
||||
) {
|
||||
log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
cnv=ucnv_open("UTF-32", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
|
||||
ucnv_close(cnv);
|
||||
if(U_FAILURE(errorCode) ||
|
||||
length!=8 ||
|
||||
NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
|
||||
) {
|
||||
log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
|
||||
}
|
||||
|
||||
/* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
cnv=ucnv_open("ISO-8859-1", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
|
||||
} else {
|
||||
len8 = sizeof(buffer);
|
||||
ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
|
||||
/* Stateless converter, we expect the string converted to charset bytes. */
|
||||
if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
|
||||
log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
cnv=ucnv_open("HZ", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
|
||||
} else {
|
||||
len8 = sizeof(buffer);
|
||||
ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
|
||||
/* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
|
||||
if(U_FAILURE(errorCode) || len8!=0) {
|
||||
log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
|
||||
/*
|
||||
* Further testing of ucnv_setSubstString() is done via intltest convert.
|
||||
* We do not test edge cases of illegal arguments and similar because the
|
||||
* function implementation uses all of its parameters in calls to other
|
||||
* functions with UErrorCode parameters.
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -183,7 +183,7 @@ ConversionTest::TestFromUnicode() {
|
|||
char charset[100], cbopt[4];
|
||||
const char *option;
|
||||
UnicodeString s, unicode, invalidUChars;
|
||||
int32_t offsetsLength;
|
||||
int32_t offsetsLength, index;
|
||||
UConverterFromUCallback callback;
|
||||
|
||||
TestDataModule *dataModule;
|
||||
|
@ -242,15 +242,17 @@ ConversionTest::TestFromUnicode() {
|
|||
}
|
||||
|
||||
s=testCase->getString("callback", errorCode);
|
||||
cc.setSub=0; // default: no subchar
|
||||
|
||||
// read NUL-separated subchar first, if any
|
||||
length=u_strlen(p=s.getTerminatedBuffer());
|
||||
if(++length<s.length()) {
|
||||
if((index=s.indexOf((UChar)0))>0) {
|
||||
// read NUL-separated subchar first, if any
|
||||
// copy the subchar from Latin-1 characters
|
||||
// start after the NUL
|
||||
p=s.getTerminatedBuffer();
|
||||
length=index+1;
|
||||
p+=length;
|
||||
length=s.length()-length;
|
||||
if(length>=(int32_t)sizeof(cc.subchar)) {
|
||||
if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
int32_t j;
|
||||
|
@ -260,13 +262,26 @@ ConversionTest::TestFromUnicode() {
|
|||
}
|
||||
// NUL-terminate the subchar
|
||||
cc.subchar[j]=0;
|
||||
cc.setSub=1;
|
||||
}
|
||||
|
||||
// remove the NUL and subchar from s
|
||||
s.truncate(u_strlen(s.getBuffer()));
|
||||
} else {
|
||||
// no subchar
|
||||
cc.subchar[0]=0;
|
||||
s.truncate(index);
|
||||
} else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
|
||||
// read a substitution string, separated by an equal sign
|
||||
p=s.getBuffer()+index+1;
|
||||
length=s.length()-(index+1);
|
||||
if(length<=0 || length>=LENGTHOF(cc.subString)) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
u_memcpy(cc.subString, p, length);
|
||||
// NUL-terminate the subString
|
||||
cc.subString[length]=0;
|
||||
cc.setSub=-1;
|
||||
}
|
||||
|
||||
// remove the equal sign and subString from s
|
||||
s.truncate(index);
|
||||
}
|
||||
|
||||
s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
|
||||
|
@ -1052,10 +1067,19 @@ ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
|
|||
// set the subchar
|
||||
int32_t length;
|
||||
|
||||
if((length=(int32_t)strlen(cc.subchar))!=0) {
|
||||
if(cc.setSub>0) {
|
||||
length=(int32_t)strlen(cc.subchar);
|
||||
ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubChars() failed - %s",
|
||||
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
|
||||
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
|
||||
ucnv_close(cnv);
|
||||
return FALSE;
|
||||
}
|
||||
} else if(cc.setSub<0) {
|
||||
ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
|
||||
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
|
||||
ucnv_close(cnv);
|
||||
return FALSE;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Copyright (C) 2003-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -29,7 +29,9 @@
|
|||
struct ConversionCase {
|
||||
int32_t caseNr;
|
||||
const char *charset, *cbopt, *name;
|
||||
UChar subString[16];
|
||||
char subchar[8];
|
||||
int8_t setSub;
|
||||
|
||||
const uint8_t *bytes;
|
||||
int32_t bytesLength;
|
||||
|
|
50
icu4c/source/test/testdata/conversion.txt
vendored
50
icu4c/source/test/testdata/conversion.txt
vendored
|
@ -29,11 +29,16 @@ conversion:table(nofallback) {
|
|||
"Callback option - a letter is passed in directly as const char * see ucnv_err.h\n"
|
||||
"Empty string: Sub callback with NULL option\n"
|
||||
|
||||
"In order to specify a charset substitution character,\n"
|
||||
"In order to specify a charset substitution character (for ucnv_setSubstChars()),\n"
|
||||
"add a NUL (U+0000) to the callback string followed by the subchar bytes as Latin-1\n"
|
||||
"characters. For example, for a Sub callback with no option and a subchar of FC FC,\n"
|
||||
"use the string \"?\x00\xFC\xFC\"\n"
|
||||
|
||||
"In order to specify a substitution string (for ucnv_setSubstString()),\n"
|
||||
"add an '=' to the callback string followed by the substitution string.\n"
|
||||
"For example, for a Sub callback with no option and a substitution string\n"
|
||||
"of \"ab\", use the string \"?=ab\"\n"
|
||||
|
||||
"fallbacks: per-direction boolean, currently only for fromUnicode; see Jitterbug 2401\n"
|
||||
|
||||
"errorCode: (empty)==zero | invalid | illegal | truncated | illesc | unsuppesc\n"
|
||||
|
@ -445,6 +450,49 @@ conversion:table(nofallback) {
|
|||
fromUnicode {
|
||||
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
|
||||
Cases {
|
||||
// Test substitution strings.
|
||||
{
|
||||
"windows-1252", // stateless MBCS with WriteSub(), should internally set char *
|
||||
"a\ufdd0c",
|
||||
:bin{ 61402421402463 },
|
||||
:intvector{ 0,1,1,1,1,1,2 },
|
||||
:int{1}, :int{0}, "", "?=@$!@$", ""
|
||||
}
|
||||
{
|
||||
"ibm-930", // stateful MBCS
|
||||
"a\ufdd0\u4e00\ufdd0e",
|
||||
:bin{ 620e4bce0f400e45414bce0f4066 },
|
||||
:intvector{ 0,1,1,1,1,1,2,2,2,3,3,3,3,4 },
|
||||
:int{1}, :int{0}, "", "?=\u4e01 ", ""
|
||||
}
|
||||
{
|
||||
"iso-2022-jp",
|
||||
"a\x1bc", // Unicode ESC must not occur as a character
|
||||
:bin{ 6163 },
|
||||
:intvector{ 0,2 },
|
||||
:int{1}, :int{0}, "", "?=", "" // empty substitution string
|
||||
}
|
||||
{
|
||||
"iso-2022-cn",
|
||||
"a\x1bc", // Unicode ESC must not occur as a character
|
||||
:bin{ 61202063 },
|
||||
:intvector{ 0,1,1,2 },
|
||||
:int{1}, :int{0}, "", "?= ", ""
|
||||
}
|
||||
{
|
||||
"iso-2022-cn",
|
||||
"a\x1bc", // Unicode ESC must not occur as a character
|
||||
:bin{ 611b2429410e523b0f2063 },
|
||||
:intvector{ 0,1,1,1,1,1,1,1,1,1,2 },
|
||||
:int{1}, :int{0}, "", "?=\u4e00 ", ""
|
||||
}
|
||||
{
|
||||
"us-ascii",
|
||||
"a\x85c",
|
||||
:bin{ 61402421402463 },
|
||||
:intvector{ 0,1,1,1,1,1,2 },
|
||||
:int{1}, :int{0}, "", "?=@$!@$", ""
|
||||
}
|
||||
// ISO 2022-CN: test a single-byte subchar, j5171
|
||||
{
|
||||
"iso-2022-cn",
|
||||
|
|
Loading…
Add table
Reference in a new issue