mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-1439 implement ucnv_getUnicodeSet() for roundtrippable code points
X-SVN-Rev: 11464
This commit is contained in:
parent
45065374f1
commit
a6213ee1c0
15 changed files with 418 additions and 29 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2001, International Business Machines
|
||||
* Copyright (C) 1998-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -24,6 +24,7 @@
|
|||
#include "unicode/ures.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
|
@ -669,6 +670,34 @@ ucnv_getPlatform (const UConverter * converter,
|
|||
return (UConverterPlatform)converter->sharedData->staticData->platform;
|
||||
}
|
||||
|
||||
/** ### TODO @draft ICU 2.6 */
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if(cnv==NULL || set==NULL || which<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=which) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/* does this converter support this function? */
|
||||
if(cnv->sharedData->impl->getUnicodeSet==NULL) {
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/* empty the set */
|
||||
uset_clear(set);
|
||||
|
||||
/* call the converter to add the code points it supports */
|
||||
cnv->sharedData->impl->getUnicodeSet(cnv, set, which, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getToUCallBack (const UConverter * converter,
|
||||
UConverterToUCallback *action,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2001, International Business Machines
|
||||
* Copyright (C) 2000-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -17,8 +17,9 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*Empties the internal unicode output buffer */
|
||||
|
@ -239,3 +240,20 @@ ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
|||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0x10ffff);
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0xd7ff);
|
||||
uset_addRange(set, 0xe000, 0x10ffff);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2002, International Business Machines
|
||||
* Copyright (C) 1999-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -19,6 +19,7 @@
|
|||
#define UCNV_CNV_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnvmbcs.h"
|
||||
|
@ -97,6 +98,12 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv,
|
|||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
/** ### TODO @draft ICU 2.6 */
|
||||
typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
UBool CONVERSION_U_SUCCESS (UErrorCode err);
|
||||
|
||||
void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
|
||||
|
@ -149,6 +156,7 @@ struct UConverterImpl {
|
|||
UConverterGetName getName;
|
||||
UConverterWriteSub writeSub;
|
||||
UConverterSafeClone safeClone;
|
||||
UConverterGetUnicodeSet getUnicodeSet; /* ### TODO ICU 2.6 */
|
||||
};
|
||||
|
||||
extern const UConverterSharedData
|
||||
|
@ -231,4 +239,16 @@ ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
|||
UBool collectPairs,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* UCNV_CNV */
|
||||
|
|
|
@ -519,7 +519,11 @@ static const UConverterImpl _LMBCSImpl##n={\
|
|||
_LMBCSFromUnicode,\
|
||||
_LMBCSFromUnicode,\
|
||||
_LMBCSGetNextUChar,\
|
||||
NULL\
|
||||
NULL,\
|
||||
NULL,\
|
||||
NULL,\
|
||||
NULL,\
|
||||
ucnv_getCompleteUnicodeSet\
|
||||
};\
|
||||
static const UConverterStaticData _LMBCSStaticData##n={\
|
||||
sizeof(UConverterStaticData),\
|
||||
|
|
|
@ -439,7 +439,10 @@ static const UConverterImpl _UTF16BEImpl={
|
|||
T_UConverter_getNextUChar_UTF16_BE,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
/* The 1200 CCSID refers to any version of Unicode with any endianness of UTF-16 */
|
||||
|
@ -532,7 +535,10 @@ static const UConverterImpl _UTF16LEImpl={
|
|||
T_UConverter_getNextUChar_UTF16_LE,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
|
||||
|
@ -761,7 +767,8 @@ static const UConverterImpl _UTF16Impl = {
|
|||
NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _UTF16StaticData = {
|
||||
|
|
|
@ -551,7 +551,10 @@ static const UConverterImpl _UTF32BEImpl = {
|
|||
T_UConverter_getNextUChar_UTF32_BE,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
|
||||
|
@ -1035,7 +1038,10 @@ static const UConverterImpl _UTF32LEImpl = {
|
|||
T_UConverter_getNextUChar_UTF32_LE,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
|
||||
|
@ -1292,7 +1298,8 @@ static const UConverterImpl _UTF32Impl = {
|
|||
NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _UTF32StaticData = {
|
||||
|
|
|
@ -792,7 +792,9 @@ static const UConverterImpl _UTF7Impl={
|
|||
|
||||
NULL,
|
||||
_UTF7GetName,
|
||||
NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
|
||||
NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _UTF7StaticData={
|
||||
|
@ -1527,7 +1529,9 @@ static const UConverterImpl _IMAPImpl={
|
|||
|
||||
NULL,
|
||||
NULL,
|
||||
NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
|
||||
NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _IMAPStaticData={
|
||||
|
|
|
@ -975,7 +975,8 @@ static const UConverterImpl _UTF8Impl={
|
|||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
ucnv_getNonSurrogateUnicodeSet
|
||||
};
|
||||
|
||||
/* The 1208 CCSID refers to any version of Unicode of UTF-8 */
|
||||
|
|
|
@ -1597,7 +1597,8 @@ static const UConverterImpl _Bocu1Impl={
|
|||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _Bocu1StaticData={
|
||||
|
|
|
@ -20,10 +20,11 @@
|
|||
|
||||
#include "cmemory.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
#define UCNV_TILDE 0x7E /* ~ */
|
||||
#define UCNV_OPEN_BRACE 0x7B /* { */
|
||||
|
@ -635,7 +636,20 @@ _HZ_SafeClone(const UConverter *cnv,
|
|||
return &localClone->cnv;
|
||||
}
|
||||
|
||||
static void
|
||||
_HZ_GetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* the tilde '~' is hardcoded in the converter */
|
||||
uset_add(set, 0x7e);
|
||||
|
||||
/* add all of the code points that the sub-converter handles */
|
||||
((UConverterDataHZ*)cnv->extraInfo)->
|
||||
gbConverter->sharedData->impl->
|
||||
getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
|
||||
set, which, pErrorCode);
|
||||
}
|
||||
|
||||
static const UConverterImpl _HZImpl={
|
||||
|
||||
|
@ -657,7 +671,8 @@ static const UConverterImpl _HZImpl={
|
|||
NULL,
|
||||
NULL,
|
||||
_HZ_WriteSub,
|
||||
_HZ_SafeClone
|
||||
_HZ_SafeClone,
|
||||
_HZ_GetUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _HZStaticData={
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
|
@ -425,6 +426,14 @@ getTrail:
|
|||
pArgs->offsets=offsets;
|
||||
}
|
||||
|
||||
static void
|
||||
_Latin1GetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0xff);
|
||||
}
|
||||
|
||||
static const UConverterImpl _Latin1Impl={
|
||||
UCNV_LATIN_1,
|
||||
|
||||
|
@ -442,7 +451,10 @@ static const UConverterImpl _Latin1Impl={
|
|||
_Latin1GetNextUChar,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
_Latin1GetUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _Latin1StaticData={
|
||||
|
@ -713,6 +725,14 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
return 0xffff;
|
||||
}
|
||||
|
||||
static void
|
||||
_ASCIIGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0x7f);
|
||||
}
|
||||
|
||||
static const UConverterImpl _ASCIIImpl={
|
||||
UCNV_US_ASCII,
|
||||
|
||||
|
@ -730,7 +750,10 @@ static const UConverterImpl _ASCIIImpl={
|
|||
_ASCIIGetNextUChar,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
_ASCIIGetUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _ASCIIStaticData={
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2001, International Business Machines
|
||||
* Copyright (C) 2000-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -43,6 +43,7 @@
|
|||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnvmbcs.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
@ -398,7 +399,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
|
|||
|
||||
for(st1=0; st1<maxStage1; ++st1) {
|
||||
st2=table[st1];
|
||||
if(st2!=0) {
|
||||
if(st2>maxStage1) {
|
||||
stage2=table+st2;
|
||||
for(st2=0; st2<64; ++st2) {
|
||||
st3=stage2[st2];
|
||||
|
@ -419,7 +420,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
|
|||
|
||||
for(st1=0; st1<maxStage1; ++st1) {
|
||||
st2=table[st1];
|
||||
if(st2!=0) {
|
||||
if(st2>(maxStage1>>1)) {
|
||||
stage2=(const uint32_t *)table+st2;
|
||||
for(st2=0; st2<64; ++st2) {
|
||||
st3=stage2[st2]&0xffff;
|
||||
|
@ -452,6 +453,105 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_MBCSGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
UConverterMBCSTable *mbcsTable;
|
||||
const uint16_t *table;
|
||||
|
||||
uint32_t st3;
|
||||
uint16_t st1, maxStage1, st2;
|
||||
|
||||
UChar32 c;
|
||||
|
||||
if(cnv->options&_MBCS_OPTION_GB18030) {
|
||||
uset_addRange(set, 0, 0xd7ff);
|
||||
uset_addRange(set, 0xe000, 0x10ffff);
|
||||
return;
|
||||
}
|
||||
|
||||
/* enumerate the from-Unicode trie table */
|
||||
mbcsTable=&cnv->sharedData->table->mbcs;
|
||||
table=mbcsTable->fromUnicodeTable;
|
||||
if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
|
||||
maxStage1=0x440;
|
||||
} else {
|
||||
maxStage1=0x40;
|
||||
}
|
||||
|
||||
c=0; /* keep track of the current code point while enumerating */
|
||||
|
||||
if(mbcsTable->outputType==MBCS_OUTPUT_1) {
|
||||
const uint16_t *stage2, *stage3, *results;
|
||||
|
||||
results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
|
||||
|
||||
for(st1=0; st1<maxStage1; ++st1) {
|
||||
st2=table[st1];
|
||||
if(st2>maxStage1) {
|
||||
stage2=table+st2;
|
||||
for(st2=0; st2<64; ++st2) {
|
||||
if((st3=stage2[st2])!=0) {
|
||||
/* read the stage 3 block */
|
||||
stage3=results+st3;
|
||||
|
||||
/*
|
||||
* Add code points for which the roundtrip flag is set.
|
||||
* Once we get a set for fallback mappings, we have to use
|
||||
* a threshold variable with a value of 0x800.
|
||||
* See _MBCSSingleFromBMPWithOffsets() and
|
||||
* MBCS_SINGLE_RESULT_FROM_U() for details.
|
||||
*/
|
||||
do {
|
||||
if(*stage3++>=0xf00) {
|
||||
uset_add(set, c);
|
||||
}
|
||||
} while((++c&0xf)!=0);
|
||||
} else {
|
||||
c+=16; /* empty stage 3 block */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
c+=1024; /* empty stage 2 block */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const uint32_t *stage2;
|
||||
|
||||
for(st1=0; st1<maxStage1; ++st1) {
|
||||
st2=table[st1];
|
||||
if(st2>(maxStage1>>1)) {
|
||||
stage2=(const uint32_t *)table+st2;
|
||||
for(st2=0; st2<64; ++st2) {
|
||||
if((st3=stage2[st2])!=0) {
|
||||
/* get the roundtrip flags for the stage 3 block */
|
||||
st3>>=16;
|
||||
|
||||
/*
|
||||
* Add code points for which the roundtrip flag is set.
|
||||
* Once we get a set for fallback mappings, we have to check
|
||||
* non-roundtrip stage 3 results for whether they are 0.
|
||||
* See _MBCSFromUnicodeWithOffsets() for details.
|
||||
*/
|
||||
do {
|
||||
if(st3&1) {
|
||||
uset_add(set, c);
|
||||
}
|
||||
st3>>=1;
|
||||
} while((++c&0xf)!=0);
|
||||
} else {
|
||||
c+=16; /* empty stage 3 block */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
c+=1024; /* empty stage 2 block */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* EBCDIC swap LF<->NL ------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
|
@ -3561,7 +3661,9 @@ static const UConverterImpl _MBCSImpl={
|
|||
|
||||
_MBCSGetStarters,
|
||||
_MBCSGetName,
|
||||
_MBCSWriteSub
|
||||
_MBCSWriteSub,
|
||||
NULL,
|
||||
_MBCSGetUnicodeSet
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -2198,7 +2198,8 @@ static const UConverterImpl _SCSUImpl={
|
|||
NULL,
|
||||
_SCSUGetName,
|
||||
_SCSUWriteSub,
|
||||
_SCSUSafeClone
|
||||
_SCSUSafeClone,
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
static const UConverterStaticData _SCSUStaticData={
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2002, International Business Machines
|
||||
* Copyright (C) 1999-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* ucnv.h:
|
||||
|
@ -51,6 +51,14 @@ typedef struct UConverter UConverter;
|
|||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uenum.h"
|
||||
|
||||
#ifndef __USET_H__
|
||||
|
||||
/* see unicode/uset.h */
|
||||
struct USet;
|
||||
typedef struct USet USet;
|
||||
|
||||
#endif
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
|
||||
|
@ -679,6 +687,19 @@ ucnv_getStarters(const UConverter* converter,
|
|||
UBool starters[256],
|
||||
UErrorCode* err);
|
||||
|
||||
/**
 * Selectors for the kind of Unicode set that ucnv_getUnicodeSet() returns.
 *
 * ### TODO @draft ICU 2.6
 */
typedef enum UConverterUnicodeSet {
    /** Select the set of roundtrippable code points. */
    UCNV_ROUNDTRIP_SET,
    /** Number of selectors; not a valid selector itself. */
    UCNV_SET_COUNT
} UConverterUnicodeSet;
|
||||
|
||||
/** ### TODO @draft ICU 2.6 */
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gets the current callback function used by the converter when an illegal
|
||||
* or invalid codepage sequence is found.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2001, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2003, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -16,9 +16,10 @@
|
|||
#include "cmemory.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "cintltst.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
#define MAX_LENGTH 999
|
||||
|
||||
|
@ -31,7 +32,7 @@ static int32_t gOutBufferSize = 0;
|
|||
static char gNuConvTestName[1024];
|
||||
|
||||
#define nct_min(x,y) ((x<y) ? x : y)
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
static void printSeq(const unsigned char* a, int len);
|
||||
static void printSeqErr(const unsigned char* a, int len);
|
||||
|
@ -67,6 +68,7 @@ static void TestAvailableConverters(void);
|
|||
static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/
|
||||
static void TestResetBehaviour(void);
|
||||
static void TestTruncated(void);
|
||||
static void TestUnicodeSet(void);
|
||||
|
||||
static void TestWithBufferSize(int32_t osize, int32_t isize);
|
||||
|
||||
|
@ -120,6 +122,7 @@ void addExtraTests(TestNode** root)
|
|||
addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8");
|
||||
addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32");
|
||||
addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated");
|
||||
addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet");
|
||||
}
|
||||
|
||||
/*test surrogate behaviour*/
|
||||
|
@ -1810,3 +1813,136 @@ TestTruncated() {
|
|||
doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct NameRange {
|
||||
const char *name;
|
||||
UChar32 start, end, start2, end2, notStart, notEnd;
|
||||
} NameRange;
|
||||
|
||||
static void
|
||||
TestUnicodeSet() {
|
||||
UErrorCode errorCode;
|
||||
UConverter *cnv;
|
||||
USet *set;
|
||||
const char *name;
|
||||
int32_t i, count;
|
||||
|
||||
static const char *const completeSetNames[]={
|
||||
"UTF-7",
|
||||
"UTF-8",
|
||||
"UTF-16",
|
||||
"UTF-16BE",
|
||||
"UTF-16LE",
|
||||
"UTF-32",
|
||||
"UTF-32BE",
|
||||
"UTF-32LE",
|
||||
"SCSU",
|
||||
"BOCU-1",
|
||||
"CESU-8",
|
||||
"gb18030",
|
||||
"IMAP-mailbox-name",
|
||||
"LMBCS-1",
|
||||
"LMBCS-2",
|
||||
"LMBCS-3",
|
||||
"LMBCS-4",
|
||||
"LMBCS-5",
|
||||
"LMBCS-6",
|
||||
"LMBCS-8",
|
||||
"LMBCS-11",
|
||||
"LMBCS-16",
|
||||
"LMBCS-17",
|
||||
"LMBCS-18",
|
||||
"LMBCS-19"
|
||||
};
|
||||
|
||||
static const NameRange nameRanges[]={
|
||||
{ "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
|
||||
{ "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
|
||||
{ "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
|
||||
{ "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
|
||||
{ "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
|
||||
{ "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff },
|
||||
{ "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
|
||||
};
|
||||
|
||||
/* open an empty set */
|
||||
set=uset_open(1, 0);
|
||||
|
||||
count=ucnv_countAvailable();
|
||||
for(i=0; i<count; ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name=ucnv_getAvailableName(i);
|
||||
cnv=ucnv_open(name, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: unable to open converter %s - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
continue;
|
||||
}
|
||||
|
||||
uset_clear(set);
|
||||
ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
} else if(uset_size(set)==0) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
|
||||
}
|
||||
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
/* test converters that are known to convert all of Unicode (except maybe for surrogates) */
|
||||
for(i=0; i<LENGTHOF(completeSetNames); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name=completeSetNames[i];
|
||||
cnv=ucnv_open(name, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: unable to open converter %s - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
continue;
|
||||
}
|
||||
|
||||
uset_clear(set);
|
||||
ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
} else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
|
||||
}
|
||||
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
/* test specific sets */
|
||||
for(i=0; i<LENGTHOF(nameRanges); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name=nameRanges[i].name;
|
||||
cnv=ucnv_open(name, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_data_err("error: unable to open converter %s - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
continue;
|
||||
}
|
||||
|
||||
uset_clear(set);
|
||||
ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
|
||||
name, u_errorName(errorCode));
|
||||
} else if(
|
||||
!uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
|
||||
nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)
|
||||
) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
|
||||
} else if(nameRanges[i].notStart>=0) {
|
||||
/* simulate containsAny() with the C API */
|
||||
uset_complement(set);
|
||||
if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
|
||||
log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
|
||||
}
|
||||
}
|
||||
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue