ICU-6001 fix ucnv_getUnicodeSet(LMBCS)

X-SVN-Rev: 22850
This commit is contained in:
Markus Scherer 2007-10-25 05:17:43 +00:00
parent b90b809ea1
commit 49868a5a68
3 changed files with 19 additions and 12 deletions

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2000-2006, International Business Machines
* Copyright (C) 2000-2007, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_lmb.cpp
@ -536,7 +536,7 @@ static const UConverterImpl _LMBCSImpl##n={\
NULL,\
NULL,\
_LMBCSSafeClone,\
_LMBCSGetUnicodeSet\
ucnv_getCompleteUnicodeSet\
};\
static const UConverterStaticData _LMBCSStaticData##n={\
sizeof(UConverterStaticData),\
@ -662,15 +662,14 @@ _LMBCSSafeClone(const UConverter *cnv,
return &newLMBCS->cnv;
}
/*
 * Reports the code points handled by an LMBCS converter by adding them
 * to the caller-supplied set through sa->addRange().
 *
 * Two ranges are added, U+0000..U+F5FF and U+F700..U+10FFFF, i.e. everything
 * except U+F6xx; see the LMBCS explanation elsewhere in this file
 * (search for F6xx) for the reason that block is left out.
 *
 * cnv, which and pErrorCode are accepted for the callback signature but are
 * not read or written here.
 */
static void
_LMBCSGetUnicodeSet(const UConverter *cnv,
                    const USetAdder *sa,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode) {
    /* inclusive boundaries of the two ranges on either side of U+F6xx */
    enum {
        LOW_RANGE_START  = 0,
        LOW_RANGE_END    = 0xf5ff,
        HIGH_RANGE_START = 0xf700,
        HIGH_RANGE_END   = 0x10ffff
    };

    /* intentionally unused */
    (void)cnv;
    (void)which;
    (void)pErrorCode;

    sa->addRange(sa->set, LOW_RANGE_START, LOW_RANGE_END);
    sa->addRange(sa->set, HIGH_RANGE_START, HIGH_RANGE_END);
}
/*
* There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117)
* which added all code points except for U+F6xx
* because those cannot be represented in the Unicode group.
* However, it turns out that windows-950 has roundtrips for all of U+F6xx
* which means that LMBCS can convert all Unicode code points after all.
* We now simply use ucnv_getCompleteUnicodeSet().
*/
/*
Here's the basic helper function that we use when converting from

View file

@ -541,7 +541,7 @@ ConversionTest::TestGetUnicodeSet2() {
"JIS7",
"ISO-2022-CN",
"ISO-2022-CN-EXT",
// "LMBCS" TODO(markus): known bug, the fallback set is said to be missing [\uF600-\uF6FF]
"LMBCS"
};
char buffer[1024];
int32_t i;

View file

@ -1365,6 +1365,14 @@ conversion:table(nofallback) {
:int{0}
}
// LMBCS
{
"LMBCS",
"[\x00-\U0010ffff]",
"[]",
:int{0}
}
// DBCS-only
{
"ibm-971",