ICU-942 fully handle <subchar1> and |2 in converters

X-SVN-Rev: 13522
This commit is contained in:
Markus Scherer 2003-10-29 03:18:21 +00:00
parent aefaa06057
commit 4614bfe102
4 changed files with 51 additions and 4 deletions

View file

@ -709,7 +709,7 @@ static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
const UChar expectedUnicode[] =
{ 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd};
0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
int32_t fromtest4Offs[] =
{ 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};

View file

@ -136,6 +136,42 @@ conversion {
fromUnicode {
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
Cases {
// <subchar1> from |2 mappings
{
"ibm-1390",
"\x0e\x0f\u0901\U00050000\uffe8\uffee",
:bin{ 3f3f0efefefefe0f3f3f },
:intvector{ 0, 1, 2, 2, 2, 3, 3, 5, 5, 6 },
:int{1}, :int{1}, "", "?", ""
}
// <subchar1> from |2 mappings, and also contains a fallback to 00
{
"*test4",
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
:bin{ 0000e10102030affff },
:intvector{ 0, 1, 2, 4, 4, 4, 4, 6, 8 },
:int{1}, :int{1}, "", "?", ""
}
// setting a <subchar> resets the <subchar1>
{
"*test4",
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
:bin{ 00000102030f0102030a0102030f0102030f },
:intvector{ 0, 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8 },
:int{1}, :int{1}, "", "?\x00\x01\x02\x03\x0f", ""
}
// fallback to 00 with old single-byte data structure
{
"*test1",
"\u20ac\u20ad\U00101234\U00050000",
:bin{ 000007ff },
:intvector{ 0, 1, 2, 4 },
:int{1}, :int{1}, "", "?", ""
}
// extensions
{
"ibm-1390",

View file

@ -6,7 +6,6 @@
# test1.ucm
#
# Test file for MBCS conversion with single-byte codepage data.
# Also contains extension mappings (m:n).
<code_set_name> "test1"
<mb_cur_max> 1
@ -19,6 +18,7 @@ CHARMAP
# fromUnicode result is a zero byte for code points other than U+0000
<U20ac> \x00 |0
<U20ad> \x00 |1
# nothing special
<U0005> \x05 |0

View file

@ -11,17 +11,25 @@
<mb_cur_max> 4
<mb_cur_min> 1
<uconv_class> "MBCS"
# <subchar> and <subchar1> are both single bytes, which does not make sense
# but works: <subchar1> was added for new tests, and <subchar> was left
# unchanged so that the old tests need not change -- markus 20031028
<subchar> \xff
<icu:state> 0, 1:1, 5-9, ff
<subchar1> \xe1
<icu:state> 0, 1:1, 5-9, e1, ff
<icu:state> 2:2
<icu:state> 3:3
<icu:state> a-f.p
<icu:state> a-f.p, ff
CHARMAP
# fromUnicode result is a zero byte for code points other than U+0000
<U20ac> \x00 |0
# fallback from non-zero to zero possible with extension table
<U20ad> \x00 |1
# nothing special
<U0005> \x05 |0
@ -42,4 +50,7 @@ CHARMAP
<U000e> \x01\x02\x03\x0e |3
#unassigned \x01\x02\x03\x0f
# |2 entry: not a real mapping; <subchar1> is the substitution for this code point
<U50005> \xe1 |2
END CHARMAP