mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-942 fully handle <subchar1> and |2 in converters
X-SVN-Rev: 13522
This commit is contained in:
parent
aefaa06057
commit
4614bfe102
4 changed files with 51 additions and 4 deletions
|
@ -709,7 +709,7 @@ static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
|
|||
0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
|
||||
const UChar expectedUnicode[] =
|
||||
{ 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
|
||||
0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd};
|
||||
0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
|
||||
int32_t fromtest4Offs[] =
|
||||
{ 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};
|
||||
|
||||
|
|
36
icu4c/source/test/testdata/conversion.txt
vendored
36
icu4c/source/test/testdata/conversion.txt
vendored
|
@ -136,6 +136,42 @@ conversion {
|
|||
fromUnicode {
|
||||
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
|
||||
Cases {
|
||||
// <subchar1> from |2 mappings
|
||||
{
|
||||
"ibm-1390",
|
||||
"\x0e\x0f\u0901\U00050000\uffe8\uffee",
|
||||
:bin{ 3f3f0efefefefe0f3f3f },
|
||||
:intvector{ 0, 1, 2, 2, 2, 3, 3, 5, 5, 6 },
|
||||
:int{1}, :int{1}, "", "?", ""
|
||||
}
|
||||
|
||||
// <subchar1> from |2 mappings, and also contains a fallback to 00
|
||||
{
|
||||
"*test4",
|
||||
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
|
||||
:bin{ 0000e10102030affff },
|
||||
:intvector{ 0, 1, 2, 4, 4, 4, 4, 6, 8 },
|
||||
:int{1}, :int{1}, "", "?", ""
|
||||
}
|
||||
|
||||
// setting a <subchar> resets the <subchar1>
|
||||
{
|
||||
"*test4",
|
||||
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
|
||||
:bin{ 00000102030f0102030a0102030f0102030f },
|
||||
:intvector{ 0, 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8 },
|
||||
:int{1}, :int{1}, "", "?\x00\x01\x02\x03\x0f", ""
|
||||
}
|
||||
|
||||
// fallback to 00 with old single-byte data structure
|
||||
{
|
||||
"*test1",
|
||||
"\u20ac\u20ad\U00101234\U00050000",
|
||||
:bin{ 000007ff },
|
||||
:intvector{ 0, 1, 2, 4 },
|
||||
:int{1}, :int{1}, "", "?", ""
|
||||
}
|
||||
|
||||
// extensions
|
||||
{
|
||||
"ibm-1390",
|
||||
|
|
2
icu4c/source/test/testdata/test1.ucm
vendored
2
icu4c/source/test/testdata/test1.ucm
vendored
|
@ -6,7 +6,6 @@
|
|||
# test1.ucm
|
||||
#
|
||||
# Test file for MBCS conversion with single-byte codepage data.
|
||||
# Also contains extension mappings (m:n).
|
||||
|
||||
<code_set_name> "test1"
|
||||
<mb_cur_max> 1
|
||||
|
@ -19,6 +18,7 @@ CHARMAP
|
|||
|
||||
# fromUnicode result is zero byte from other than U+0000
|
||||
<U20ac> \x00 |0
|
||||
<U20ad> \x00 |1
|
||||
|
||||
# nothing special
|
||||
<U0005> \x05 |0
|
||||
|
|
15
icu4c/source/test/testdata/test4.ucm
vendored
15
icu4c/source/test/testdata/test4.ucm
vendored
|
@ -11,17 +11,25 @@
|
|||
<mb_cur_max> 4
|
||||
<mb_cur_min> 1
|
||||
<uconv_class> "MBCS"
|
||||
|
||||
# Both subchars are single-byte, which does not make sense
|
||||
# but works: <subchar1> was added for the new tests without
|
||||
# changing the old tests to use a new <subchar> -- markus 20031028
|
||||
<subchar> \xff
|
||||
<icu:state> 0, 1:1, 5-9, ff
|
||||
<subchar1> \xe1
|
||||
<icu:state> 0, 1:1, 5-9, e1, ff
|
||||
<icu:state> 2:2
|
||||
<icu:state> 3:3
|
||||
<icu:state> a-f.p
|
||||
<icu:state> a-f.p, ff
|
||||
|
||||
CHARMAP
|
||||
|
||||
# fromUnicode result is zero byte from other than U+0000
|
||||
<U20ac> \x00 |0
|
||||
|
||||
# fallback from non-zero to zero possible with extension table
|
||||
<U20ad> \x00 |1
|
||||
|
||||
# nothing special
|
||||
<U0005> \x05 |0
|
||||
|
||||
|
@ -42,4 +50,7 @@ CHARMAP
|
|||
<U000e> \x01\x02\x03\x0e |3
|
||||
#unassigned \x01\x02\x03\x0f
|
||||
|
||||
# <subchar1> non-mapping
|
||||
<U50005> \xe1 |2
|
||||
|
||||
END CHARMAP
|
||||
|
|
Loading…
Add table
Reference in a new issue