mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-22420 GB18030 change 3 mappings for GBK/web compat
This commit is contained in:
parent 87fe057838
commit c670bbd5b0
4 changed files with 29 additions and 2 deletions
|
@@ -5,6 +5,12 @@
|
|||
|
||||
# ICU codepage data for GB 18030-2022
|
||||
|
||||
# This data file was originally generated from the mapping tables
|
||||
# published with the original (year 2000) GB18030 standard.
|
||||
# It has been updated for the 2005 version of GB18030 (ICU-8274 & ICU-8427)
|
||||
# and for the 2022 version (ICU-22357).
|
||||
# ICU-22420 then made minor mapping changes for GBK and web data/WHATWG compatibility.
|
||||
|
||||
<code_set_name> "gb18030-2022"
|
||||
<char_name_mask> "AXXXX"
|
||||
<mb_cur_max> 4
|
||||
|
@@ -23,7 +29,8 @@
|
|||
# The second <icu:state> line is commented out (and does not count)
|
||||
# because the state table is hand-optimized and does not use what would be
|
||||
# the natural path for the encoding scheme.
|
||||
-<icu:state> 0-7f, 81:6, 82:7, 83:8, 84:9, 85-fe:3
|
||||
+# ICU-22420 makes 0x80 valid for the GBK encoding of the Euro sign.
|
||||
+<icu:state> 0-80, 81:6, 82:7, 83:8, 84:9, 85-fe:3
|
||||
# <icu:state> 30-39:2, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:2
|
||||
<icu:state> 30-39
|
||||
|
@@ -56,6 +63,18 @@
|
|||
|
||||
CHARMAP
|
||||
|
||||
# ICU-22420 reverse fallbacks for compatibility with GBK and other web data as in WHATWG.
|
||||
# U+20AC = EURO SIGN (normally \xA2\xE3)
|
||||
# U+3000 = IDEOGRAPHIC SPACE (normally \xA1\xA1)
|
||||
#
|
||||
# PUA U+E5E5 used to round-trip to \xA3\xA0, as specified in GB18030.
|
||||
# Now that \xA3\xA0 maps to U+3000 (“reverse fallback” mapping),
|
||||
# we use a “good one-way” mapping from U+E5E5 to \xA3\xA0
|
||||
# for maximum compatibility with previous behavior.
|
||||
<U20AC> \x80 |3
|
||||
<U3000> \xA3\xA0 |3
|
||||
<UE5E5> \xA3\xA0 |4
|
||||
|
||||
<U0000> \x00 |0
|
||||
<U0001> \x01 |0
|
||||
<U0002> \x02 |0
|
||||
|
@@ -29602,7 +29621,7 @@ CHARMAP
|
|||
<UE5E2> \xA3\x9D |0
|
||||
<UE5E3> \xA3\x9E |0
|
||||
<UE5E4> \xA3\x9F |0
|
||||
-<UE5E5> \xA3\xA0 |0
|
||||
+# <UE5E5> \xA3\xA0 |0
|
||||
<UE5E6> \xA4\x40 |0
|
||||
<UE5E7> \xA4\x41 |0
|
||||
<UE5E8> \xA4\x42 |0
|
||||
|
|
8
icu4c/source/test/testdata/conversion.txt
vendored
|
@@ -115,6 +115,14 @@ conversion:table(nofallback) {
|
|||
:intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17,18,20 },
|
||||
:int{1}, :int{0}, "", "&C", :bin{""}
|
||||
}
|
||||
// GB18030: ICU-22420 adds two reverse fallbacks
|
||||
{
|
||||
"gb18030",
|
||||
:bin{ 80a1a1a2e3a3a0 },
|
||||
"\u20AC\u3000\u20AC\u3000",
|
||||
:intvector{ 0,1,3,5 },
|
||||
:int{1}, :int{0}, "", "&C", :bin{""}
|
||||
}
|
||||
{
|
||||
"UTF-8",
|
||||
:bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a },
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue