mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-9046 fix gb18030.ucm state table to accommodate 81 35 F4 37 with minimal .cnv file size
X-SVN-Rev: 31202
This commit is contained in:
parent
b19326fa13
commit
097d4ba867
1 changed files with 14 additions and 4 deletions
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2000-2011, International Business Machines Corporation and others.
|
||||
# Copyright (C) 2000-2012, International Business Machines Corporation and others.
|
||||
# All Rights Reserved.
|
||||
|
||||
# ICU codepage data for GB 18030
|
||||
|
@ -14,7 +14,9 @@
|
|||
# Note that the entire block for the supplementary Unicode planes is
|
||||
# marked unassigned because they are handled algorithmically.
|
||||
# Similarly, some of the BMP mappings are marked as unassigned for the same reason.
|
||||
# See http://userguide.icu-project.org/conversion/data#TOC-State-table-syntax-in-.ucm-files
|
||||
|
||||
# States 0..2:
|
||||
# Mostly assigned sequences, with branches in the lead bytes
|
||||
# The second <icu:state> line is commented out (and does not count)
|
||||
# because the state table is hand-optimized and does not use what would be
|
||||
|
@ -24,19 +26,27 @@
|
|||
<icu:state> 81-fe:2
|
||||
<icu:state> 30-39
|
||||
|
||||
# All-unassigned 4-byte sequences
|
||||
# States 3..5: All-unassigned 4-byte sequences.
|
||||
# Do not change these states, or else the conversion table will grow significantly.
|
||||
<icu:state> 30-39:4, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:5
|
||||
<icu:state> 30-36.u, 37, 38-39.u
|
||||
<icu:state> 30-39.u
|
||||
|
||||
# States 6..9:
|
||||
# Some unassigned 4-byte sequences, one state for each of the lead bytes 81-84
|
||||
# Each of these states branches on the second of four bytes; for the third and fourth bytes,
|
||||
# unassigned sequences continue with state 5, assigned ones with state 2
|
||||
<icu:state> 30:1, 31-35:4, 36-39:1, 40-7e, 80-fe
|
||||
<icu:state> 30:1, 31-34:4, 35:a, 36-39:1, 40-7e, 80-fe
|
||||
<icu:state> 30-35:1, 36-39:4, 40-7e, 80-fe
|
||||
<icu:state> 30-35:4, 36:1, 37-39:4, 40-7e, 80-fe
|
||||
<icu:state> 30-31:1, 32-39:4, 40-7e, 80-fe
|
||||
|
||||
# State 0xa=10, reached from 81 35: Handle the new mapping U+E7C7 <-> 81 35 F4 37
|
||||
# (see changes between revisions 25802 and 29863),
|
||||
# allow mappings for 81 35 F4 zz,
|
||||
# but otherwise keep 81 35 xx yy going to "unassigned" states.
|
||||
<icu:state> 81-fe:5, f4:2
|
||||
|
||||
# GB 18030 BMP mappings that are not handled algorithmically are
|
||||
# generated using gbmake4 and gbtoucm tools. Please see charset/source/gb18030/gb18030.html
|
||||
# or http://source.icu-project.org/repos/icu/data/trunk/charset/source/gb18030/gb18030.html
|
||||
|
|
Loading…
Add table
Reference in a new issue