mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-271 First-pass LMBCS support (Lotus Multi-byte Character set)
X-SVN-Rev: 1042
This commit is contained in:
parent
bd899dc9a3
commit
2dbef4e207
13 changed files with 2220 additions and 7 deletions
icu4c
data
source
common
data/mappings
test/cintltst
tools/makeconv
|
@ -47,7 +47,19 @@ UTF16_PlatformEndian iso-10646-ucs-2 csUnicode utf16 utf-16 ibm-1200 ibm1200
|
|||
UTF16_OppositeEndian
|
||||
LATIN_1 iso-8859-1 iso_8859-1 ibm-819 ibm819 cp819 latin1 latin-1 ascii ascii-7 us-ascii 8859-1 csisolatin1 iso-ir-100 iso_8859-1:1978 #!!!!! There's whole lot of names for this - cp367 csASCII etc.
|
||||
ISO_2022 iso-2022 2022 cp2022 iso2022 iso_2022
|
||||
LMBCS
|
||||
LMBCS-1 lmbcs
|
||||
LMBCS-2
|
||||
LMBCS-3
|
||||
LMBCS-4
|
||||
LMBCS-5
|
||||
LMBCS-6
|
||||
LMBCS-8
|
||||
LMBCS-11
|
||||
LMBCS-16
|
||||
LMBCS-17
|
||||
LMBCS-18
|
||||
LMBCS-19
|
||||
|
||||
|
||||
# Table-based
|
||||
|
||||
|
@ -76,10 +88,13 @@ ibm-1383 euc-cn euccn ibm-eucCN # China EUC
|
|||
#ibm-1162 tis-620 cp874 windows-874 ms874 # Thai (w/ euro support) #what is the connection between this and the one below!!!
|
||||
ibm-874 ibm-1161 #same as 1162 (w/o euro update) ***This is commented out in Helena's
|
||||
|
||||
lmb-excp # special exceptions list for LMBCS algorithm
|
||||
|
||||
# Platform codepages
|
||||
ibm-437 ibm437 cp437 csPC8CodePage437 437 # PC US
|
||||
# HSYS:
|
||||
ibm-850 IBM850 cp850 850 csPC850Multilingual # PC latin1
|
||||
ibm-851 IBM851 cp851 851 csPC851 # PC DOS Greek (no euro)
|
||||
ibm-858 ibm858 cp858 # PC latin1 with Euro cp850 removed
|
||||
ibm-9044 IBM852 852 csPCp852 cp852 # PC latin2 (w/ euro update) #where should the names go here or below - inconsistency!!!
|
||||
ibm-852 # PC latin2 (w/o euro update)
|
||||
|
|
285
icu4c/data/ibm-851.ucm
Normal file
285
icu4c/data/ibm-851.ucm
Normal file
|
@ -0,0 +1,285 @@
|
|||
# ******************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2000, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# ******************************************************************************
|
||||
#
|
||||
# File created on Fri Feb 11 14:11:00 2000
|
||||
#
|
||||
# File created manually
|
||||
# from source files IBM-851.TXMAP100
|
||||
#
|
||||
# Table Version : 1.00
|
||||
#
|
||||
<code_set_name> "IBM-851"
|
||||
<char_name_mask> "AXXXX"
|
||||
<mb_cur_max> 1
|
||||
<mb_cur_min> 1
|
||||
<uconv_class> "SBCS"
|
||||
<subchar> \x7F
|
||||
#
|
||||
CHARMAP
|
||||
#
|
||||
#
|
||||
#ISO 10646 IBM-851
|
||||
#_________ _________
|
||||
<U0000> \x00 # ..NUL...
|
||||
<U0001> \x01 # ..SOH...
|
||||
<U0002> \x02 # ..STX...
|
||||
<U0003> \x03 # ..ETX...
|
||||
<U0004> \x04 # ..EOT...
|
||||
<U0005> \x05 # ..ENQ...
|
||||
<U0006> \x06 # ..ACK...
|
||||
<U0007> \x07 # ..BEL...
|
||||
<U0008> \x08 # ...BS...
|
||||
<U0009> \x09 # ...HT...
|
||||
<U000A> \x0A # ...LF...
|
||||
<U000B> \x0B # ...VT...
|
||||
<U000C> \x0C # ...FF...
|
||||
<U000D> \x0D # ...CR...
|
||||
<U000E> \x0E # .SO/LS1.
|
||||
<U000F> \x0F # .SI/LS0.
|
||||
<U0010> \x10 # ..DLE...
|
||||
<U0011> \x11 # ..DC1...
|
||||
<U0012> \x12 # ..DC2...
|
||||
<U0013> \x13 # ..DC3...
|
||||
<U0014> \x14 # ..DC4...
|
||||
<U0015> \x15 # ..NAK...
|
||||
<U0016> \x16 # ..SYN...
|
||||
<U0017> \x17 # ..ETB...
|
||||
<U0018> \x18 # ..CAN...
|
||||
<U0019> \x19 # ...EM...
|
||||
<U001C> \x1A # ..IFS...
|
||||
<U001B> \x1B # ..ESC...
|
||||
<U007F> \x1C # ..DEL...
|
||||
<U001D> \x1D # ...GS...
|
||||
<U001E> \x1E # ...RS...
|
||||
<U001F> \x1F # ...US...
|
||||
<U0020> \x20 # SP010000
|
||||
<U0021> \x21 # SP020000
|
||||
<U0022> \x22 # SP040000
|
||||
<U0023> \x23 # SM010000
|
||||
<U0024> \x24 # SC030000
|
||||
<U0025> \x25 # SM020000
|
||||
<U0026> \x26 # SM030000
|
||||
<U0027> \x27 # SP050000
|
||||
<U0028> \x28 # SP060000
|
||||
<U0029> \x29 # SP070000
|
||||
<U002A> \x2A # SM040000
|
||||
<U002B> \x2B # SA010000
|
||||
<U002C> \x2C # SP080000
|
||||
<U002D> \x2D # SP100000
|
||||
<U002E> \x2E # SP110000
|
||||
<U002F> \x2F # SP120000
|
||||
<U0030> \x30 # ND100000
|
||||
<U0031> \x31 # ND010000
|
||||
<U0032> \x32 # ND020000
|
||||
<U0033> \x33 # ND030000
|
||||
<U0034> \x34 # ND040000
|
||||
<U0035> \x35 # ND050000
|
||||
<U0036> \x36 # ND060000
|
||||
<U0037> \x37 # ND070000
|
||||
<U0038> \x38 # ND080000
|
||||
<U0039> \x39 # ND090000
|
||||
<U003A> \x3A # SP130000
|
||||
<U003B> \x3B # SP140000
|
||||
<U003C> \x3C # SA030000
|
||||
<U003D> \x3D # SA040000
|
||||
<U003E> \x3E # SA050000
|
||||
<U003F> \x3F # SP150000
|
||||
<U0040> \x40 # SM050000
|
||||
<U0041> \x41 # LA020000
|
||||
<U0042> \x42 # LB020000
|
||||
<U0043> \x43 # LC020000
|
||||
<U0044> \x44 # LD020000
|
||||
<U0045> \x45 # LE020000
|
||||
<U0046> \x46 # LF020000
|
||||
<U0047> \x47 # LG020000
|
||||
<U0048> \x48 # LH020000
|
||||
<U0049> \x49 # LI020000
|
||||
<U004A> \x4A # LJ020000
|
||||
<U004B> \x4B # LK020000
|
||||
<U004C> \x4C # LL020000
|
||||
<U004D> \x4D # LM020000
|
||||
<U004E> \x4E # LN020000
|
||||
<U004F> \x4F # LO020000
|
||||
<U0050> \x50 # LP020000
|
||||
<U0051> \x51 # LQ020000
|
||||
<U0052> \x52 # LR020000
|
||||
<U0053> \x53 # LS020000
|
||||
<U0054> \x54 # LT020000
|
||||
<U0055> \x55 # LU020000
|
||||
<U0056> \x56 # LV020000
|
||||
<U0057> \x57 # LW020000
|
||||
<U0058> \x58 # LX020000
|
||||
<U0059> \x59 # LY020000
|
||||
<U005A> \x5A # LZ020000
|
||||
<U005B> \x5B # SM060000
|
||||
<U005C> \x5C # SM070000
|
||||
<U005D> \x5D # SM080000
|
||||
<U005E> \x5E # SD150000
|
||||
<U005F> \x5F # SP090000
|
||||
<U0060> \x60 # SD130000
|
||||
<U0061> \x61 # LA010000
|
||||
<U0062> \x62 # LB010000
|
||||
<U0063> \x63 # LC010000
|
||||
<U0064> \x64 # LD010000
|
||||
<U0065> \x65 # LE010000
|
||||
<U0066> \x66 # LF010000
|
||||
<U0067> \x67 # LG010000
|
||||
<U0068> \x68 # LH010000
|
||||
<U0069> \x69 # LI010000
|
||||
<U006A> \x6A # LJ010000
|
||||
<U006B> \x6B # LK010000
|
||||
<U006C> \x6C # LL010000
|
||||
<U006D> \x6D # LM010000
|
||||
<U006E> \x6E # LN010000
|
||||
<U006F> \x6F # LO010000
|
||||
<U0070> \x70 # LP010000
|
||||
<U0071> \x71 # LQ010000
|
||||
<U0072> \x72 # LR010000
|
||||
<U0073> \x73 # LS010000
|
||||
<U0074> \x74 # LT010000
|
||||
<U0075> \x75 # LU010000
|
||||
<U0076> \x76 # LV010000
|
||||
<U0077> \x77 # LW010000
|
||||
<U0078> \x78 # LX010000
|
||||
<U0079> \x79 # LY010000
|
||||
<U007A> \x7A # LZ010000
|
||||
<U007B> \x7B # SM110000
|
||||
<U007C> \x7C # SM130000
|
||||
<U007D> \x7D # SM140000
|
||||
<U007E> \x7E # SD190000
|
||||
<U001A> \x7F # ..SUB...
|
||||
<U00C7> \x80 # LC420000
|
||||
<U00FC> \x81 # LU170000
|
||||
<U00E9> \x82 # LE110000
|
||||
<U00E2> \x83 # LA150000
|
||||
<U00E4> \x84 # LA170000
|
||||
<U00E0> \x85 # LA130000
|
||||
<U0386> \x86 # GA120000
|
||||
<U00E7> \x87 # LC410000
|
||||
<U00EA> \x88 # LE150000
|
||||
<U00EB> \x89 # LE170000
|
||||
<U00E8> \x8A # LE130000
|
||||
<U00EF> \x8B # LI170000
|
||||
<U00EE> \x8C # LI150000
|
||||
<U0388> \x8D # GE120000
|
||||
<U00C4> \x8E # LA180000
|
||||
<U0389> \x8F # GE720000
|
||||
<U038A> \x90 # GI120000
|
||||
<U038C> \x92 # GO120000
|
||||
<U00F4> \x93 # LO150000
|
||||
<U00F6> \x94 # LO170000
|
||||
<U038E> \x95 # GU120000
|
||||
<U00FB> \x96 # LU150000
|
||||
<U00F9> \x97 # LU130000
|
||||
<U038F> \x98 # GO720000
|
||||
<U00D6> \x99 # LO180000
|
||||
<U00DC> \x9A # LU180000
|
||||
<U03AC> \x9B # GA110000
|
||||
<U00A3> \x9C # SC020000
|
||||
<U03AD> \x9D # GE110000
|
||||
<U03AE> \x9E # GE710000
|
||||
<U03AF> \x9F # GI110000
|
||||
<U03CA> \xA0 # GI170000
|
||||
<U0390> \xA1 # GI730000
|
||||
<U03CC> \xA2 # GO110000
|
||||
<U03CD> \xA3 # GU110000
|
||||
<U0391> \xA4 # GA020000
|
||||
<U0392> \xA5 # GB020000
|
||||
<U0393> \xA6 # GG020000
|
||||
<U0394> \xA7 # GD020000
|
||||
<U0395> \xA8 # GE020000
|
||||
<U0396> \xA9 # GZ020000
|
||||
<U0397> \xAA # GE320000
|
||||
<U00BD> \xAB # NF010000
|
||||
<U0398> \xAC # GT620000
|
||||
<U0399> \xAD # GI020000
|
||||
<U00AB> \xAE # SP170000
|
||||
<U00BB> \xAF # SP180000
|
||||
<U2591> \xB0 # SF140000
|
||||
<U2592> \xB1 # SF150000
|
||||
<U2593> \xB2 # SF160000
|
||||
<U2502> \xB3 # SF110000
|
||||
<U2524> \xB4 # SF090000
|
||||
<U039A> \xB5 # GK020000
|
||||
<U039B> \xB6 # GL020000
|
||||
<U039C> \xB7 # GM020000
|
||||
<U039D> \xB8 # GN020000
|
||||
<U2563> \xB9 # SF230000
|
||||
<U2551> \xBA # SF240000
|
||||
<U2557> \xBB # SF250000
|
||||
<U255D> \xBC # SF260000
|
||||
<U039E> \xBD # GX020000
|
||||
<U039F> \xBE # GO020000
|
||||
<U2510> \xBF # SF030000
|
||||
<U2514> \xC0 # SF020000
|
||||
<U2534> \xC1 # SF070000
|
||||
<U252C> \xC2 # SF060000
|
||||
<U251C> \xC3 # SF080000
|
||||
<U2500> \xC4 # SF100000
|
||||
<U253C> \xC5 # SF050000
|
||||
<U03A0> \xC6 # GP020000
|
||||
<U03A1> \xC7 # GR020000
|
||||
<U255A> \xC8 # SF380000
|
||||
<U2554> \xC9 # SF390000
|
||||
<U2569> \xCA # SF400000
|
||||
<U2566> \xCB # SF410000
|
||||
<U2560> \xCC # SF420000
|
||||
<U2550> \xCD # SF430000
|
||||
<U256C> \xCE # SF440000
|
||||
<U03A3> \xCF # GS020000
|
||||
<U03A4> \xD0 # GT020000
|
||||
<U03A5> \xD1 # GU020000
|
||||
<U03A6> \xD2 # GF020000
|
||||
<U03A7> \xD3 # GH020000
|
||||
<U03A8> \xD4 # GP620000
|
||||
<U03A9> \xD5 # GO320000
|
||||
<U03B1> \xD6 # GA010000
|
||||
<U03B2> \xD7 # GB010000
|
||||
<U03B3> \xD8 # GG010000
|
||||
<U2518> \xD9 # SF040000
|
||||
<U250C> \xDA # SF010000
|
||||
<U2588> \xDB # SF610000
|
||||
<U2584> \xDC # SF570000
|
||||
<U03B4> \xDD # GD010000
|
||||
<U03B5> \xDE # GE010000
|
||||
<U2580> \xDF # SF600000
|
||||
<U03B6> \xE0 # GZ010000
|
||||
<U03B7> \xE1 # GE310000
|
||||
<U03B8> \xE2 # GT610000
|
||||
<U03B9> \xE3 # GI010000
|
||||
<U03BA> \xE4 # GK010000
|
||||
<U03BB> \xE5 # GL010000
|
||||
<U03BC> \xE6 # GM010000
|
||||
<U03BD> \xE7 # GN010000
|
||||
<U03BE> \xE8 # GX010000
|
||||
<U03BF> \xE9 # GO010000
|
||||
<U03C0> \xEA # GP010000
|
||||
<U03C1> \xEB # GR010000
|
||||
<U03C3> \xEC # GS010000
|
||||
<U03C2> \xED # GS610000
|
||||
<U03C4> \xEE # GT010000
|
||||
<U00B4> \xEF # SD110000
|
||||
<U00AD> \xF0 # SP320000
|
||||
<U00B1> \xF1 # SA020000
|
||||
<U03C5> \xF2 # GU010000
|
||||
<U03C6> \xF3 # GF010000
|
||||
<U03C7> \xF4 # GH010000
|
||||
<U00A7> \xF5 # SM240000
|
||||
<U03C8> \xF6 # GP610000
|
||||
<U00B8> \xF7 # SD410000
|
||||
<U00B0> \xF8 # SM190000
|
||||
<U00A8> \xF9 # SD170000
|
||||
<U03C9> \xFA # GO310000
|
||||
<U03CB> \xFB # GU170000
|
||||
<U03B0> \xFC # GU730000
|
||||
<U03CE> \xFD # GO710000
|
||||
<U25A0> \xFE # SM470000
|
||||
<U00A0> \xFF # SP300000
|
||||
#
|
||||
END CHARMAP
|
||||
#
|
||||
#________________________________________________________________________
|
315
icu4c/data/lmb-excp.ucm
Normal file
315
icu4c/data/lmb-excp.ucm
Normal file
|
@ -0,0 +1,315 @@
|
|||
# *******************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2000, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# *******************************************************************************
|
||||
#
|
||||
# File created on Thu Feb 10 11:47:54 2000
|
||||
#
|
||||
# File created manually from source file LMBCS.ALL
|
||||
#
|
||||
# Table Version : 1.00
|
||||
#
|
||||
<code_set_name> "lmb-excp"
|
||||
<char_name_mask> "AXXXX"
|
||||
<mb_cur_max> 2
|
||||
<mb_cur_min> 1
|
||||
<uconv_class> "MBCS"
|
||||
<subchar> \x3F
|
||||
#
|
||||
CHARMAP
|
||||
#
|
||||
#
|
||||
#ISO 10646 LMBCS
|
||||
#_________ _________
|
||||
<U0027> \x01\x27
|
||||
<U005E> \x01\x23
|
||||
<U005E> \x01\x33
|
||||
<U005E> \x01\x6D
|
||||
<U0060> \x01\x24
|
||||
<U0060> \x01\x34
|
||||
<U007E> \x01\x21
|
||||
<U007E> \x01\x31
|
||||
<U007E> \x01\x6C
|
||||
<U00A0> \x01\x3B
|
||||
<U00A7> \x01\x15
|
||||
<U00A8> \x01\x20
|
||||
<U00A8> \x01\x30
|
||||
<U00AF> \x01\x67
|
||||
<U00B4> \x01\x25
|
||||
<U00B4> \x01\x35
|
||||
<U00B6> \x01\x14
|
||||
<U0100> \x06\x2E
|
||||
<U0101> \x06\x01
|
||||
<U0108> \x06\x02
|
||||
<U0109> \x06\x03
|
||||
<U010A> \x06\x04
|
||||
<U010B> \x06\x05
|
||||
<U0112> \x06\x06
|
||||
<U0113> \x06\x07
|
||||
<U0116> \x06\x08
|
||||
<U0117> \x06\x09
|
||||
<U011C> \x06\x0A
|
||||
<U011D> \x06\x0B
|
||||
<U0120> \x06\x0C
|
||||
<U0121> \x06\x0D
|
||||
<U0122> \x06\x0E
|
||||
<U0123> \x06\x0F
|
||||
<U0124> \x06\x10
|
||||
<U0125> \x06\x11
|
||||
<U0126> \x01\x72
|
||||
<U0127> \x01\x73
|
||||
<U0128> \x06\x12
|
||||
<U0129> \x06\x13
|
||||
<U012A> \x06\x14
|
||||
<U012B> \x06\x15
|
||||
<U012E> \x06\x16
|
||||
<U012F> \x06\x17
|
||||
<U0132> \x01\x61
|
||||
<U0133> \x01\x60
|
||||
<U0134> \x06\x18
|
||||
<U0135> \x06\x19
|
||||
<U0136> \x06\x1A
|
||||
<U0137> \x06\x1B
|
||||
<U0138> \x01\x7A
|
||||
<U013B> \x06\x1C
|
||||
<U013C> \x06\x1D
|
||||
<U013F> \x01\x66
|
||||
<U0140> \x01\x65
|
||||
<U0145> \x06\x1E
|
||||
<U0146> \x06\x1F
|
||||
<U0149> \x01\x64
|
||||
<U014A> \x01\x78
|
||||
<U014B> \x01\x79
|
||||
<U014C> \x06\x20
|
||||
<U014D> \x06\x21
|
||||
<U0152> \x01\x40
|
||||
<U0153> \x01\x41
|
||||
<U0156> \x06\x22
|
||||
<U0157> \x06\x23
|
||||
<U015C> \x06\x24
|
||||
<U015D> \x06\x25
|
||||
<U0166> \x01\x74
|
||||
<U0167> \x01\x75
|
||||
<U0168> \x06\x26
|
||||
<U0169> \x06\x27
|
||||
<U016A> \x06\x28
|
||||
<U016B> \x06\x29
|
||||
<U016C> \x06\x2A
|
||||
<U016D> \x06\x2B
|
||||
<U0172> \x06\x2C
|
||||
<U0173> \x06\x2D
|
||||
<U0178> \x01\x42
|
||||
<U02BC> \x02\x07
|
||||
<U02BD> \x02\x08
|
||||
<U02C7> \x01\x6B
|
||||
<U02D8> \x01\x68
|
||||
<U02D9> \x01\x43
|
||||
<U02DA> \x01\x22
|
||||
<U02DA> \x01\x32
|
||||
<U02DA> \x01\x44
|
||||
<U02DB> \x01\x6A
|
||||
<U02DD> \x01\x69
|
||||
<U037A> \x02\x01
|
||||
<U0384> \x02\x06
|
||||
<U0385> \x02\x02
|
||||
<U03AA> \x02\x03
|
||||
<U03AB> \x02\x04
|
||||
<U03C6> \x02\x6D
|
||||
<U2013> \x01\x29
|
||||
<U2014> \x01\x2A
|
||||
<U2015> \x02\x05
|
||||
<U2017> \x01\x39
|
||||
<U2018> \x01\x2B
|
||||
<U2019> \x01\x2C
|
||||
<U201A> \x01\x37
|
||||
<U201C> \x01\x26
|
||||
<U201D> \x01\x38
|
||||
<U201E> \x01\x36
|
||||
<U2020> \x01\x70
|
||||
<U2021> \x01\x71
|
||||
<U2022> \x01\x07
|
||||
<U2026> \x01\x28
|
||||
<U2030> \x02\x7A
|
||||
<U2032> \x02\x69
|
||||
<U2033> \x02\x6A
|
||||
<U2039> \x01\x2E
|
||||
<U203A> \x01\x2F
|
||||
<U203C> \x01\x13
|
||||
<U203E> \x02\x09
|
||||
<U2044> \x02\x78
|
||||
<U207F> \x02\x7C
|
||||
<U20A4> \x01\x7E
|
||||
<U20A7> \x01\x7F
|
||||
<U2111> \x02\x52
|
||||
<U2113> \x01\x77
|
||||
<U211C> \x02\x53
|
||||
<U2122> \x01\x76
|
||||
<U2126> \x01\x4E
|
||||
<U2135> \x02\x51
|
||||
<U215B> \x02\x16
|
||||
<U215C> \x02\x15
|
||||
<U215D> \x02\x14
|
||||
<U215E> \x02\x13
|
||||
<U2190> \x01\x1B
|
||||
<U2191> \x01\x18
|
||||
<U2192> \x01\x1A
|
||||
<U2193> \x01\x19
|
||||
<U2194> \x01\x1D
|
||||
<U2195> \x01\x12
|
||||
<U21A8> \x01\x17
|
||||
<U21D0> \x02\x1B
|
||||
<U21D1> \x02\x18
|
||||
<U21D2> \x02\x1A
|
||||
<U21D3> \x02\x19
|
||||
<U21D4> \x02\x1D
|
||||
<U21D5> \x02\x12
|
||||
<U2200> \x02\x66
|
||||
<U2201> \x02\x64
|
||||
<U2202> \x02\x50
|
||||
<U2203> \x02\x67
|
||||
<U2205> \x02\x7D
|
||||
<U2207> \x02\x41
|
||||
<U2208> \x02\x5C
|
||||
<U2209> \x02\x5D
|
||||
<U220B> \x02\x5B
|
||||
<U2219> \x02\x79
|
||||
<U221A> \x02\x7B
|
||||
<U221D> \x02\x6C
|
||||
<U221E> \x02\x6B
|
||||
<U221F> \x01\x1C
|
||||
<U2220> \x02\x40
|
||||
<U2229> \x02\x6F
|
||||
<U222A> \x02\x6E
|
||||
<U222B> \x02\x65
|
||||
<U2245> \x02\x71
|
||||
<U2248> \x02\x77
|
||||
<U2260> \x02\x76
|
||||
<U2261> \x02\x70
|
||||
<U2264> \x02\x73
|
||||
<U2265> \x02\x72
|
||||
<U2282> \x02\x7E
|
||||
<U2283> \x02\x7F
|
||||
<U2286> \x02\x5E
|
||||
<U2287> \x02\x5F
|
||||
<U2295> \x02\x61
|
||||
<U2297> \x02\x60
|
||||
<U22C0> \x02\x63
|
||||
<U2310> \x01\x7D
|
||||
<U2318> \x01\x4B
|
||||
<U2320> \x02\x74
|
||||
<U2321> \x02\x75
|
||||
<U2552> \x01\x55
|
||||
<U2553> \x01\x56
|
||||
<U2555> \x01\x5C
|
||||
<U2556> \x01\x5B
|
||||
<U2558> \x01\x54
|
||||
<U2559> \x01\x53
|
||||
<U255B> \x01\x5E
|
||||
<U255C> \x01\x5D
|
||||
<U255E> \x01\x46
|
||||
<U255F> \x01\x47
|
||||
<U2561> \x01\x59
|
||||
<U2562> \x01\x5A
|
||||
<U2564> \x01\x51
|
||||
<U2565> \x01\x52
|
||||
<U2567> \x01\x5F
|
||||
<U2568> \x01\x50
|
||||
<U256A> \x01\x58
|
||||
<U256B> \x01\x57
|
||||
<U258C> \x01\x48
|
||||
<U2590> \x01\x49
|
||||
<U25AC> \x01\x16
|
||||
<U25B2> \x01\x1E
|
||||
<U25BA> \x01\x10
|
||||
<U25BC> \x01\x1F
|
||||
<U25C4> \x01\x11
|
||||
<U25CA> \x01\x4A
|
||||
<U25CB> \x01\x09
|
||||
<U25D8> \x01\x08
|
||||
<U25D9> \x01\x0A
|
||||
<U263A> \x01\x01
|
||||
<U263B> \x01\x02
|
||||
<U263C> \x01\x0F
|
||||
<U2640> \x01\x0C
|
||||
<U2642> \x01\x0B
|
||||
<U2660> \x01\x06
|
||||
<U2663> \x01\x05
|
||||
<U2665> \x01\x03
|
||||
<U2666> \x01\x04
|
||||
<U266A> \x01\x0D
|
||||
<U266B> \x01\x0E
|
||||
<U2713> \x02\x62
|
||||
<UF862> \x02\x0E
|
||||
<UF863> \x02\x0F
|
||||
<UF864> \x02\x10
|
||||
<UF865> \x02\x11
|
||||
<UF866> \x02\x3F
|
||||
<UF867> \x02\x17
|
||||
<UF868> \x02\x1C
|
||||
<UF869> \x02\x1E
|
||||
<UF86A> \x02\x68
|
||||
<UF86B> \x02\x5A
|
||||
<UF86C> \x02\x59
|
||||
<UF86D> \x02\x58
|
||||
<UF86E> \x02\x57
|
||||
<UF86F> \x02\x56
|
||||
<UF870> \x02\x55
|
||||
<UF871> \x02\x54
|
||||
<UF872> \x02\x4F
|
||||
<UF873> \x02\x4E
|
||||
<UF874> \x02\x4D
|
||||
<UF875> \x02\x4C
|
||||
<UF876> \x02\x4B
|
||||
<UF877> \x02\x4A
|
||||
<UF878> \x02\x49
|
||||
<UF879> \x02\x48
|
||||
<UF87A> \x02\x47
|
||||
<UF87B> \x02\x46
|
||||
<UF87C> \x02\x45
|
||||
<UF87D> \x02\x44
|
||||
<UF87E> \x02\x43
|
||||
<UF87F> \x02\x42
|
||||
<UF880> \x02\x3E
|
||||
<UF881> \x02\x3D
|
||||
<UF882> \x02\x3C
|
||||
<UF883> \x02\x3B
|
||||
<UF884> \x02\x3A
|
||||
<UF885> \x02\x39
|
||||
<UF886> \x02\x38
|
||||
<UF887> \x02\x37
|
||||
<UF888> \x02\x36
|
||||
<UF889> \x02\x35
|
||||
<UF88A> \x02\x34
|
||||
<UF88B> \x02\x33
|
||||
<UF88C> \x02\x32
|
||||
<UF88D> \x02\x31
|
||||
<UF88E> \x02\x30
|
||||
<UF88F> \x02\x2F
|
||||
<UF890> \x02\x2E
|
||||
<UF891> \x02\x2D
|
||||
<UF892> \x02\x2C
|
||||
<UF893> \x02\x2B
|
||||
<UF894> \x02\x2A
|
||||
<UF895> \x02\x29
|
||||
<UF896> \x02\x28
|
||||
<UF897> \x02\x27
|
||||
<UF898> \x02\x26
|
||||
<UF899> \x02\x25
|
||||
<UF89A> \x02\x24
|
||||
<UF89B> \x02\x23
|
||||
<UF89C> \x02\x22
|
||||
<UF89D> \x02\x21
|
||||
<UF89E> \x02\x20
|
||||
<UF89F> \x02\x1F
|
||||
<UF8FB> \x01\x7C
|
||||
<UF8FC> \x01\x63
|
||||
<UF8FD> \x01\x62
|
||||
<UF8FE> \x01\x4D
|
||||
<UF8FF> \x01\x4C
|
||||
<UFFFD> \x01\x3D
|
||||
#
|
||||
END CHARMAP
|
||||
#
|
||||
#________________________________________________________________________
|
|
@ -231,6 +231,10 @@ SOURCE=.\ucnv_io.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_lmb.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_utf.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
|
|
@ -35,7 +35,9 @@ static const UConverterSharedData *
|
|||
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
||||
&_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
|
||||
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData,
|
||||
&_ISO2022Data
|
||||
&_ISO2022Data,
|
||||
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
|
||||
&_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19
|
||||
};
|
||||
|
||||
static struct {
|
||||
|
@ -53,9 +55,22 @@ static struct {
|
|||
{ "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
|
||||
{ "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
|
||||
#endif
|
||||
{ "ISO_2022", UCNV_ISO_2022 }
|
||||
{ "ISO_2022", UCNV_ISO_2022 },
|
||||
{ "LMBCS-1", UCNV_LMBCS_1 },
|
||||
{ "LMBCS-2", UCNV_LMBCS_2 },
|
||||
{ "LMBCS-3", UCNV_LMBCS_3 },
|
||||
{ "LMBCS-4", UCNV_LMBCS_4 },
|
||||
{ "LMBCS-5", UCNV_LMBCS_5 },
|
||||
{ "LMBCS-6", UCNV_LMBCS_6 },
|
||||
{ "LMBCS-8", UCNV_LMBCS_8 },
|
||||
{ "LMBCS-11",UCNV_LMBCS_11 },
|
||||
{ "LMBCS-16",UCNV_LMBCS_16 },
|
||||
{ "LMBCS-17",UCNV_LMBCS_17 },
|
||||
{ "LMBCS-18",UCNV_LMBCS_18 },
|
||||
{ "LMBCS-19",UCNV_LMBCS_19 }
|
||||
};
|
||||
|
||||
|
||||
/*Takes an alias name gets an actual converter file name
|
||||
*goes to disk and opens it.
|
||||
*allocates the memory and returns a new UConverter object
|
||||
|
|
|
@ -194,7 +194,9 @@ struct UConverterImpl {
|
|||
extern const UConverterSharedData
|
||||
_SBCSData, _DBCSData, _MBCSData, _Latin1Data,
|
||||
_UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData,
|
||||
_ISO2022Data;
|
||||
_ISO2022Data,
|
||||
_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
|
||||
_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
|
870
icu4c/source/common/ucnv_lmb.c
Normal file
870
icu4c/source/common/ucnv_lmb.c
Normal file
|
@ -0,0 +1,870 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_lmb.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 4 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000feb09
|
||||
* created by: Brendan Murray
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
/* LMBCS -------------------------------------------------------------------- */
|
||||
|
||||
/* Group bytes, and things that look like group bytes, should always be 8-bits */
|
||||
typedef uint8_t ulmbcs_grp_t;
|
||||
|
||||
|
||||
/* Define some constants instead of using literals */
|
||||
|
||||
|
||||
/* LMBCS groups */
|
||||
#define ULMBCS_GRP_EXCEPT 0x00 /* placeholder index for 'oddballs' XY, where Y<0x80 */
|
||||
#define ULMBCS_GRP_L1 0x01 /* Latin-1 */
|
||||
#define ULMBCS_GRP_GR 0x02 /* Greek */
|
||||
#define ULMBCS_GRP_HE 0x03 /* Hebrew */
|
||||
#define ULMBCS_GRP_AR 0x04 /* Arabic */
|
||||
#define ULMBCS_GRP_RU 0x05 /* Cyrillic */
|
||||
#define ULMBCS_GRP_L2 0x06 /* Latin-2 */
|
||||
#define ULMBCS_GRP_TR 0x08 /* Turkish */
|
||||
#define ULMBCS_GRP_TH 0x0B /* Thai */
|
||||
#define ULMBCS_GRP_CTRL 0x0F /* C0/C1 controls */
|
||||
#define ULMBCS_GRP_JA 0x10 /* Japanese */
|
||||
#define ULMBCS_GRP_KO 0x11 /* Korean */
|
||||
/* The two Chinese groups follow OptGroupByteToCPName in this file:
   group 0x12 is converted with ibm-950 (Big5, Traditional Chinese) and
   group 0x13 with ibm-1386 (GBK, Simplified Chinese). */
#define ULMBCS_GRP_TW 0x12 /* Chinese Taiwan */
#define ULMBCS_GRP_CN 0x13 /* Chinese PRC */
|
||||
#define ULMBCS_GRP_UNICODE 0x14 /* Unicode compatibility group */
|
||||
#define ULMBCS_GRP_LAST 0x14 /* last LMBCS group that means anything */
|
||||
|
||||
/* some special values that can appear in place of optimization groups */
|
||||
#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */
|
||||
#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */
|
||||
#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */
|
||||
#define ULMBCS_123SYSTEMRANGE 0x19 /* Fixed control char for 1-2-3 file data: start system range name */
|
||||
#define ULMBCS_DEFAULTOPTGROUP 0x1 /* default optimization group for LMBCS */
|
||||
#define ULMBCS_DOUBLEOPTGROUP 0x10 /* start of double-byte optimization groups */
|
||||
|
||||
/* parts of LMBCS values, or ranges for LMBCS data */
|
||||
#define ULMBCS_UNICOMPATZERO 0xF6 /* PUA range for Unicode chars containing LSB = 0 */
|
||||
#define ULMBCS_CTRLOFFSET 0x20 /* Offset of control range in group 0x0F */
|
||||
#define ULMBCS_C1START 0x80 /* Start of 'C1' upper ascii range in ANSI code pages */
|
||||
#define ULMBCS_C0END 0x1F /* last of the 'C0' lower ascii control range in ANSI code pages */
|
||||
#define ULMBCS_INVALIDCHAR 0xFFFF /* Invalid character value = convert failed */
|
||||
|
||||
|
||||
/* special return values for FindLMBCSUniRange */

/* The character could fit in more than one LMBCS sbcs native encoding
   (example: most accented latin). */
#define ULMBCS_AMBIGUOUS_SBCS 0x80

/* The character could fit in more than one LMBCS mbcs native encoding
   (example: Unihan). */
#define ULMBCS_AMBIGUOUS_MBCS 0x81

/* macro to check compatibility of groups:
   non-zero when agroup is ULMBCS_AMBIGUOUS_SBCS and xgroup is below
   ULMBCS_DOUBLEOPTGROUP, or when agroup is ULMBCS_AMBIGUOUS_MBCS and
   xgroup is at or above ULMBCS_DOUBLEOPTGROUP; zero otherwise.
   Both arguments are evaluated more than once, so pass side-effect-free
   expressions. */
#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \
    ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \
      (xgroup) < ULMBCS_DOUBLEOPTGROUP) || \
     (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \
      (xgroup) >= ULMBCS_DOUBLEOPTGROUP))
|
||||
|
||||
/* Max size for 1 LMBCS char */
|
||||
#define ULMBCS_CHARSIZE_MAX 3
|
||||
|
||||
|
||||
/* JSGTODO: what is ICU standard debug assertion method?
   Invent an all-crash stop here, for now.

   MyAssert(b) deliberately writes through a null pointer when b is false.
   The store goes through a volatile pointer so it is not optimized away,
   and the body is wrapped in do { } while (0) so that "MyAssert(x);"
   behaves as one statement (safe inside an unbraced if/else).
   Flip the #if to 0 to compile the checks out. */
#if 1
#define MyAssert(b) do { if (!(b)) { *(volatile char *)0 = 1; } } while (0)
#else
#define MyAssert(b) ((void)0)
#endif
|
||||
|
||||
|
||||
/* Map Optimization group byte to converter name. Note the following:
|
||||
0x00 is dummy, and contains the name of the exceptions converter.
|
||||
0x02 is currently unavailable: NLTC have been asked to provide.
|
||||
0x0F and 0x14 are algorithmically calculated
|
||||
0x09, 0x0A, 0x0D are data bytes (HT, LF, CR)
|
||||
0x07, 0x0C and 0x0E are unused
|
||||
*/
|
||||
static const char * OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
|
||||
/* 0x0000 */ "lmb-excp", /* No zero opt group: for non-standard entries */
|
||||
/* 0x0001 */ "ibm-850",
|
||||
/* 0x0002 */ "ibm-851",
|
||||
/* 0x0003 */ "ibm-1255",
|
||||
/* 0x0004 */ "ibm-1256",
|
||||
/* 0x0005 */ "ibm-1251",
|
||||
/* 0x0006 */ "ibm-852",
|
||||
/* 0x0007 */ NULL, /* Unused */
|
||||
/* 0x0008 */ "ibm-1254",
|
||||
/* 0x0009 */ NULL, /* Control char HT */
|
||||
/* 0x000A */ NULL, /* Control char LF */
|
||||
/* 0x000B */ "ibm-874",
|
||||
/* 0x000C */ NULL, /* Unused */
|
||||
/* 0x000D */ NULL, /* Control char CR */
|
||||
/* 0x000E */ NULL, /* Unused */
|
||||
/* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */
|
||||
/* 0x0010 */ "ibm-943",
|
||||
/* 0x0011 */ "ibm-1361",
|
||||
/* 0x0012 */ "ibm-950",
|
||||
/* 0x0013 */ "ibm-1386"
|
||||
|
||||
/* The rest are null, including the 0x0014 Unicode compatibility region
|
||||
and 0x0019, the 1-2-3 system range control char */
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* map UNICODE ranges to converter indexes (or special values) */
|
||||
|
||||
ulmbcs_grp_t FindLMBCSUniRange(UChar uniChar, UErrorCode* err);
|
||||
|
||||
struct _UniLMBCSGrpMap
|
||||
{
|
||||
UChar uniStartRange;
|
||||
UChar uniEndRange;
|
||||
ulmbcs_grp_t GrpType;
|
||||
} UniLMBCSGrpMap[]
|
||||
=
|
||||
{
|
||||
0x0001, 0x001F, ULMBCS_GRP_CTRL,
|
||||
0x0080, 0x009F, ULMBCS_GRP_CTRL,
|
||||
0x00A0, 0x0113, ULMBCS_AMBIGUOUS_SBCS,
|
||||
0x0115, 0x0120, ULMBCS_AMBIGUOUS_SBCS,
|
||||
0x0120, 0x012B, ULMBCS_GRP_EXCEPT,
|
||||
0x012C, 0x01CD, ULMBCS_AMBIGUOUS_SBCS,
|
||||
0x01CE, 0x01CE, ULMBCS_AMBIGUOUS_MBCS,
|
||||
0x01CF, 0x1FFF, ULMBCS_AMBIGUOUS_SBCS,
|
||||
0x2000, 0xFFFD, ULMBCS_AMBIGUOUS_MBCS,
|
||||
0xFFFF, 0xFFFF,
|
||||
};
|
||||
|
||||
/* Classify a UTF-16 code unit by scanning UniLMBCSGrpMap.
 *
 * uniChar  the code unit to classify.
 * err      receives U_INVALID_CHAR_FOUND when the scan reaches the 0xFFFF
 *          sentinel row without matching it; otherwise left untouched.
 *          Dereferenced without a NULL check.
 *
 * Returns the GrpType of the first row containing uniChar (a group byte or
 * one of the ULMBCS_AMBIGUOUS_* markers), or ULMBCS_GRP_UNICODE when
 * uniChar lies in a gap between rows.
 */
ulmbcs_grp_t FindLMBCSUniRange(UChar uniChar, UErrorCode* err)
{
    const struct _UniLMBCSGrpMap * pRange = UniLMBCSGrpMap;

    /* Skip every row that ends below the character. The table's last row
       ends at 0xFFFF, so this always terminates. */
    while (uniChar > pRange->uniEndRange)
    {
        pRange++;
    }

    if (uniChar >= pRange->uniStartRange)
    {
        return pRange->GrpType;
    }

    /* Stopped on the sentinel row without matching it: report this with a
       real UErrorCode rather than the ULMBCS_INVALIDCHAR character value. */
    if (pRange->uniStartRange == 0xFFFF)
    {
        *err = U_INVALID_CHAR_FOUND;
    }
    return ULMBCS_GRP_UNICODE;
}
|
||||
|
||||
#if 0
|
||||
// JSGTODO (by Brendan?) some incomplete source data from Brendan to be integrated
|
||||
|
||||
0xFE30, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
|
||||
0xFA2E, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0xF8FF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
|
||||
0xD7FF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0xABFF, ULMBCS_GRP_KO, ULMBCS_FLAGS_UNICODE,
|
||||
0x9FFF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0x31FF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
|
||||
0x318F, ULMBCS_GRP_CN, ULMBCS_FLAGS_CONTINUE,
|
||||
0x3130, ULMBCS_GRP_KO, ULMBCS_FLAGS_UNICODE,
|
||||
0x3100, ULMBCS_GRP_CN, ULMBCS_FLAGS_CONTINUE,
|
||||
0x313F, ULMBCS_GRP_JA, ULMBCS_FLAGS_UNICODE,
|
||||
0x2FFF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
|
||||
0x2714, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0x2000, ULMBCS_GRP_L1, ULMBCS_FLAGS_CONTINUE,
|
||||
0x0E5C, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0x0E00, ULMBCS_GRP_TH, ULMBCS_FLAGS_UNICODE,
|
||||
0x06FF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
|
||||
0x0600, ULMBCS_GRP_AR, ULMBCS_FLAGS_UNICODE,
|
||||
0x0500, ULMBCS_GRP_HE, ULMBCS_FLAGS_UNICODE,
|
||||
0x0400, ULMBCS_GRP_RU, ULMBCS_FLAGS_UNICODE,
|
||||
0x0300, ULMBCS_GRP_GR, ULMBCS_FLAGS_UNICODE,
|
||||
0x001F, ULMBCS_GRP_L1, ULMBCS_FLAGS_CONTINUE,
|
||||
0x0000, ULMBCS_GRP_CTRL, ULMBCS_FLAGS_UNICODE
|
||||
#endif
|
||||
|
||||
|
||||
int LMBCSConversionWorker (
|
||||
UConverterDataLMBCS * extraInfo, ulmbcs_grp_t group,
|
||||
uint8_t * pStartLMBCS, UChar * pUniChar,
|
||||
ulmbcs_grp_t * lastConverterIndex, bool_t * groups_tried,
|
||||
UErrorCode* err);
|
||||
|
||||
int LMBCSConversionWorker (
|
||||
UConverterDataLMBCS * extraInfo, ulmbcs_grp_t group,
|
||||
uint8_t * pStartLMBCS, UChar * pUniChar,
|
||||
ulmbcs_grp_t * lastConverterIndex, bool_t * groups_tried,
|
||||
UErrorCode * err)
|
||||
{
|
||||
uint8_t * pLMBCS = pStartLMBCS;
|
||||
UConverter * xcnv = extraInfo->OptGrpConverter[group];
|
||||
uint8_t mbChar [ULMBCS_CHARSIZE_MAX];
|
||||
uint8_t * pmbChar = mbChar;
|
||||
bool_t isDoubleByteGroup = (group >= ULMBCS_DOUBLEOPTGROUP) ? TRUE : FALSE;
|
||||
UErrorCode localErr = 0;
|
||||
int bytesConverted =0;
|
||||
|
||||
MyAssert(xcnv);
|
||||
MyAssert(group<ULMBCS_GRP_UNICODE);
|
||||
|
||||
ucnv_fromUnicode(xcnv, (char **)&pmbChar,(char *)mbChar+sizeof(mbChar),&pUniChar,pUniChar+1,NULL,TRUE,&localErr);
|
||||
bytesConverted = pmbChar - mbChar;
|
||||
pmbChar = mbChar;
|
||||
|
||||
/* most common failure mode is the sub-converter using the substitution char (0x7f for our converters)
|
||||
*/
|
||||
|
||||
if (*mbChar == xcnv->subChar[0] || U_FAILURE(localErr) || !bytesConverted )
|
||||
{
|
||||
// JSGTODO: are there some local failure modes that ought to be bubbled up in some other way?
|
||||
groups_tried[group] = TRUE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*lastConverterIndex = group;
|
||||
|
||||
/* All initial byte values in lower ascii range should have been caught by now,
|
||||
except with the exception group.
|
||||
|
||||
Uncomment this assert to find them.
|
||||
*/
|
||||
|
||||
// MyAssert((*pmbChar <= ULMBCS_C0END) || (*pmbChar >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
|
||||
|
||||
/* use converted data: first write 0, 1 or two group bytes */
|
||||
if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
|
||||
{
|
||||
*pLMBCS++ = group;
|
||||
if (bytesConverted == 1 && isDoubleByteGroup)
|
||||
{
|
||||
*pLMBCS++ = group;
|
||||
}
|
||||
}
|
||||
/* then move over the converted data */
|
||||
do
|
||||
{
|
||||
*pLMBCS++ = *pmbChar++;
|
||||
}
|
||||
while(--bytesConverted);
|
||||
|
||||
return (pLMBCS - pStartLMBCS);
|
||||
}
|
||||
|
||||
|
||||
/* Convert Unicode string to LMBCS */
|
||||
void _LMBCSFromUnicode(UConverter* _this,
|
||||
char** target,
|
||||
const char* targetLimit,
|
||||
const UChar** source,
|
||||
const UChar* sourceLimit,
|
||||
int32_t * offsets,
|
||||
bool_t flush,
|
||||
UErrorCode* err)
|
||||
{
|
||||
ulmbcs_grp_t lastConverterIndex = 0;
|
||||
UChar uniChar;
|
||||
uint8_t LMBCS[ULMBCS_CHARSIZE_MAX];
|
||||
uint8_t * pLMBCS;
|
||||
int bytes_written;
|
||||
bool_t groups_tried[ULMBCS_GRP_LAST];
|
||||
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
|
||||
|
||||
/* Arguments Check */
|
||||
if (!err || U_FAILURE(*err))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (sourceLimit < *source)
|
||||
{
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
uniChar = *(*source)++;
|
||||
bytes_written = 0;
|
||||
pLMBCS = LMBCS;
|
||||
|
||||
/* single byte matches */
|
||||
|
||||
if (uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
|
||||
uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE ||
|
||||
((uniChar >= ULMBCS_CTRLOFFSET) && (uniChar < ULMBCS_C1START)))
|
||||
{
|
||||
*pLMBCS++ = (uint8_t) uniChar;
|
||||
bytes_written = 1;
|
||||
}
|
||||
|
||||
|
||||
if (!bytes_written)
|
||||
{
|
||||
/* Check by UNICODE range */
|
||||
ulmbcs_grp_t group = FindLMBCSUniRange(uniChar,err);
|
||||
|
||||
if (group == ULMBCS_GRP_UNICODE)
|
||||
{
|
||||
/* encode into LMBCS Unicode range */
|
||||
uint8_t LowCh = (uint8_t) (uniChar & 0x00FF);
|
||||
uint8_t HighCh = (uint8_t)(uniChar >> 8);
|
||||
|
||||
*pLMBCS++ = ULMBCS_GRP_UNICODE;
|
||||
|
||||
if (LowCh == 0)
|
||||
{
|
||||
*pLMBCS++ = ULMBCS_UNICOMPATZERO;
|
||||
*pLMBCS++ = HighCh;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pLMBCS++ = HighCh;
|
||||
*pLMBCS++ = LowCh;
|
||||
}
|
||||
|
||||
bytes_written = pLMBCS - LMBCS;
|
||||
}
|
||||
else if (group == ULMBCS_GRP_CTRL)
|
||||
{
|
||||
/* Handle control characters here */
|
||||
if (uniChar <= ULMBCS_C0END)
|
||||
{
|
||||
*pLMBCS++ = ULMBCS_GRP_CTRL;
|
||||
*pLMBCS++ = ULMBCS_CTRLOFFSET + (uint8_t) uniChar;
|
||||
}
|
||||
else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET)
|
||||
{
|
||||
*pLMBCS++ = ULMBCS_GRP_CTRL;
|
||||
*pLMBCS++ = (uint8_t) (uniChar & 0x00FF);
|
||||
}
|
||||
bytes_written = pLMBCS - LMBCS;
|
||||
}
|
||||
else if (group < ULMBCS_GRP_UNICODE)
|
||||
{
|
||||
/* a specific converter has been identified - use it */
|
||||
bytes_written = LMBCSConversionWorker (
|
||||
extraInfo, group, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
|
||||
MyAssert(bytes_written); /* table should never return unusable group */
|
||||
|
||||
}
|
||||
else /* the ambiguous group cases */
|
||||
{
|
||||
memset(groups_tried, 0, sizeof(groups_tried));
|
||||
|
||||
/* check for non-default optimization group */
|
||||
if (extraInfo->OptGroup != 1
|
||||
&& ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))
|
||||
{
|
||||
bytes_written = LMBCSConversionWorker (extraInfo,
|
||||
extraInfo->OptGroup, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
}
|
||||
/* check for locale optimization group */
|
||||
if (!bytes_written
|
||||
&& (extraInfo->localeConverterIndex)
|
||||
&& (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex)))
|
||||
{
|
||||
bytes_written = LMBCSConversionWorker (extraInfo,
|
||||
extraInfo->localeConverterIndex, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
}
|
||||
/* check for last optimization group used for this string */
|
||||
if (!bytes_written
|
||||
&& (lastConverterIndex)
|
||||
&& (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex)))
|
||||
{
|
||||
bytes_written = LMBCSConversionWorker (extraInfo,
|
||||
lastConverterIndex, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
|
||||
}
|
||||
if (!bytes_written)
|
||||
{
|
||||
/* just check every matching converter */
|
||||
ulmbcs_grp_t grp_start;
|
||||
ulmbcs_grp_t grp_end;
|
||||
ulmbcs_grp_t grp_ix;
|
||||
grp_start = (group == ULMBCS_AMBIGUOUS_MBCS)
|
||||
? ULMBCS_DOUBLEOPTGROUP
|
||||
: ULMBCS_GRP_L1;
|
||||
grp_end = (group == ULMBCS_AMBIGUOUS_MBCS)
|
||||
? ULMBCS_GRP_LAST-1
|
||||
: ULMBCS_GRP_TH;
|
||||
|
||||
for (grp_ix = grp_start;
|
||||
grp_ix <= grp_end && !bytes_written;
|
||||
grp_ix++)
|
||||
{
|
||||
if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix])
|
||||
{
|
||||
bytes_written = LMBCSConversionWorker (extraInfo,
|
||||
grp_ix, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
}
|
||||
}
|
||||
|
||||
/* a final conversion fallback for sbcs to the exceptions group */
|
||||
if (!bytes_written && group == ULMBCS_AMBIGUOUS_SBCS)
|
||||
{
|
||||
bytes_written = LMBCSConversionWorker (extraInfo,
|
||||
ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
|
||||
&lastConverterIndex, groups_tried, err);
|
||||
}
|
||||
/* all of our strategies failed. Fallback to Unicode. Consider adding these to table */
|
||||
|
||||
if (!bytes_written)
|
||||
{
|
||||
/* encode into LMBCS Unicode range */
|
||||
uint8_t LowCh = (uint8_t) uniChar;
|
||||
uint8_t HighCh = (uint8_t)(uniChar >> 8);
|
||||
|
||||
*pLMBCS++ = ULMBCS_GRP_UNICODE;
|
||||
|
||||
if (LowCh == 0)
|
||||
{
|
||||
*pLMBCS++ = ULMBCS_UNICOMPATZERO;
|
||||
*pLMBCS++ = HighCh;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pLMBCS++ = HighCh;
|
||||
*pLMBCS++ = LowCh;
|
||||
}
|
||||
|
||||
bytes_written = pLMBCS - LMBCS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*target + bytes_written > targetLimit)
|
||||
{
|
||||
/* JSGTODO deal with buffer running out here */
|
||||
}
|
||||
|
||||
/* now that we are sure it all fits, move it in */
|
||||
for(pLMBCS = LMBCS; bytes_written--; *(*target)++ = *pLMBCS++)
|
||||
{ };
|
||||
|
||||
}
|
||||
while (*source<= sourceLimit &&
|
||||
*target <= targetLimit &&
|
||||
!U_FAILURE(*err));
|
||||
|
||||
/* JSGTODO Check the various exit conditions */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Return the Unicode representation for the current LMBCS character */
|
||||
UChar _LMBCSGetNextUChar(UConverter* _this,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
UErrorCode* err)
|
||||
{
|
||||
uint8_t CurByte; // A byte from the input stream
|
||||
UChar uniChar; // an output UNICODE char
|
||||
UChar mbChar; // an intermediate multi-byte value (mbcs or LMBCS)
|
||||
CompactShortArray *MyCArray = NULL;
|
||||
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
|
||||
ulmbcs_grp_t group = 0;
|
||||
UConverter* cnv = 0;
|
||||
|
||||
/* Opt Group (or first data byte) */
|
||||
CurByte = *((uint8_t *) (*source)++);
|
||||
uniChar = 0;
|
||||
|
||||
// at entry of each if clause:
|
||||
// 1. 'CurByte' points at the first byte of a LMBCS character
|
||||
// 2. '*source'points to the next byte of the source stream after 'CurByte'
|
||||
|
||||
// the job of each if clause is:
|
||||
// 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte)
|
||||
// 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
|
||||
|
||||
|
||||
// First lets check the simple fixed values.
|
||||
if (CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR ||
|
||||
CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE ||
|
||||
((CurByte >= ULMBCS_CTRLOFFSET) && (CurByte < ULMBCS_C1START)))
|
||||
{
|
||||
uniChar = CurByte;
|
||||
}
|
||||
else
|
||||
if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
|
||||
{
|
||||
if (*source >= sourceLimit)
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint8_t C0C1byte = *(*source)++;
|
||||
uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BE as is */
|
||||
{
|
||||
uint8_t HighCh, LowCh;
|
||||
|
||||
|
||||
HighCh = *(*source)++; /* Big-endian Unicode in LMBCs compatibility group*/
|
||||
LowCh = *(*source)++;
|
||||
|
||||
if (HighCh == ULMBCS_UNICOMPATZERO )
|
||||
{
|
||||
HighCh = LowCh;
|
||||
LowCh = 0; /* zero-byte in LSB special character */
|
||||
}
|
||||
|
||||
uniChar = (HighCh << 8) | LowCh;
|
||||
|
||||
}
|
||||
|
||||
else if (CurByte <= ULMBCS_CTRLOFFSET)
|
||||
{
|
||||
group = CurByte; /* group byte is in the source */
|
||||
cnv = extraInfo->OptGrpConverter[group];
|
||||
|
||||
if (!cnv)
|
||||
{
|
||||
/* this is not a valid group byte - no converter*/
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
|
||||
|
||||
else if (group >= ULMBCS_DOUBLEOPTGROUP) /* double byte conversion */
|
||||
{
|
||||
uint8_t HighCh, LowCh;
|
||||
|
||||
|
||||
HighCh = *(*source)++;
|
||||
LowCh = *(*source)++;
|
||||
|
||||
/* check for LMBCS doubled-group-byte case */
|
||||
mbChar = (HighCh == group) ? LowCh : (HighCh<<8) | LowCh;
|
||||
|
||||
MyCArray = cnv->sharedData->table->mbcs.toUnicode;
|
||||
uniChar = (UChar) ucmp16_getu (MyCArray, mbChar);
|
||||
|
||||
}
|
||||
else /* single byte conversion */
|
||||
{
|
||||
CurByte = *(*source)++;
|
||||
if (CurByte >= ULMBCS_C1START)
|
||||
{
|
||||
uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The non-optimizable oddballs where there is an explicit byte
|
||||
* AND the second byte is not in the upper ascii range
|
||||
*/
|
||||
cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
|
||||
|
||||
/* Lookup value must include opt group */
|
||||
mbChar = (UChar)(group << 8) | (UChar) CurByte;
|
||||
|
||||
MyCArray = cnv->sharedData->table->mbcs.toUnicode;
|
||||
uniChar = (UChar) ucmp16_getu(MyCArray, mbChar);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */
|
||||
{
|
||||
group = extraInfo->OptGroup;
|
||||
cnv = extraInfo->OptGrpConverter[group];
|
||||
|
||||
if (group >= ULMBCS_DOUBLEOPTGROUP) /* double byte conversion */
|
||||
{
|
||||
uint8_t HighCh, LowCh;
|
||||
|
||||
// JSGTODO need to deal with case of single byte G1
|
||||
// chars in mbcs groups
|
||||
|
||||
HighCh = CurByte;
|
||||
LowCh = *(*source)++;
|
||||
|
||||
mbChar = (HighCh<<8) | LowCh;
|
||||
MyCArray = cnv->sharedData->table->mbcs.toUnicode;
|
||||
uniChar = (UChar) ucmp16_getu (MyCArray, mbChar);
|
||||
(*source) += sizeof(UChar);
|
||||
}
|
||||
else /* single byte conversion */
|
||||
{
|
||||
uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if DEBUG
|
||||
// JSGTODO: assert here: we should never get here.
|
||||
#endif
|
||||
|
||||
}
|
||||
// JSGTODO: need to correctly deal with partial chars
|
||||
return uniChar;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void _LMBCSToUnicodeWithOffsets(UConverter* _this,
|
||||
UChar** target,
|
||||
const UChar* targetLimit,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode* err)
|
||||
{
|
||||
UChar uniChar; // an output UNICODE char
|
||||
CompactShortArray *MyCArray = NULL;
|
||||
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
|
||||
ulmbcs_grp_t group = 0;
|
||||
UConverter* cnv = 0;
|
||||
const char * pStartLMBCS = *source;
|
||||
|
||||
if (!err || U_FAILURE(*err))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source))
|
||||
{
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
#if 0 // JSGTODOD - restore incomplete char handling
|
||||
|
||||
/* Have we arrived here from a prior conversion ending with a partial char?
|
||||
The only possible configurations are:
|
||||
1. mode contains the group byte of SBCS LMBCS char;
|
||||
2. mode contains the group byte of MBCS LMBCS char
|
||||
For both continue with next char in input buffer
|
||||
3. mode contains group byte + 1st data byte of MBCS LMBCS char
|
||||
Partially process & get the second data byte
|
||||
4. mode contains both group bytes of double group-byte MBCS LMBCS char
|
||||
Nuke contents after setting up converter & continue with buffer data
|
||||
*/
|
||||
if (_this->toUnicodeStatus)
|
||||
{
|
||||
mbChar = (UChar) _this->mode; /* Restore the previously calculated char */
|
||||
|
||||
_this->toUnicodeStatus = 0; /* Reset other fields*/
|
||||
_this->invalidCharLength = 0;
|
||||
|
||||
/* Check if this is a partial MBCS char (fall through if SBCS) */
|
||||
if (mbChar > 0xFF)
|
||||
{
|
||||
/* Select the correct converter */
|
||||
group = (mbChar >> 8) & 0x00FF;
|
||||
cnv = extraInfo->OptGrpConverter[group];
|
||||
|
||||
/* Pick up the converter table */
|
||||
MyCArray = cnv->sharedData->table->mbcs.toUnicode;
|
||||
|
||||
/* Use only data byte: NULL if the character has pair of group-bytes */
|
||||
if (mbChar & 0x00FF < ULMBCS_MAXGRPBYTE)
|
||||
CurByte = 0;
|
||||
else
|
||||
CurByte = ((mbChar & 0x00FF) << 8);
|
||||
|
||||
/* Add the current char from the buffer */
|
||||
CurByte |= *((uint8_t *) (*source)++);
|
||||
|
||||
goto continueWithPartialMBCSChar;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
goto continueWithPartialChar;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Process from source to limit */
|
||||
while (!*err && sourceLimit > *source && targetLimit > *target)
|
||||
{
|
||||
if(offsets)
|
||||
{
|
||||
*offsets = (*source) - pStartLMBCS;
|
||||
}
|
||||
|
||||
uniChar = _LMBCSGetNextUChar(_this, source, sourceLimit, err);
|
||||
|
||||
|
||||
// last step is always to move the new value into the buffer
|
||||
if (U_SUCCESS(*err) && uniChar != missingUCharMarker)
|
||||
{
|
||||
// JSGTODO deal with missingUCharMarker case for error/info reporting.
|
||||
*(*target)++ = uniChar;
|
||||
if(offsets)
|
||||
{
|
||||
offsets++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// JSGTODO restore partial char handling
|
||||
/* Check to see if we've fallen through because of a partial char */
|
||||
if (*err == U_TRUNCATED_CHAR_FOUND)
|
||||
{
|
||||
_this->mode = mbChar; /* Save current partial char */
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Convert LMBCS string to Unicode */
|
||||
void _LMBCSToUnicode(UConverter* _this,
|
||||
UChar** target,
|
||||
const UChar* targetLimit,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode* err)
|
||||
{
|
||||
_LMBCSToUnicodeWithOffsets(_this, target, targetLimit, source, sourceLimit, offsets, flush,err);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void _LMBCSOpenWorker(UConverter* _this,
|
||||
const char* name,
|
||||
const char* locale,
|
||||
UErrorCode* err,
|
||||
ulmbcs_grp_t OptGroup
|
||||
)
|
||||
{
|
||||
UConverterDataLMBCS * extraInfo = uprv_malloc (sizeof (UConverterDataLMBCS));
|
||||
|
||||
if(extraInfo != NULL)
|
||||
{
|
||||
|
||||
ulmbcs_grp_t i;
|
||||
ulmbcs_grp_t imax;
|
||||
|
||||
imax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);
|
||||
|
||||
for (i=0; i < imax; i++)
|
||||
{
|
||||
extraInfo->OptGrpConverter[i] =
|
||||
(OptGroupByteToCPName[i] != NULL) ?
|
||||
ucnv_open(OptGroupByteToCPName[i], err) : NULL;
|
||||
}
|
||||
|
||||
extraInfo->OptGroup = OptGroup;
|
||||
/* JSGTODO: add LocaleConverterIndex logic here */
|
||||
extraInfo->localeConverterIndex = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
_this->extraInfo = extraInfo;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Release everything _LMBCSOpenWorker attached to the converter.

   Closes every sub-converter slot that is non-NULL, frees the LMBCS state
   block, and clears _this->extraInfo so a second call is a harmless no-op.

   The loop covers the whole OptGrpConverter array - the same range the
   open routine initializes - rather than stopping at ULMBCS_GRP_UNICODE,
   so no opened sub-converter can be left behind. */
static void _LMBCSClose(UConverter *   _this)
{
   UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;

   if (extraInfo != NULL)
   {
      ulmbcs_grp_t Ix;
      ulmbcs_grp_t IxMax =
         sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);

      for (Ix = 0; Ix < IxMax; Ix++)
      {
         if (extraInfo->OptGrpConverter[Ix] != NULL)
         {
            ucnv_close (extraInfo->OptGrpConverter[Ix]);
            extraInfo->OptGrpConverter[Ix] = NULL;
         }
      }

      uprv_free (extraInfo);
      _this->extraInfo = NULL;   /* do not leave a dangling pointer behind */
   }
}
|
||||
|
||||
|
||||
|
||||
#define DEFINE_LMBCS_OPEN(n) \
|
||||
static void _LMBCSOpen##n(UConverter* _this,const char* name,const char* locale,UErrorCode* err) \
|
||||
{ _LMBCSOpenWorker(_this, name,locale, err, n);} \
|
||||
|
||||
|
||||
DEFINE_LMBCS_OPEN(1)
|
||||
DEFINE_LMBCS_OPEN(2)
|
||||
DEFINE_LMBCS_OPEN(3)
|
||||
DEFINE_LMBCS_OPEN(4)
|
||||
DEFINE_LMBCS_OPEN(5)
|
||||
DEFINE_LMBCS_OPEN(6)
|
||||
DEFINE_LMBCS_OPEN(8)
|
||||
DEFINE_LMBCS_OPEN(11)
|
||||
DEFINE_LMBCS_OPEN(16)
|
||||
DEFINE_LMBCS_OPEN(17)
|
||||
DEFINE_LMBCS_OPEN(18)
|
||||
DEFINE_LMBCS_OPEN(19)
|
||||
|
||||
#define DECLARE_LMBCS_DATA(n) \
|
||||
static const UConverterImpl _LMBCSImpl##n={\
|
||||
UCNV_LMBCS_##n,\
|
||||
NULL,NULL,\
|
||||
_LMBCSOpen##n,\
|
||||
_LMBCSClose,\
|
||||
NULL,\
|
||||
_LMBCSToUnicode,\
|
||||
_LMBCSToUnicodeWithOffsets,\
|
||||
_LMBCSFromUnicode,\
|
||||
NULL,\
|
||||
_LMBCSGetNextUChar,\
|
||||
NULL\
|
||||
};\
|
||||
extern const UConverterSharedData _LMBCSData##n={\
|
||||
sizeof(UConverterSharedData), ~0,\
|
||||
NULL, NULL, &_LMBCSImpl##n, "LMBCS_" ## #n,\
|
||||
0, UCNV_IBM, UCNV_LMBCS_1, 1, 1,\
|
||||
{ 0, 1, 0x3f, 0, 0, 0 }\
|
||||
};
|
||||
|
||||
DECLARE_LMBCS_DATA(1)
|
||||
DECLARE_LMBCS_DATA(2)
|
||||
DECLARE_LMBCS_DATA(3)
|
||||
DECLARE_LMBCS_DATA(4)
|
||||
DECLARE_LMBCS_DATA(5)
|
||||
DECLARE_LMBCS_DATA(6)
|
||||
DECLARE_LMBCS_DATA(8)
|
||||
DECLARE_LMBCS_DATA(11)
|
||||
DECLARE_LMBCS_DATA(16)
|
||||
DECLARE_LMBCS_DATA(17)
|
||||
DECLARE_LMBCS_DATA(18)
|
||||
DECLARE_LMBCS_DATA(19)
|
||||
|
||||
|
||||
|
||||
|
|
@ -60,8 +60,24 @@ typedef enum {
|
|||
UCNV_UTF16_LittleEndian = 6,
|
||||
UCNV_EBCDIC_STATEFUL = 7,
|
||||
UCNV_ISO_2022 = 8,
|
||||
|
||||
UCNV_LMBCS_1 = 9,
|
||||
UCNV_LMBCS_2,
|
||||
UCNV_LMBCS_3,
|
||||
UCNV_LMBCS_4,
|
||||
UCNV_LMBCS_5,
|
||||
UCNV_LMBCS_6,
|
||||
UCNV_LMBCS_8,
|
||||
UCNV_LMBCS_11,
|
||||
UCNV_LMBCS_16,
|
||||
UCNV_LMBCS_17,
|
||||
UCNV_LMBCS_18,
|
||||
UCNV_LMBCS_19,
|
||||
UCNV_LMBCS_LAST = UCNV_LMBCS_19,
|
||||
|
||||
/* Number of converter types for which we have conversion routines. */
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = UCNV_LMBCS_LAST+1
|
||||
|
||||
} UConverterType;
|
||||
|
||||
/* ### move the following typedef and array into implementation files! */
|
||||
|
@ -256,6 +272,17 @@ typedef struct
|
|||
}
|
||||
UConverterDataISO2022;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UConverter *OptGrpConverter[0x20]; /* Converter per Opt. grp. */
|
||||
uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
|
||||
uint8_t localeConverterIndex; /* reasonable locale match for index */
|
||||
|
||||
}
|
||||
UConverterDataLMBCS;
|
||||
|
||||
|
||||
#define CONVERTER_FILE_EXTENSION ".cnv"
|
||||
|
||||
#endif /* _UCNV_BLD */
|
||||
|
|
|
@ -47,7 +47,19 @@ UTF16_PlatformEndian iso-10646-ucs-2 csUnicode utf16 utf-16 ibm-1200 ibm1200
|
|||
UTF16_OppositeEndian
|
||||
LATIN_1 iso-8859-1 iso_8859-1 ibm-819 ibm819 cp819 latin1 latin-1 ascii ascii-7 us-ascii 8859-1 csisolatin1 iso-ir-100 iso_8859-1:1978 #!!!!! There's whole lot of names for this - cp367 csASCII etc.
|
||||
ISO_2022 iso-2022 2022 cp2022 iso2022 iso_2022
|
||||
LMBCS
|
||||
LMBCS-1 lmbcs
|
||||
LMBCS-2
|
||||
LMBCS-3
|
||||
LMBCS-4
|
||||
LMBCS-5
|
||||
LMBCS-6
|
||||
LMBCS-8
|
||||
LMBCS-11
|
||||
LMBCS-16
|
||||
LMBCS-17
|
||||
LMBCS-18
|
||||
LMBCS-19
|
||||
|
||||
|
||||
# Table-based
|
||||
|
||||
|
@ -76,10 +88,13 @@ ibm-1383 euc-cn euccn ibm-eucCN # China EUC
|
|||
#ibm-1162 tis-620 cp874 windows-874 ms874 # Thai (w/ euro support) #what is the connection between this and the one below!!!
|
||||
ibm-874 ibm-1161 #same as 1162 (w/o euro update) ***This is commented out in Helena's
|
||||
|
||||
lmb-excp # special exceptions list for LMBCS algorithm
|
||||
|
||||
# Platform codepages
|
||||
ibm-437 ibm437 cp437 csPC8CodePage437 437 # PC US
|
||||
# HSYS:
|
||||
ibm-850 IBM850 cp850 850 csPC850Multilingual # PC latin1
|
||||
ibm-851 IBM851 cp851 851 csPC851 # PC DOS Greek (no euro)
|
||||
ibm-858 ibm858 cp858 # PC latin1 with Euro cp850 removed
|
||||
ibm-9044 IBM852 852 csPCp852 cp852 # PC latin2 (w/ euro update) #where should the names go here or below - inconsistency!!!
|
||||
ibm-852 # PC latin2 (w/o euro update)
|
||||
|
|
285
icu4c/source/data/mappings/ibm-851.ucm
Normal file
285
icu4c/source/data/mappings/ibm-851.ucm
Normal file
|
@ -0,0 +1,285 @@
|
|||
# ******************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2000, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# ******************************************************************************
|
||||
#
|
||||
# File created on Fri Feb 11 14:11:00 2000
|
||||
#
|
||||
# File created manually
|
||||
# from source files IBM-851.TXMAP100
|
||||
#
|
||||
# Table Version : 1.00
|
||||
#
|
||||
<code_set_name> "IBM-851"
|
||||
<char_name_mask> "AXXXX"
|
||||
<mb_cur_max> 1
|
||||
<mb_cur_min> 1
|
||||
<uconv_class> "SBCS"
|
||||
<subchar> \x7F
|
||||
#
|
||||
CHARMAP
|
||||
#
|
||||
#
|
||||
#ISO 10646 IBM-851
|
||||
#_________ _________
|
||||
<U0000> \x00 # ..NUL...
|
||||
<U0001> \x01 # ..SOH...
|
||||
<U0002> \x02 # ..STX...
|
||||
<U0003> \x03 # ..ETX...
|
||||
<U0004> \x04 # ..EOT...
|
||||
<U0005> \x05 # ..ENQ...
|
||||
<U0006> \x06 # ..ACK...
|
||||
<U0007> \x07 # ..BEL...
|
||||
<U0008> \x08 # ...BS...
|
||||
<U0009> \x09 # ...HT...
|
||||
<U000A> \x0A # ...LF...
|
||||
<U000B> \x0B # ...VT...
|
||||
<U000C> \x0C # ...FF...
|
||||
<U000D> \x0D # ...CR...
|
||||
<U000E> \x0E # .SO/LS1.
|
||||
<U000F> \x0F # .SI/LS0.
|
||||
<U0010> \x10 # ..DLE...
|
||||
<U0011> \x11 # ..DC1...
|
||||
<U0012> \x12 # ..DC2...
|
||||
<U0013> \x13 # ..DC3...
|
||||
<U0014> \x14 # ..DC4...
|
||||
<U0015> \x15 # ..NAK...
|
||||
<U0016> \x16 # ..SYN...
|
||||
<U0017> \x17 # ..ETB...
|
||||
<U0018> \x18 # ..CAN...
|
||||
<U0019> \x19 # ...EM...
|
||||
<U001C> \x1A # ..IFS...
|
||||
<U001B> \x1B # ..ESC...
|
||||
<U007F> \x1C # ..DEL...
|
||||
<U001D> \x1D # ...GS...
|
||||
<U001E> \x1E # ...RS...
|
||||
<U001F> \x1F # ...US...
|
||||
<U0020> \x20 # SP010000
|
||||
<U0021> \x21 # SP020000
|
||||
<U0022> \x22 # SP040000
|
||||
<U0023> \x23 # SM010000
|
||||
<U0024> \x24 # SC030000
|
||||
<U0025> \x25 # SM020000
|
||||
<U0026> \x26 # SM030000
|
||||
<U0027> \x27 # SP050000
|
||||
<U0028> \x28 # SP060000
|
||||
<U0029> \x29 # SP070000
|
||||
<U002A> \x2A # SM040000
|
||||
<U002B> \x2B # SA010000
|
||||
<U002C> \x2C # SP080000
|
||||
<U002D> \x2D # SP100000
|
||||
<U002E> \x2E # SP110000
|
||||
<U002F> \x2F # SP120000
|
||||
<U0030> \x30 # ND100000
|
||||
<U0031> \x31 # ND010000
|
||||
<U0032> \x32 # ND020000
|
||||
<U0033> \x33 # ND030000
|
||||
<U0034> \x34 # ND040000
|
||||
<U0035> \x35 # ND050000
|
||||
<U0036> \x36 # ND060000
|
||||
<U0037> \x37 # ND070000
|
||||
<U0038> \x38 # ND080000
|
||||
<U0039> \x39 # ND090000
|
||||
<U003A> \x3A # SP130000
|
||||
<U003B> \x3B # SP140000
|
||||
<U003C> \x3C # SA030000
|
||||
<U003D> \x3D # SA040000
|
||||
<U003E> \x3E # SA050000
|
||||
<U003F> \x3F # SP150000
|
||||
<U0040> \x40 # SM050000
|
||||
<U0041> \x41 # LA020000
|
||||
<U0042> \x42 # LB020000
|
||||
<U0043> \x43 # LC020000
|
||||
<U0044> \x44 # LD020000
|
||||
<U0045> \x45 # LE020000
|
||||
<U0046> \x46 # LF020000
|
||||
<U0047> \x47 # LG020000
|
||||
<U0048> \x48 # LH020000
|
||||
<U0049> \x49 # LI020000
|
||||
<U004A> \x4A # LJ020000
|
||||
<U004B> \x4B # LK020000
|
||||
<U004C> \x4C # LL020000
|
||||
<U004D> \x4D # LM020000
|
||||
<U004E> \x4E # LN020000
|
||||
<U004F> \x4F # LO020000
|
||||
<U0050> \x50 # LP020000
|
||||
<U0051> \x51 # LQ020000
|
||||
<U0052> \x52 # LR020000
|
||||
<U0053> \x53 # LS020000
|
||||
<U0054> \x54 # LT020000
|
||||
<U0055> \x55 # LU020000
|
||||
<U0056> \x56 # LV020000
|
||||
<U0057> \x57 # LW020000
|
||||
<U0058> \x58 # LX020000
|
||||
<U0059> \x59 # LY020000
|
||||
<U005A> \x5A # LZ020000
|
||||
<U005B> \x5B # SM060000
|
||||
<U005C> \x5C # SM070000
|
||||
<U005D> \x5D # SM080000
|
||||
<U005E> \x5E # SD150000
|
||||
<U005F> \x5F # SP090000
|
||||
<U0060> \x60 # SD130000
|
||||
<U0061> \x61 # LA010000
|
||||
<U0062> \x62 # LB010000
|
||||
<U0063> \x63 # LC010000
|
||||
<U0064> \x64 # LD010000
|
||||
<U0065> \x65 # LE010000
|
||||
<U0066> \x66 # LF010000
|
||||
<U0067> \x67 # LG010000
|
||||
<U0068> \x68 # LH010000
|
||||
<U0069> \x69 # LI010000
|
||||
<U006A> \x6A # LJ010000
|
||||
<U006B> \x6B # LK010000
|
||||
<U006C> \x6C # LL010000
|
||||
<U006D> \x6D # LM010000
|
||||
<U006E> \x6E # LN010000
|
||||
<U006F> \x6F # LO010000
|
||||
<U0070> \x70 # LP010000
|
||||
<U0071> \x71 # LQ010000
|
||||
<U0072> \x72 # LR010000
|
||||
<U0073> \x73 # LS010000
|
||||
<U0074> \x74 # LT010000
|
||||
<U0075> \x75 # LU010000
|
||||
<U0076> \x76 # LV010000
|
||||
<U0077> \x77 # LW010000
|
||||
<U0078> \x78 # LX010000
|
||||
<U0079> \x79 # LY010000
|
||||
<U007A> \x7A # LZ010000
|
||||
<U007B> \x7B # SM110000
|
||||
<U007C> \x7C # SM130000
|
||||
<U007D> \x7D # SM140000
|
||||
<U007E> \x7E # SD190000
|
||||
<U001A> \x7F # ..SUB...
|
||||
<U00C7> \x80 # LC420000
|
||||
<U00FC> \x81 # LU170000
|
||||
<U00E9> \x82 # LE110000
|
||||
<U00E2> \x83 # LA150000
|
||||
<U00E4> \x84 # LA170000
|
||||
<U00E0> \x85 # LA130000
|
||||
<U0386> \x86 # GA120000
|
||||
<U00E7> \x87 # LC410000
|
||||
<U00EA> \x88 # LE150000
|
||||
<U00EB> \x89 # LE170000
|
||||
<U00E8> \x8A # LE130000
|
||||
<U00EF> \x8B # LI170000
|
||||
<U00EE> \x8C # LI150000
|
||||
<U0388> \x8D # GE120000
|
||||
<U00C4> \x8E # LA180000
|
||||
<U0389> \x8F # GE720000
|
||||
<U038A> \x90 # GI120000
|
||||
<U038C> \x92 # GO120000
|
||||
<U00F4> \x93 # LO150000
|
||||
<U00F6> \x94 # LO170000
|
||||
<U038E> \x95 # GU120000
|
||||
<U00FB> \x96 # LU150000
|
||||
<U00F9> \x97 # LU130000
|
||||
<U038F> \x98 # GO720000
|
||||
<U00D6> \x99 # LO180000
|
||||
<U00DC> \x9A # LU180000
|
||||
<U03AC> \x9B # GA110000
|
||||
<U00A3> \x9C # SC020000
|
||||
<U03AD> \x9D # GE110000
|
||||
<U03AE> \x9E # GE710000
|
||||
<U03AF> \x9F # GI110000
|
||||
<U03CA> \xA0 # GI170000
|
||||
<U0390> \xA1 # GI730000
|
||||
<U03CC> \xA2 # GO110000
|
||||
<U03CD> \xA3 # GU110000
|
||||
<U0391> \xA4 # GA020000
|
||||
<U0392> \xA5 # GB020000
|
||||
<U0393> \xA6 # GG020000
|
||||
<U0394> \xA7 # GD020000
|
||||
<U0395> \xA8 # GE020000
|
||||
<U0396> \xA9 # GZ020000
|
||||
<U0397> \xAA # GE320000
|
||||
<U00BD> \xAB # NF010000
|
||||
<U0398> \xAC # GT620000
|
||||
<U0399> \xAD # GI020000
|
||||
<U00AB> \xAE # SP170000
|
||||
<U00BB> \xAF # SP180000
|
||||
<U2591> \xB0 # SF140000
|
||||
<U2592> \xB1 # SF150000
|
||||
<U2593> \xB2 # SF160000
|
||||
<U2502> \xB3 # SF110000
|
||||
<U2524> \xB4 # SF090000
|
||||
<U039A> \xB5 # GK020000
|
||||
<U039B> \xB6 # GL020000
|
||||
<U039C> \xB7 # GM020000
|
||||
<U039D> \xB8 # GN020000
|
||||
<U2563> \xB9 # SF230000
|
||||
<U2551> \xBA # SF240000
|
||||
<U2557> \xBB # SF250000
|
||||
<U255D> \xBC # SF260000
|
||||
<U039E> \xBD # GX020000
|
||||
<U039F> \xBE # GO020000
|
||||
<U2510> \xBF # SF030000
|
||||
<U2514> \xC0 # SF020000
|
||||
<U2534> \xC1 # SF070000
|
||||
<U252C> \xC2 # SF060000
|
||||
<U251C> \xC3 # SF080000
|
||||
<U2500> \xC4 # SF100000
|
||||
<U253C> \xC5 # SF050000
|
||||
<U03A0> \xC6 # GP020000
|
||||
<U03A1> \xC7 # GR020000
|
||||
<U255A> \xC8 # SF380000
|
||||
<U2554> \xC9 # SF390000
|
||||
<U2569> \xCA # SF400000
|
||||
<U2566> \xCB # SF410000
|
||||
<U2560> \xCC # SF420000
|
||||
<U2550> \xCD # SF430000
|
||||
<U256C> \xCE # SF440000
|
||||
<U03A3> \xCF # GS020000
|
||||
<U03A4> \xD0 # GT020000
|
||||
<U03A5> \xD1 # GU020000
|
||||
<U03A6> \xD2 # GF020000
|
||||
<U03A7> \xD3 # GH020000
|
||||
<U03A8> \xD4 # GP620000
|
||||
<U03A9> \xD5 # GO320000
|
||||
<U03B1> \xD6 # GA010000
|
||||
<U03B2> \xD7 # GB010000
|
||||
<U03B3> \xD8 # GG010000
|
||||
<U2518> \xD9 # SF040000
|
||||
<U250C> \xDA # SF010000
|
||||
<U2588> \xDB # SF610000
|
||||
<U2584> \xDC # SF570000
|
||||
<U03B4> \xDD # GD010000
|
||||
<U03B5> \xDE # GE010000
|
||||
<U2580> \xDF # SF600000
|
||||
<U03B6> \xE0 # GZ010000
|
||||
<U03B7> \xE1 # GE310000
|
||||
<U03B8> \xE2 # GT610000
|
||||
<U03B9> \xE3 # GI010000
|
||||
<U03BA> \xE4 # GK010000
|
||||
<U03BB> \xE5 # GL010000
|
||||
<U03BC> \xE6 # GM010000
|
||||
<U03BD> \xE7 # GN010000
|
||||
<U03BE> \xE8 # GX010000
|
||||
<U03BF> \xE9 # GO010000
|
||||
<U03C0> \xEA # GP010000
|
||||
<U03C1> \xEB # GR010000
|
||||
<U03C3> \xEC # GS010000
|
||||
<U03C2> \xED # GS610000
|
||||
<U03C4> \xEE # GT010000
|
||||
<U00B4> \xEF # SD110000
|
||||
<U00AD> \xF0 # SP320000
|
||||
<U00B1> \xF1 # SA020000
|
||||
<U03C5> \xF2 # GU010000
|
||||
<U03C6> \xF3 # GF010000
|
||||
<U03C7> \xF4 # GH010000
|
||||
<U00A7> \xF5 # SM240000
|
||||
<U03C8> \xF6 # GP610000
|
||||
<U00B8> \xF7 # SD410000
|
||||
<U00B0> \xF8 # SM190000
|
||||
<U00A8> \xF9 # SD170000
|
||||
<U03C9> \xFA # GO310000
|
||||
<U03CB> \xFB # GU170000
|
||||
<U03B0> \xFC # GU730000
|
||||
<U03CE> \xFD # GO710000
|
||||
<U25A0> \xFE # SM470000
|
||||
<U00A0> \xFF # SP300000
|
||||
#
|
||||
END CHARMAP
|
||||
#
|
||||
#________________________________________________________________________
|
315
icu4c/source/data/mappings/lmb-excp.ucm
Normal file
315
icu4c/source/data/mappings/lmb-excp.ucm
Normal file
|
@ -0,0 +1,315 @@
|
|||
# *******************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2000, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# *******************************************************************************
|
||||
#
|
||||
# File created on Thu Feb 10 11:47:54 2000
|
||||
#
|
||||
# File created manually from source file LMBCS.ALL
|
||||
#
|
||||
# Table Version : 1.00
|
||||
#
|
||||
<code_set_name> "lmb-excp"
|
||||
<char_name_mask> "AXXXX"
|
||||
<mb_cur_max> 2
|
||||
<mb_cur_min> 1
|
||||
<uconv_class> "MBCS"
|
||||
<subchar> \x3F
|
||||
#
|
||||
CHARMAP
|
||||
#
|
||||
#
|
||||
#ISO 10646 LMBCS
|
||||
#_________ _________
|
||||
<U0027> \x01\x27
|
||||
<U005E> \x01\x23
|
||||
<U005E> \x01\x33
|
||||
<U005E> \x01\x6D
|
||||
<U0060> \x01\x24
|
||||
<U0060> \x01\x34
|
||||
<U007E> \x01\x21
|
||||
<U007E> \x01\x31
|
||||
<U007E> \x01\x6C
|
||||
<U00A0> \x01\x3B
|
||||
<U00A7> \x01\x15
|
||||
<U00A8> \x01\x20
|
||||
<U00A8> \x01\x30
|
||||
<U00AF> \x01\x67
|
||||
<U00B4> \x01\x25
|
||||
<U00B4> \x01\x35
|
||||
<U00B6> \x01\x14
|
||||
<U0100> \x06\x2E
|
||||
<U0101> \x06\x01
|
||||
<U0108> \x06\x02
|
||||
<U0109> \x06\x03
|
||||
<U010A> \x06\x04
|
||||
<U010B> \x06\x05
|
||||
<U0112> \x06\x06
|
||||
<U0113> \x06\x07
|
||||
<U0116> \x06\x08
|
||||
<U0117> \x06\x09
|
||||
<U011C> \x06\x0A
|
||||
<U011D> \x06\x0B
|
||||
<U0120> \x06\x0C
|
||||
<U0121> \x06\x0D
|
||||
<U0122> \x06\x0E
|
||||
<U0123> \x06\x0F
|
||||
<U0124> \x06\x10
|
||||
<U0125> \x06\x11
|
||||
<U0126> \x01\x72
|
||||
<U0127> \x01\x73
|
||||
<U0128> \x06\x12
|
||||
<U0129> \x06\x13
|
||||
<U012A> \x06\x14
|
||||
<U012B> \x06\x15
|
||||
<U012E> \x06\x16
|
||||
<U012F> \x06\x17
|
||||
<U0132> \x01\x61
|
||||
<U0133> \x01\x60
|
||||
<U0134> \x06\x18
|
||||
<U0135> \x06\x19
|
||||
<U0136> \x06\x1A
|
||||
<U0137> \x06\x1B
|
||||
<U0138> \x01\x7A
|
||||
<U013B> \x06\x1C
|
||||
<U013C> \x06\x1D
|
||||
<U013F> \x01\x66
|
||||
<U0140> \x01\x65
|
||||
<U0145> \x06\x1E
|
||||
<U0146> \x06\x1F
|
||||
<U0149> \x01\x64
|
||||
<U014A> \x01\x78
|
||||
<U014B> \x01\x79
|
||||
<U014C> \x06\x20
|
||||
<U014D> \x06\x21
|
||||
<U0152> \x01\x40
|
||||
<U0153> \x01\x41
|
||||
<U0156> \x06\x22
|
||||
<U0157> \x06\x23
|
||||
<U015C> \x06\x24
|
||||
<U015D> \x06\x25
|
||||
<U0166> \x01\x74
|
||||
<U0167> \x01\x75
|
||||
<U0168> \x06\x26
|
||||
<U0169> \x06\x27
|
||||
<U016A> \x06\x28
|
||||
<U016B> \x06\x29
|
||||
<U016C> \x06\x2A
|
||||
<U016D> \x06\x2B
|
||||
<U0172> \x06\x2C
|
||||
<U0173> \x06\x2D
|
||||
<U0178> \x01\x42
|
||||
<U02BC> \x02\x07
|
||||
<U02BD> \x02\x08
|
||||
<U02C7> \x01\x6B
|
||||
<U02D8> \x01\x68
|
||||
<U02D9> \x01\x43
|
||||
<U02DA> \x01\x22
|
||||
<U02DA> \x01\x32
|
||||
<U02DA> \x01\x44
|
||||
<U02DB> \x01\x6A
|
||||
<U02DD> \x01\x69
|
||||
<U037A> \x02\x01
|
||||
<U0384> \x02\x06
|
||||
<U0385> \x02\x02
|
||||
<U03AA> \x02\x03
|
||||
<U03AB> \x02\x04
|
||||
<U03C6> \x02\x6D
|
||||
<U2013> \x01\x29
|
||||
<U2014> \x01\x2A
|
||||
<U2015> \x02\x05
|
||||
<U2017> \x01\x39
|
||||
<U2018> \x01\x2B
|
||||
<U2019> \x01\x2C
|
||||
<U201A> \x01\x37
|
||||
<U201C> \x01\x26
|
||||
<U201D> \x01\x38
|
||||
<U201E> \x01\x36
|
||||
<U2020> \x01\x70
|
||||
<U2021> \x01\x71
|
||||
<U2022> \x01\x07
|
||||
<U2026> \x01\x28
|
||||
<U2030> \x02\x7A
|
||||
<U2032> \x02\x69
|
||||
<U2033> \x02\x6A
|
||||
<U2039> \x01\x2E
|
||||
<U203A> \x01\x2F
|
||||
<U203C> \x01\x13
|
||||
<U203E> \x02\x09
|
||||
<U2044> \x02\x78
|
||||
<U207F> \x02\x7C
|
||||
<U20A4> \x01\x7E
|
||||
<U20A7> \x01\x7F
|
||||
<U2111> \x02\x52
|
||||
<U2113> \x01\x77
|
||||
<U211C> \x02\x53
|
||||
<U2122> \x01\x76
|
||||
<U2126> \x01\x4E
|
||||
<U2135> \x02\x51
|
||||
<U215B> \x02\x16
|
||||
<U215C> \x02\x15
|
||||
<U215D> \x02\x14
|
||||
<U215E> \x02\x13
|
||||
<U2190> \x01\x1B
|
||||
<U2191> \x01\x18
|
||||
<U2192> \x01\x1A
|
||||
<U2193> \x01\x19
|
||||
<U2194> \x01\x1D
|
||||
<U2195> \x01\x12
|
||||
<U21A8> \x01\x17
|
||||
<U21D0> \x02\x1B
|
||||
<U21D1> \x02\x18
|
||||
<U21D2> \x02\x1A
|
||||
<U21D3> \x02\x19
|
||||
<U21D4> \x02\x1D
|
||||
<U21D5> \x02\x12
|
||||
<U2200> \x02\x66
|
||||
<U2201> \x02\x64
|
||||
<U2202> \x02\x50
|
||||
<U2203> \x02\x67
|
||||
<U2205> \x02\x7D
|
||||
<U2207> \x02\x41
|
||||
<U2208> \x02\x5C
|
||||
<U2209> \x02\x5D
|
||||
<U220B> \x02\x5B
|
||||
<U2219> \x02\x79
|
||||
<U221A> \x02\x7B
|
||||
<U221D> \x02\x6C
|
||||
<U221E> \x02\x6B
|
||||
<U221F> \x01\x1C
|
||||
<U2220> \x02\x40
|
||||
<U2229> \x02\x6F
|
||||
<U222A> \x02\x6E
|
||||
<U222B> \x02\x65
|
||||
<U2245> \x02\x71
|
||||
<U2248> \x02\x77
|
||||
<U2260> \x02\x76
|
||||
<U2261> \x02\x70
|
||||
<U2264> \x02\x73
|
||||
<U2265> \x02\x72
|
||||
<U2282> \x02\x7E
|
||||
<U2283> \x02\x7F
|
||||
<U2286> \x02\x5E
|
||||
<U2287> \x02\x5F
|
||||
<U2295> \x02\x61
|
||||
<U2297> \x02\x60
|
||||
<U22C0> \x02\x63
|
||||
<U2310> \x01\x7D
|
||||
<U2318> \x01\x4B
|
||||
<U2320> \x02\x74
|
||||
<U2321> \x02\x75
|
||||
<U2552> \x01\x55
|
||||
<U2553> \x01\x56
|
||||
<U2555> \x01\x5C
|
||||
<U2556> \x01\x5B
|
||||
<U2558> \x01\x54
|
||||
<U2559> \x01\x53
|
||||
<U255B> \x01\x5E
|
||||
<U255C> \x01\x5D
|
||||
<U255E> \x01\x46
|
||||
<U255F> \x01\x47
|
||||
<U2561> \x01\x59
|
||||
<U2562> \x01\x5A
|
||||
<U2564> \x01\x51
|
||||
<U2565> \x01\x52
|
||||
<U2567> \x01\x5F
|
||||
<U2568> \x01\x50
|
||||
<U256A> \x01\x58
|
||||
<U256B> \x01\x57
|
||||
<U258C> \x01\x48
|
||||
<U2590> \x01\x49
|
||||
<U25AC> \x01\x16
|
||||
<U25B2> \x01\x1E
|
||||
<U25BA> \x01\x10
|
||||
<U25BC> \x01\x1F
|
||||
<U25C4> \x01\x11
|
||||
<U25CA> \x01\x4A
|
||||
<U25CB> \x01\x09
|
||||
<U25D8> \x01\x08
|
||||
<U25D9> \x01\x0A
|
||||
<U263A> \x01\x01
|
||||
<U263B> \x01\x02
|
||||
<U263C> \x01\x0F
|
||||
<U2640> \x01\x0C
|
||||
<U2642> \x01\x0B
|
||||
<U2660> \x01\x06
|
||||
<U2663> \x01\x05
|
||||
<U2665> \x01\x03
|
||||
<U2666> \x01\x04
|
||||
<U266A> \x01\x0D
|
||||
<U266B> \x01\x0E
|
||||
<U2713> \x02\x62
|
||||
<UF862> \x02\x0E
|
||||
<UF863> \x02\x0F
|
||||
<UF864> \x02\x10
|
||||
<UF865> \x02\x11
|
||||
<UF866> \x02\x3F
|
||||
<UF867> \x02\x17
|
||||
<UF868> \x02\x1C
|
||||
<UF869> \x02\x1E
|
||||
<UF86A> \x02\x68
|
||||
<UF86B> \x02\x5A
|
||||
<UF86C> \x02\x59
|
||||
<UF86D> \x02\x58
|
||||
<UF86E> \x02\x57
|
||||
<UF86F> \x02\x56
|
||||
<UF870> \x02\x55
|
||||
<UF871> \x02\x54
|
||||
<UF872> \x02\x4F
|
||||
<UF873> \x02\x4E
|
||||
<UF874> \x02\x4D
|
||||
<UF875> \x02\x4C
|
||||
<UF876> \x02\x4B
|
||||
<UF877> \x02\x4A
|
||||
<UF878> \x02\x49
|
||||
<UF879> \x02\x48
|
||||
<UF87A> \x02\x47
|
||||
<UF87B> \x02\x46
|
||||
<UF87C> \x02\x45
|
||||
<UF87D> \x02\x44
|
||||
<UF87E> \x02\x43
|
||||
<UF87F> \x02\x42
|
||||
<UF880> \x02\x3E
|
||||
<UF881> \x02\x3D
|
||||
<UF882> \x02\x3C
|
||||
<UF883> \x02\x3B
|
||||
<UF884> \x02\x3A
|
||||
<UF885> \x02\x39
|
||||
<UF886> \x02\x38
|
||||
<UF887> \x02\x37
|
||||
<UF888> \x02\x36
|
||||
<UF889> \x02\x35
|
||||
<UF88A> \x02\x34
|
||||
<UF88B> \x02\x33
|
||||
<UF88C> \x02\x32
|
||||
<UF88D> \x02\x31
|
||||
<UF88E> \x02\x30
|
||||
<UF88F> \x02\x2F
|
||||
<UF890> \x02\x2E
|
||||
<UF891> \x02\x2D
|
||||
<UF892> \x02\x2C
|
||||
<UF893> \x02\x2B
|
||||
<UF894> \x02\x2A
|
||||
<UF895> \x02\x29
|
||||
<UF896> \x02\x28
|
||||
<UF897> \x02\x27
|
||||
<UF898> \x02\x26
|
||||
<UF899> \x02\x25
|
||||
<UF89A> \x02\x24
|
||||
<UF89B> \x02\x23
|
||||
<UF89C> \x02\x22
|
||||
<UF89D> \x02\x21
|
||||
<UF89E> \x02\x20
|
||||
<UF89F> \x02\x1F
|
||||
<UF8FB> \x01\x7C
|
||||
<UF8FC> \x01\x63
|
||||
<UF8FD> \x01\x62
|
||||
<UF8FE> \x01\x4D
|
||||
<UF8FF> \x01\x4C
|
||||
<UFFFD> \x01\x3D
|
||||
#
|
||||
END CHARMAP
|
||||
#
|
||||
#________________________________________________________________________
|
|
@ -30,6 +30,7 @@ void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
|
|||
void TestConverterTypesAndStarters(void);
|
||||
void TestAmbiguous(void);
|
||||
void TestUTF8(void);
|
||||
void TestLMBCS(void);
|
||||
void TestJitterbug255(void);
|
||||
|
||||
#define NEW_MAX_BUFFER 999
|
||||
|
@ -105,6 +106,7 @@ void addTestNewConvert(TestNode** root)
|
|||
addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
|
||||
addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
|
||||
addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
|
||||
addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
|
||||
addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
|
||||
}
|
||||
|
||||
|
@ -748,6 +750,68 @@ TestUTF8() {
|
|||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
void
|
||||
TestLMBCS() {
|
||||
/* test input */
|
||||
static const uint8_t in[]={
|
||||
0x61,
|
||||
0x01, 0x29,
|
||||
0x81,
|
||||
0xA0,
|
||||
0x0F, 0x27,
|
||||
0x0F, 0x91,
|
||||
0x14, 0x0a, 0x74,
|
||||
0x14, 0xF6, 0x02,
|
||||
0x10, 0x88, 0xA0
|
||||
};
|
||||
|
||||
/* expected test results */
|
||||
static const uint32_t results[]={
|
||||
/* number of bytes read, code point */
|
||||
1, 0x0061,
|
||||
2, 0x2013,
|
||||
1, 0x00FC,
|
||||
1, 0x00E1,
|
||||
2, 0x0007,
|
||||
2, 0x0091,
|
||||
3, 0x0a74,
|
||||
3, 0x0200,
|
||||
3, 0x5516
|
||||
|
||||
};
|
||||
|
||||
const char *s=(const char *)in, *s0, *limit=(const char *)in+sizeof(in);
|
||||
const uint32_t *r=results;
|
||||
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
uint32_t c;
|
||||
|
||||
UConverter *cnv=ucnv_open("LMBCS-1", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
while(s<limit) {
|
||||
s0=s;
|
||||
c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
|
||||
break;
|
||||
} else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
|
||||
log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
|
||||
c, (s-s0), *(r+1), *r);
|
||||
break;
|
||||
}
|
||||
r+=2;
|
||||
}
|
||||
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TestJitterbug255()
|
||||
{
|
||||
const char testBytes[] = { (char)0x95, (char)0xcf, (char)0x8a,
|
||||
|
|
|
@ -57,4 +57,5 @@ ibm-953.ucm ibm-955.ucm\
|
|||
ibm-37-s390.ucm\
|
||||
ibm-1140-s390.ucm ibm-1142-s390.ucm ibm-1143-s390.ucm ibm-1144-s390.ucm\
|
||||
ibm-1145-s390.ucm ibm-1146-s390.ucm ibm-1147-s390.ucm ibm-1148-s390.ucm\
|
||||
ibm-1149-s390.ucm ibm-1153-s390.ucm ibm-12712-s390.ucm ibm-16804-s390.ucm
|
||||
ibm-1149-s390.ucm ibm-1153-s390.ucm ibm-12712-s390.ucm ibm-16804-s390.ucm\
|
||||
lmb-excp.ucm ibm-851.ucm
|
||||
|
|
Loading…
Add table
Reference in a new issue