ICU-271 First-pass LMBCS support (Lotus Multi-byte Character set)

X-SVN-Rev: 1042
This commit is contained in:
Jim Snyder Grant 2000-03-31 16:53:09 +00:00
parent bd899dc9a3
commit 2dbef4e207
13 changed files with 2220 additions and 7 deletions

View file

@ -47,7 +47,19 @@ UTF16_PlatformEndian iso-10646-ucs-2 csUnicode utf16 utf-16 ibm-1200 ibm1200
UTF16_OppositeEndian
LATIN_1 iso-8859-1 iso_8859-1 ibm-819 ibm819 cp819 latin1 latin-1 ascii ascii-7 us-ascii 8859-1 csisolatin1 iso-ir-100 iso_8859-1:1978 #!!!!! There's whole lot of names for this - cp367 csASCII etc.
ISO_2022 iso-2022 2022 cp2022 iso2022 iso_2022
LMBCS
LMBCS-1 lmbcs
LMBCS-2
LMBCS-3
LMBCS-4
LMBCS-5
LMBCS-6
LMBCS-8
LMBCS-11
LMBCS-16
LMBCS-17
LMBCS-18
LMBCS-19
# Table-based
@ -76,10 +88,13 @@ ibm-1383 euc-cn euccn ibm-eucCN # China EUC
#ibm-1162 tis-620 cp874 windows-874 ms874 # Thai (w/ euro support) #what is the connection between this and the one below!!!
ibm-874 ibm-1161 #same as 1162 (w/o euro update) ***This is commented out in Helena's
lmb-excp # special exceptions list for LMBCS algorithm
# Platform codepages
ibm-437 ibm437 cp437 csPC8CodePage437 437 # PC US
# HSYS:
ibm-850 IBM850 cp850 850 csPC850Multilingual # PC latin1
ibm-851 IBM851 cp851 851 csPC851 # PC DOS Greek (no euro)
ibm-858 ibm858 cp858 # PC latin1 with Euro cp850 removed
ibm-9044 IBM852 852 csPCp852 cp852 # PC latin2 (w/ euro update) #where should the names go here or below - inconsistency!!!
ibm-852 # PC latin2 (w/o euro update)

285
icu4c/data/ibm-851.ucm Normal file
View file

@ -0,0 +1,285 @@
# ******************************************************************************
# *
# * Copyright (C) 1995-2000, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# ******************************************************************************
#
# File created on Fri Feb 11 14:11:00 2000
#
# File created manually
# from source files IBM-851.TXMAP100
#
# Table Version : 1.00
#
<code_set_name> "IBM-851"
<char_name_mask> "AXXXX"
<mb_cur_max> 1
<mb_cur_min> 1
<uconv_class> "SBCS"
<subchar> \x7F
#
CHARMAP
#
#
#ISO 10646 IBM-851
#_________ _________
<U0000> \x00 # ..NUL...
<U0001> \x01 # ..SOH...
<U0002> \x02 # ..STX...
<U0003> \x03 # ..ETX...
<U0004> \x04 # ..EOT...
<U0005> \x05 # ..ENQ...
<U0006> \x06 # ..ACK...
<U0007> \x07 # ..BEL...
<U0008> \x08 # ...BS...
<U0009> \x09 # ...HT...
<U000A> \x0A # ...LF...
<U000B> \x0B # ...VT...
<U000C> \x0C # ...FF...
<U000D> \x0D # ...CR...
<U000E> \x0E # .SO/LS1.
<U000F> \x0F # .SI/LS0.
<U0010> \x10 # ..DLE...
<U0011> \x11 # ..DC1...
<U0012> \x12 # ..DC2...
<U0013> \x13 # ..DC3...
<U0014> \x14 # ..DC4...
<U0015> \x15 # ..NAK...
<U0016> \x16 # ..SYN...
<U0017> \x17 # ..ETB...
<U0018> \x18 # ..CAN...
<U0019> \x19 # ...EM...
<U001C> \x1A # ..IFS...
<U001B> \x1B # ..ESC...
<U007F> \x1C # ..DEL...
<U001D> \x1D # ...GS...
<U001E> \x1E # ...RS...
<U001F> \x1F # ...US...
<U0020> \x20 # SP010000
<U0021> \x21 # SP020000
<U0022> \x22 # SP040000
<U0023> \x23 # SM010000
<U0024> \x24 # SC030000
<U0025> \x25 # SM020000
<U0026> \x26 # SM030000
<U0027> \x27 # SP050000
<U0028> \x28 # SP060000
<U0029> \x29 # SP070000
<U002A> \x2A # SM040000
<U002B> \x2B # SA010000
<U002C> \x2C # SP080000
<U002D> \x2D # SP100000
<U002E> \x2E # SP110000
<U002F> \x2F # SP120000
<U0030> \x30 # ND100000
<U0031> \x31 # ND010000
<U0032> \x32 # ND020000
<U0033> \x33 # ND030000
<U0034> \x34 # ND040000
<U0035> \x35 # ND050000
<U0036> \x36 # ND060000
<U0037> \x37 # ND070000
<U0038> \x38 # ND080000
<U0039> \x39 # ND090000
<U003A> \x3A # SP130000
<U003B> \x3B # SP140000
<U003C> \x3C # SA030000
<U003D> \x3D # SA040000
<U003E> \x3E # SA050000
<U003F> \x3F # SP150000
<U0040> \x40 # SM050000
<U0041> \x41 # LA020000
<U0042> \x42 # LB020000
<U0043> \x43 # LC020000
<U0044> \x44 # LD020000
<U0045> \x45 # LE020000
<U0046> \x46 # LF020000
<U0047> \x47 # LG020000
<U0048> \x48 # LH020000
<U0049> \x49 # LI020000
<U004A> \x4A # LJ020000
<U004B> \x4B # LK020000
<U004C> \x4C # LL020000
<U004D> \x4D # LM020000
<U004E> \x4E # LN020000
<U004F> \x4F # LO020000
<U0050> \x50 # LP020000
<U0051> \x51 # LQ020000
<U0052> \x52 # LR020000
<U0053> \x53 # LS020000
<U0054> \x54 # LT020000
<U0055> \x55 # LU020000
<U0056> \x56 # LV020000
<U0057> \x57 # LW020000
<U0058> \x58 # LX020000
<U0059> \x59 # LY020000
<U005A> \x5A # LZ020000
<U005B> \x5B # SM060000
<U005C> \x5C # SM070000
<U005D> \x5D # SM080000
<U005E> \x5E # SD150000
<U005F> \x5F # SP090000
<U0060> \x60 # SD130000
<U0061> \x61 # LA010000
<U0062> \x62 # LB010000
<U0063> \x63 # LC010000
<U0064> \x64 # LD010000
<U0065> \x65 # LE010000
<U0066> \x66 # LF010000
<U0067> \x67 # LG010000
<U0068> \x68 # LH010000
<U0069> \x69 # LI010000
<U006A> \x6A # LJ010000
<U006B> \x6B # LK010000
<U006C> \x6C # LL010000
<U006D> \x6D # LM010000
<U006E> \x6E # LN010000
<U006F> \x6F # LO010000
<U0070> \x70 # LP010000
<U0071> \x71 # LQ010000
<U0072> \x72 # LR010000
<U0073> \x73 # LS010000
<U0074> \x74 # LT010000
<U0075> \x75 # LU010000
<U0076> \x76 # LV010000
<U0077> \x77 # LW010000
<U0078> \x78 # LX010000
<U0079> \x79 # LY010000
<U007A> \x7A # LZ010000
<U007B> \x7B # SM110000
<U007C> \x7C # SM130000
<U007D> \x7D # SM140000
<U007E> \x7E # SD190000
<U001A> \x7F # ..SUB...
<U00C7> \x80 # LC420000
<U00FC> \x81 # LU170000
<U00E9> \x82 # LE110000
<U00E2> \x83 # LA150000
<U00E4> \x84 # LA170000
<U00E0> \x85 # LA130000
<U0386> \x86 # GA120000
<U00E7> \x87 # LC410000
<U00EA> \x88 # LE150000
<U00EB> \x89 # LE170000
<U00E8> \x8A # LE130000
<U00EF> \x8B # LI170000
<U00EE> \x8C # LI150000
<U0388> \x8D # GE120000
<U00C4> \x8E # LA180000
<U0389> \x8F # GE720000
<U038A> \x90 # GI120000
<U038C> \x92 # GO120000
<U00F4> \x93 # LO150000
<U00F6> \x94 # LO170000
<U038E> \x95 # GU120000
<U00FB> \x96 # LU150000
<U00F9> \x97 # LU130000
<U038F> \x98 # GO720000
<U00D6> \x99 # LO180000
<U00DC> \x9A # LU180000
<U03AC> \x9B # GA110000
<U00A3> \x9C # SC020000
<U03AD> \x9D # GE110000
<U03AE> \x9E # GE710000
<U03AF> \x9F # GI110000
<U03CA> \xA0 # GI170000
<U0390> \xA1 # GI730000
<U03CC> \xA2 # GO110000
<U03CD> \xA3 # GU110000
<U0391> \xA4 # GA020000
<U0392> \xA5 # GB020000
<U0393> \xA6 # GG020000
<U0394> \xA7 # GD020000
<U0395> \xA8 # GE020000
<U0396> \xA9 # GZ020000
<U0397> \xAA # GE320000
<U00BD> \xAB # NF010000
<U0398> \xAC # GT620000
<U0399> \xAD # GI020000
<U00AB> \xAE # SP170000
<U00BB> \xAF # SP180000
<U2591> \xB0 # SF140000
<U2592> \xB1 # SF150000
<U2593> \xB2 # SF160000
<U2502> \xB3 # SF110000
<U2524> \xB4 # SF090000
<U039A> \xB5 # GK020000
<U039B> \xB6 # GL020000
<U039C> \xB7 # GM020000
<U039D> \xB8 # GN020000
<U2563> \xB9 # SF230000
<U2551> \xBA # SF240000
<U2557> \xBB # SF250000
<U255D> \xBC # SF260000
<U039E> \xBD # GX020000
<U039F> \xBE # GO020000
<U2510> \xBF # SF030000
<U2514> \xC0 # SF020000
<U2534> \xC1 # SF070000
<U252C> \xC2 # SF060000
<U251C> \xC3 # SF080000
<U2500> \xC4 # SF100000
<U253C> \xC5 # SF050000
<U03A0> \xC6 # GP020000
<U03A1> \xC7 # GR020000
<U255A> \xC8 # SF380000
<U2554> \xC9 # SF390000
<U2569> \xCA # SF400000
<U2566> \xCB # SF410000
<U2560> \xCC # SF420000
<U2550> \xCD # SF430000
<U256C> \xCE # SF440000
<U03A3> \xCF # GS020000
<U03A4> \xD0 # GT020000
<U03A5> \xD1 # GU020000
<U03A6> \xD2 # GF020000
<U03A7> \xD3 # GH020000
<U03A8> \xD4 # GP620000
<U03A9> \xD5 # GO320000
<U03B1> \xD6 # GA010000
<U03B2> \xD7 # GB010000
<U03B3> \xD8 # GG010000
<U2518> \xD9 # SF040000
<U250C> \xDA # SF010000
<U2588> \xDB # SF610000
<U2584> \xDC # SF570000
<U03B4> \xDD # GD010000
<U03B5> \xDE # GE010000
<U2580> \xDF # SF600000
<U03B6> \xE0 # GZ010000
<U03B7> \xE1 # GE310000
<U03B8> \xE2 # GT610000
<U03B9> \xE3 # GI010000
<U03BA> \xE4 # GK010000
<U03BB> \xE5 # GL010000
<U03BC> \xE6 # GM010000
<U03BD> \xE7 # GN010000
<U03BE> \xE8 # GX010000
<U03BF> \xE9 # GO010000
<U03C0> \xEA # GP010000
<U03C1> \xEB # GR010000
<U03C3> \xEC # GS010000
<U03C2> \xED # GS610000
<U03C4> \xEE # GT010000
<U00B4> \xEF # SD110000
<U00AD> \xF0 # SP320000
<U00B1> \xF1 # SA020000
<U03C5> \xF2 # GU010000
<U03C6> \xF3 # GF010000
<U03C7> \xF4 # GH010000
<U00A7> \xF5 # SM240000
<U03C8> \xF6 # GP610000
<U00B8> \xF7 # SD410000
<U00B0> \xF8 # SM190000
<U00A8> \xF9 # SD170000
<U03C9> \xFA # GO310000
<U03CB> \xFB # GU170000
<U03B0> \xFC # GU730000
<U03CE> \xFD # GO710000
<U25A0> \xFE # SM470000
<U00A0> \xFF # SP300000
#
END CHARMAP
#
#________________________________________________________________________

315
icu4c/data/lmb-excp.ucm Normal file
View file

@ -0,0 +1,315 @@
# *******************************************************************************
# *
# * Copyright (C) 1995-2000, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# *******************************************************************************
#
# File created on Thu Feb 10 11:47:54 2000
#
# File created manually from source file LMBCS.ALL
#
# Table Version : 1.00
#
<code_set_name> "lmb-excp"
<char_name_mask> "AXXXX"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
#
CHARMAP
#
#
#ISO 10646 LMBCS
#_________ _________
<U0027> \x01\x27
<U005E> \x01\x23
<U005E> \x01\x33
<U005E> \x01\x6D
<U0060> \x01\x24
<U0060> \x01\x34
<U007E> \x01\x21
<U007E> \x01\x31
<U007E> \x01\x6C
<U00A0> \x01\x3B
<U00A7> \x01\x15
<U00A8> \x01\x20
<U00A8> \x01\x30
<U00AF> \x01\x67
<U00B4> \x01\x25
<U00B4> \x01\x35
<U00B6> \x01\x14
<U0100> \x06\x2E
<U0101> \x06\x01
<U0108> \x06\x02
<U0109> \x06\x03
<U010A> \x06\x04
<U010B> \x06\x05
<U0112> \x06\x06
<U0113> \x06\x07
<U0116> \x06\x08
<U0117> \x06\x09
<U011C> \x06\x0A
<U011D> \x06\x0B
<U0120> \x06\x0C
<U0121> \x06\x0D
<U0122> \x06\x0E
<U0123> \x06\x0F
<U0124> \x06\x10
<U0125> \x06\x11
<U0126> \x01\x72
<U0127> \x01\x73
<U0128> \x06\x12
<U0129> \x06\x13
<U012A> \x06\x14
<U012B> \x06\x15
<U012E> \x06\x16
<U012F> \x06\x17
<U0132> \x01\x61
<U0133> \x01\x60
<U0134> \x06\x18
<U0135> \x06\x19
<U0136> \x06\x1A
<U0137> \x06\x1B
<U0138> \x01\x7A
<U013B> \x06\x1C
<U013C> \x06\x1D
<U013F> \x01\x66
<U0140> \x01\x65
<U0145> \x06\x1E
<U0146> \x06\x1F
<U0149> \x01\x64
<U014A> \x01\x78
<U014B> \x01\x79
<U014C> \x06\x20
<U014D> \x06\x21
<U0152> \x01\x40
<U0153> \x01\x41
<U0156> \x06\x22
<U0157> \x06\x23
<U015C> \x06\x24
<U015D> \x06\x25
<U0166> \x01\x74
<U0167> \x01\x75
<U0168> \x06\x26
<U0169> \x06\x27
<U016A> \x06\x28
<U016B> \x06\x29
<U016C> \x06\x2A
<U016D> \x06\x2B
<U0172> \x06\x2C
<U0173> \x06\x2D
<U0178> \x01\x42
<U02BC> \x02\x07
<U02BD> \x02\x08
<U02C7> \x01\x6B
<U02D8> \x01\x68
<U02D9> \x01\x43
<U02DA> \x01\x22
<U02DA> \x01\x32
<U02DA> \x01\x44
<U02DB> \x01\x6A
<U02DD> \x01\x69
<U037A> \x02\x01
<U0384> \x02\x06
<U0385> \x02\x02
<U03AA> \x02\x03
<U03AB> \x02\x04
<U03C6> \x02\x6D
<U2013> \x01\x29
<U2014> \x01\x2A
<U2015> \x02\x05
<U2017> \x01\x39
<U2018> \x01\x2B
<U2019> \x01\x2C
<U201A> \x01\x37
<U201C> \x01\x26
<U201D> \x01\x38
<U201E> \x01\x36
<U2020> \x01\x70
<U2021> \x01\x71
<U2022> \x01\x07
<U2026> \x01\x28
<U2030> \x02\x7A
<U2032> \x02\x69
<U2033> \x02\x6A
<U2039> \x01\x2E
<U203A> \x01\x2F
<U203C> \x01\x13
<U203E> \x02\x09
<U2044> \x02\x78
<U207F> \x02\x7C
<U20A4> \x01\x7E
<U20A7> \x01\x7F
<U2111> \x02\x52
<U2113> \x01\x77
<U211C> \x02\x53
<U2122> \x01\x76
<U2126> \x01\x4E
<U2135> \x02\x51
<U215B> \x02\x16
<U215C> \x02\x15
<U215D> \x02\x14
<U215E> \x02\x13
<U2190> \x01\x1B
<U2191> \x01\x18
<U2192> \x01\x1A
<U2193> \x01\x19
<U2194> \x01\x1D
<U2195> \x01\x12
<U21A8> \x01\x17
<U21D0> \x02\x1B
<U21D1> \x02\x18
<U21D2> \x02\x1A
<U21D3> \x02\x19
<U21D4> \x02\x1D
<U21D5> \x02\x12
<U2200> \x02\x66
<U2201> \x02\x64
<U2202> \x02\x50
<U2203> \x02\x67
<U2205> \x02\x7D
<U2207> \x02\x41
<U2208> \x02\x5C
<U2209> \x02\x5D
<U220B> \x02\x5B
<U2219> \x02\x79
<U221A> \x02\x7B
<U221D> \x02\x6C
<U221E> \x02\x6B
<U221F> \x01\x1C
<U2220> \x02\x40
<U2229> \x02\x6F
<U222A> \x02\x6E
<U222B> \x02\x65
<U2245> \x02\x71
<U2248> \x02\x77
<U2260> \x02\x76
<U2261> \x02\x70
<U2264> \x02\x73
<U2265> \x02\x72
<U2282> \x02\x7E
<U2283> \x02\x7F
<U2286> \x02\x5E
<U2287> \x02\x5F
<U2295> \x02\x61
<U2297> \x02\x60
<U22C0> \x02\x63
<U2310> \x01\x7D
<U2318> \x01\x4B
<U2320> \x02\x74
<U2321> \x02\x75
<U2552> \x01\x55
<U2553> \x01\x56
<U2555> \x01\x5C
<U2556> \x01\x5B
<U2558> \x01\x54
<U2559> \x01\x53
<U255B> \x01\x5E
<U255C> \x01\x5D
<U255E> \x01\x46
<U255F> \x01\x47
<U2561> \x01\x59
<U2562> \x01\x5A
<U2564> \x01\x51
<U2565> \x01\x52
<U2567> \x01\x5F
<U2568> \x01\x50
<U256A> \x01\x58
<U256B> \x01\x57
<U258C> \x01\x48
<U2590> \x01\x49
<U25AC> \x01\x16
<U25B2> \x01\x1E
<U25BA> \x01\x10
<U25BC> \x01\x1F
<U25C4> \x01\x11
<U25CA> \x01\x4A
<U25CB> \x01\x09
<U25D8> \x01\x08
<U25D9> \x01\x0A
<U263A> \x01\x01
<U263B> \x01\x02
<U263C> \x01\x0F
<U2640> \x01\x0C
<U2642> \x01\x0B
<U2660> \x01\x06
<U2663> \x01\x05
<U2665> \x01\x03
<U2666> \x01\x04
<U266A> \x01\x0D
<U266B> \x01\x0E
<U2713> \x02\x62
<UF862> \x02\x0E
<UF863> \x02\x0F
<UF864> \x02\x10
<UF865> \x02\x11
<UF866> \x02\x3F
<UF867> \x02\x17
<UF868> \x02\x1C
<UF869> \x02\x1E
<UF86A> \x02\x68
<UF86B> \x02\x5A
<UF86C> \x02\x59
<UF86D> \x02\x58
<UF86E> \x02\x57
<UF86F> \x02\x56
<UF870> \x02\x55
<UF871> \x02\x54
<UF872> \x02\x4F
<UF873> \x02\x4E
<UF874> \x02\x4D
<UF875> \x02\x4C
<UF876> \x02\x4B
<UF877> \x02\x4A
<UF878> \x02\x49
<UF879> \x02\x48
<UF87A> \x02\x47
<UF87B> \x02\x46
<UF87C> \x02\x45
<UF87D> \x02\x44
<UF87E> \x02\x43
<UF87F> \x02\x42
<UF880> \x02\x3E
<UF881> \x02\x3D
<UF882> \x02\x3C
<UF883> \x02\x3B
<UF884> \x02\x3A
<UF885> \x02\x39
<UF886> \x02\x38
<UF887> \x02\x37
<UF888> \x02\x36
<UF889> \x02\x35
<UF88A> \x02\x34
<UF88B> \x02\x33
<UF88C> \x02\x32
<UF88D> \x02\x31
<UF88E> \x02\x30
<UF88F> \x02\x2F
<UF890> \x02\x2E
<UF891> \x02\x2D
<UF892> \x02\x2C
<UF893> \x02\x2B
<UF894> \x02\x2A
<UF895> \x02\x29
<UF896> \x02\x28
<UF897> \x02\x27
<UF898> \x02\x26
<UF899> \x02\x25
<UF89A> \x02\x24
<UF89B> \x02\x23
<UF89C> \x02\x22
<UF89D> \x02\x21
<UF89E> \x02\x20
<UF89F> \x02\x1F
<UF8FB> \x01\x7C
<UF8FC> \x01\x63
<UF8FD> \x01\x62
<UF8FE> \x01\x4D
<UF8FF> \x01\x4C
<UFFFD> \x01\x3D
#
END CHARMAP
#
#________________________________________________________________________

View file

@ -231,6 +231,10 @@ SOURCE=.\ucnv_io.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_lmb.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_utf.c
# End Source File
# Begin Source File

View file

@ -35,7 +35,9 @@ static const UConverterSharedData *
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
&_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData,
&_ISO2022Data
&_ISO2022Data,
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
&_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19
};
static struct {
@ -53,9 +55,22 @@ static struct {
{ "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
{ "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
#endif
{ "ISO_2022", UCNV_ISO_2022 }
{ "ISO_2022", UCNV_ISO_2022 },
{ "LMBCS-1", UCNV_LMBCS_1 },
{ "LMBCS-2", UCNV_LMBCS_2 },
{ "LMBCS-3", UCNV_LMBCS_3 },
{ "LMBCS-4", UCNV_LMBCS_4 },
{ "LMBCS-5", UCNV_LMBCS_5 },
{ "LMBCS-6", UCNV_LMBCS_6 },
{ "LMBCS-8", UCNV_LMBCS_8 },
{ "LMBCS-11",UCNV_LMBCS_11 },
{ "LMBCS-16",UCNV_LMBCS_16 },
{ "LMBCS-17",UCNV_LMBCS_17 },
{ "LMBCS-18",UCNV_LMBCS_18 },
{ "LMBCS-19",UCNV_LMBCS_19 }
};
/*Takes an alias name gets an actual converter file name
*goes to disk and opens it.
*allocates the memory and returns a new UConverter object

View file

@ -194,7 +194,9 @@ struct UConverterImpl {
extern const UConverterSharedData
_SBCSData, _DBCSData, _MBCSData, _Latin1Data,
_UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData,
_ISO2022Data;
_ISO2022Data,
_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19;
U_CDECL_END

View file

@ -0,0 +1,870 @@
/*
**********************************************************************
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_lmb.cpp
* encoding: US-ASCII
* tab size: 4 (not used)
* indentation:4
*
* created on: 2000feb09
* created by: Brendan Murray
*/
#include "unicode/utypes.h"
#include "cmemory.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
/* LMBCS -------------------------------------------------------------------- */
/* Group bytes, and things that look like group bytes, should always be 8-bits */
typedef uint8_t ulmbcs_grp_t;

/* Define some constants instead of using literals */

/* LMBCS groups.
   The value of each group is the "optimization group" byte that prefixes a
   character of that group in an LMBCS stream, and is also the index of the
   matching sub-converter name in OptGroupByteToCPName. */
#define ULMBCS_GRP_EXCEPT 0x00 /* placeholder index for 'oddballs' XY, where Y<0x80 */
#define ULMBCS_GRP_L1 0x01 /* Latin-1 */
#define ULMBCS_GRP_GR 0x02 /* Greek */
#define ULMBCS_GRP_HE 0x03 /* Hebrew */
#define ULMBCS_GRP_AR 0x04 /* Arabic */
#define ULMBCS_GRP_RU 0x05 /* Cyrillic */
#define ULMBCS_GRP_L2 0x06 /* Latin-2 */
#define ULMBCS_GRP_TR 0x08 /* Turkish */
#define ULMBCS_GRP_TH 0x0B /* Thai */
#define ULMBCS_GRP_CTRL 0x0F /* C0/C1 controls */
#define ULMBCS_GRP_JA 0x10 /* Japanese */
#define ULMBCS_GRP_KO 0x11 /* Korean */
/* The two Chinese groups were previously swapped relative to the converter
   table: OptGroupByteToCPName maps 0x12 to ibm-950 (Traditional Chinese,
   Taiwan) and 0x13 to ibm-1386 (Simplified Chinese, PRC). */
#define ULMBCS_GRP_TW 0x12 /* Chinese Taiwan (Traditional) */
#define ULMBCS_GRP_CN 0x13 /* Chinese PRC (Simplified) */
#define ULMBCS_GRP_UNICODE 0x14 /* Unicode compatibility group */
#define ULMBCS_GRP_LAST 0x14 /* last LMBCS group that means anything */

/* some special values that can appear in place of optimization groups */
#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */
#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */
#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */
#define ULMBCS_123SYSTEMRANGE 0x19 /* Fixed control char for 1-2-3 file data: start system range name */
#define ULMBCS_DEFAULTOPTGROUP 0x1 /* default optimization group for LMBCS */
#define ULMBCS_DOUBLEOPTGROUP 0x10 /* start of double-byte optimization groups */

/* parts of LMBCS values, or ranges for LMBCS data */
#define ULMBCS_UNICOMPATZERO 0xF6 /* PUA range for Unicode chars containing LSB = 0 */
#define ULMBCS_CTRLOFFSET 0x20 /* Offset of control range in group 0x0F */
#define ULMBCS_C1START 0x80 /* Start of 'C1' upper ascii range in ANSI code pages */
#define ULMBCS_C0END 0x1F /* last of the 'C0' lower ascii control range in ANSI code pages */
#define ULMBCS_INVALIDCHAR 0xFFFF /* Invalid character value = convert failed */

/* special return values for FindLMBCSUniRange */
/* could fit in more than one LMBCS sbcs native encoding
   (example: most accented latin) */
#define ULMBCS_AMBIGUOUS_SBCS 0x80
/* could fit in more than one LMBCS mbcs native encoding
   (example: Unihan) */
#define ULMBCS_AMBIGUOUS_MBCS 0x81

/* macro to check compatibility of groups: true when 'xgroup' is a candidate
   converter for the ambiguous class 'agroup' (single-byte groups are below
   ULMBCS_DOUBLEOPTGROUP, double-byte groups are at or above it) */
#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \
((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \
(xgroup) < ULMBCS_DOUBLEOPTGROUP) || \
(((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \
(xgroup) >= ULMBCS_DOUBLEOPTGROUP))

/* Max size for 1 LMBCS char */
#define ULMBCS_CHARSIZE_MAX 3

/* JSGTODO: what is ICU standard debug assertion method?
   Invent an all-crash stop here, for now.
   The macro is wrapped in do { } while (0) so that it behaves as a single
   statement (safe in an unbraced if/else), and the deliberate null write goes
   through a volatile pointer so the compiler may not optimize it away. */
#if 1
#define MyAssert(b) do { if (!(b)) { *(volatile char *)0 = 1; } } while (0)
#else
#define MyAssert(b) do { } while (0)
#endif
/* Map Optimization group byte to converter name.
The table is indexed directly by the group byte and has ULMBCS_CTRLOFFSET
(0x20) slots; slots without an initializer are NULL. Note the following:
0x00 is dummy, and contains the name of the exceptions converter.
0x02 (Greek) is served by ibm-851.
0x0F and 0x14 are algorithmically calculated
0x09, 0x0A, 0x0D are data bytes (HT, LF, CR)
0x07, 0x0C and 0x0E are unused
NOTE(review): 0x12 names ibm-950 and 0x13 names ibm-1386; presumably these
are the Taiwan and PRC code pages respectively - confirm that this agrees
with the ULMBCS_GRP_CN / ULMBCS_GRP_TW constants.
*/
static const char * OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
/* 0x0000 */ "lmb-excp", /* No zero opt group: for non-standard entries */
/* 0x0001 */ "ibm-850",
/* 0x0002 */ "ibm-851",
/* 0x0003 */ "ibm-1255",
/* 0x0004 */ "ibm-1256",
/* 0x0005 */ "ibm-1251",
/* 0x0006 */ "ibm-852",
/* 0x0007 */ NULL, /* Unused */
/* 0x0008 */ "ibm-1254",
/* 0x0009 */ NULL, /* Control char HT */
/* 0x000A */ NULL, /* Control char LF */
/* 0x000B */ "ibm-874",
/* 0x000C */ NULL, /* Unused */
/* 0x000D */ NULL, /* Control char CR */
/* 0x000E */ NULL, /* Unused */
/* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */
/* 0x0010 */ "ibm-943",
/* 0x0011 */ "ibm-1361",
/* 0x0012 */ "ibm-950",
/* 0x0013 */ "ibm-1386"
/* The rest are null, including the 0x0014 Unicode compatibility region
and 0x0019, the 1-2-3 system range control char */
};
/* Map UNICODE ranges to converter indexes (or special values). */
ulmbcs_grp_t FindLMBCSUniRange(UChar uniChar, UErrorCode* err);

/* One row per inclusive Unicode range. Rows are kept in ascending order of
   uniEndRange because FindLMBCSUniRange scans forward until it meets the
   first row whose end is not below the character; code points that fall in a
   gap between rows are treated by that function as Unicode-group characters.
   The final 0xFFFF/0xFFFF row is the terminating sentinel.
   NOTE(review): 0x0120 is covered by two adjacent rows; the scan stops at the
   first one, so it is classified ULMBCS_AMBIGUOUS_SBCS - confirm whether the
   first of those rows was meant to end at 0x011F. */
struct _UniLMBCSGrpMap
{
    UChar uniStartRange;    /* first code unit of the range (inclusive) */
    UChar uniEndRange;      /* last code unit of the range (inclusive) */
    ulmbcs_grp_t GrpType;   /* LMBCS group, or an ULMBCS_AMBIGUOUS_* value */
} UniLMBCSGrpMap[]
=
{
    { 0x0001, 0x001F, ULMBCS_GRP_CTRL },
    { 0x0080, 0x009F, ULMBCS_GRP_CTRL },
    { 0x00A0, 0x0113, ULMBCS_AMBIGUOUS_SBCS },
    { 0x0115, 0x0120, ULMBCS_AMBIGUOUS_SBCS },
    { 0x0120, 0x012B, ULMBCS_GRP_EXCEPT },
    { 0x012C, 0x01CD, ULMBCS_AMBIGUOUS_SBCS },
    { 0x01CE, 0x01CE, ULMBCS_AMBIGUOUS_MBCS },
    { 0x01CF, 0x1FFF, ULMBCS_AMBIGUOUS_SBCS },
    { 0x2000, 0xFFFD, ULMBCS_AMBIGUOUS_MBCS },
    /* sentinel: the group value was previously left to implicit zero
       initialization, which is the value of ULMBCS_GRP_EXCEPT */
    { 0xFFFF, 0xFFFF, ULMBCS_GRP_EXCEPT }
};
/*
 * Classify one Unicode code unit for LMBCS encoding.
 *
 * uniChar  the code unit to classify
 * err      receives U_INVALID_CHAR_FOUND when uniChar lies beyond every real
 *          range of UniLMBCSGrpMap (i.e. it only matches the 0xFFFF sentinel
 *          row); otherwise it is left untouched. Must not be NULL.
 *
 * Returns the GrpType of the first table row that contains uniChar (an LMBCS
 * group, or one of the ULMBCS_AMBIGUOUS_* values). Characters that fall in a
 * gap between rows, and characters rejected with an error, are reported as
 * ULMBCS_GRP_UNICODE.
 */
ulmbcs_grp_t FindLMBCSUniRange(UChar uniChar, UErrorCode* err)
{
    const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap;

    /* The sentinel row ends at 0xFFFF, so this scan always terminates. */
    while (uniChar > pTable->uniEndRange)
    {
        pTable++;
    }

    if (pTable->uniStartRange == 0xFFFF)
    {
        /* Only the sentinel row is left: the character is past the last
           real range. Report it with a genuine UErrorCode (the LMBCS-level
           ULMBCS_INVALIDCHAR value is not one), and do not hand back the
           sentinel's placeholder group for 0xFFFF itself. */
        *err = U_INVALID_CHAR_FOUND;
        return ULMBCS_GRP_UNICODE;
    }

    if (uniChar >= pTable->uniStartRange)
    {
        return pTable->GrpType;
    }

    /* in a gap between two rows */
    return ULMBCS_GRP_UNICODE;
}
#if 0
/* JSGTODO (by Brendan?) some incomplete source data from Brendan to be integrated.
   Everything up to the matching #endif is compiled out. The rows look like
   (code point, LMBCS group, flag) triples - TODO confirm; the ULMBCS_FLAGS_*
   names are not defined in the part of the file reviewed here. */
0xFE30, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
0xFA2E, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0xF8FF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
0xD7FF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0xABFF, ULMBCS_GRP_KO, ULMBCS_FLAGS_UNICODE,
0x9FFF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0x31FF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
0x318F, ULMBCS_GRP_CN, ULMBCS_FLAGS_CONTINUE,
0x3130, ULMBCS_GRP_KO, ULMBCS_FLAGS_UNICODE,
0x3100, ULMBCS_GRP_CN, ULMBCS_FLAGS_CONTINUE,
0x313F, ULMBCS_GRP_JA, ULMBCS_FLAGS_UNICODE,
0x2FFF, ULMBCS_GRP_JA, ULMBCS_FLAGS_CONTINUE,
0x2714, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0x2000, ULMBCS_GRP_L1, ULMBCS_FLAGS_CONTINUE,
0x0E5C, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0x0E00, ULMBCS_GRP_TH, ULMBCS_FLAGS_UNICODE,
0x06FF, ULMBCS_GRP_UNICODE, ULMBCS_FLAGS_UNICODE,
0x0600, ULMBCS_GRP_AR, ULMBCS_FLAGS_UNICODE,
0x0500, ULMBCS_GRP_HE, ULMBCS_FLAGS_UNICODE,
0x0400, ULMBCS_GRP_RU, ULMBCS_FLAGS_UNICODE,
0x0300, ULMBCS_GRP_GR, ULMBCS_FLAGS_UNICODE,
0x001F, ULMBCS_GRP_L1, ULMBCS_FLAGS_CONTINUE,
0x0000, ULMBCS_GRP_CTRL, ULMBCS_FLAGS_UNICODE
#endif
int LMBCSConversionWorker (
    UConverterDataLMBCS * extraInfo, ulmbcs_grp_t group,
    uint8_t * pStartLMBCS, UChar * pUniChar,
    ulmbcs_grp_t * lastConverterIndex, bool_t * groups_tried,
    UErrorCode* err);

/*
 * Try to encode one Unicode character with the sub-converter of one LMBCS
 * optimization group.
 *
 * extraInfo           LMBCS converter state; supplies OptGrpConverter[] and
 *                     the converter's own OptGroup
 * group               group to try; must be below ULMBCS_GRP_UNICODE and have
 *                     a converter loaded (both are asserted)
 * pStartLMBCS         output buffer; receives at most ULMBCS_CHARSIZE_MAX
 *                     bytes (group prefix plus converted data)
 * pUniChar            points at the single UChar to convert
 * lastConverterIndex  set to 'group' when the conversion succeeds
 * groups_tried        groups_tried[group] is set to TRUE when it fails
 * err                 currently unused; sub-converter failures are reported
 *                     only through the 0 return value
 *
 * Returns the number of bytes written at pStartLMBCS, or 0 when this group
 * cannot represent the character.
 */
int LMBCSConversionWorker (
    UConverterDataLMBCS * extraInfo, ulmbcs_grp_t group,
    uint8_t * pStartLMBCS, UChar * pUniChar,
    ulmbcs_grp_t * lastConverterIndex, bool_t * groups_tried,
    UErrorCode * err)
{
    uint8_t * pLMBCS = pStartLMBCS;
    UConverter * xcnv;
    uint8_t mbChar [ULMBCS_CHARSIZE_MAX];
    uint8_t * pmbChar = mbChar;
    bool_t isDoubleByteGroup = (group >= ULMBCS_DOUBLEOPTGROUP) ? TRUE : FALSE;
    UErrorCode localErr = 0;   /* 0 is the "no error" UErrorCode value */
    int bytesConverted = 0;

    /* validate the group before it is used as an array index */
    MyAssert(group<ULMBCS_GRP_UNICODE);
    xcnv = extraInfo->OptGrpConverter[group];
    MyAssert(xcnv);

    ucnv_fromUnicode(xcnv, (char **)&pmbChar,(char *)mbChar+sizeof(mbChar),&pUniChar,pUniChar+1,NULL,TRUE,&localErr);
    bytesConverted = pmbChar - mbChar;
    pmbChar = mbChar;

    /* most common failure mode is the sub-converter using its substitution char.
       The error and empty-output tests come first so that mbChar[0] is only
       inspected when the sub-converter actually wrote to it.
    */
    if (U_FAILURE(localErr) || !bytesConverted || *mbChar == xcnv->subChar[0])
    {
        /* JSGTODO: are there some local failure modes that ought to be bubbled up in some other way? */
        groups_tried[group] = TRUE;
        return 0;
    }
    *lastConverterIndex = group;

    /* All initial byte values in lower ascii range should have been caught by now,
       except with the exception group.
       Uncomment this assert to find them.
    */
    /* MyAssert((*pmbChar <= ULMBCS_C0END) || (*pmbChar >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); */

    /* use converted data: first write 0, 1 or two group bytes.
       No prefix is written for the exceptions group (its converted bytes
       already start with the group byte) or for the converter's own
       optimization group (implicit in the stream). */
    if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
    {
        *pLMBCS++ = group;
        if (bytesConverted == 1 && isDoubleByteGroup)
        {
            /* single-byte result from a double-byte group: double the group byte */
            *pLMBCS++ = group;
        }
    }

    /* then move over the converted data (bytesConverted is at least 1 here) */
    do
    {
        *pLMBCS++ = *pmbChar++;
    }
    while(--bytesConverted);

    return (pLMBCS - pStartLMBCS);
}
/*
 * Write one UChar in the LMBCS Unicode compatibility form: the
 * ULMBCS_GRP_UNICODE group byte followed by the big-endian code unit. A zero
 * low byte is never emitted; such a value is written as ULMBCS_UNICOMPATZERO
 * followed by the high byte.
 * pLMBCS must have room for 3 bytes. Returns the number of bytes written.
 */
static int LMBCSPutUnicodeGroup(uint8_t * pLMBCS, UChar uniChar)
{
    uint8_t LowCh = (uint8_t) (uniChar & 0x00FF);
    uint8_t HighCh = (uint8_t) (uniChar >> 8);

    pLMBCS[0] = ULMBCS_GRP_UNICODE;
    if (LowCh == 0)
    {
        pLMBCS[1] = ULMBCS_UNICOMPATZERO;
        pLMBCS[2] = HighCh;
    }
    else
    {
        pLMBCS[1] = HighCh;
        pLMBCS[2] = LowCh;
    }
    return 3;
}

/*
 * Convert Unicode string to LMBCS.
 *
 * _this        the LMBCS converter; its extraInfo holds the sub-converters
 * target       in/out: next free byte of the output buffer
 * targetLimit  one past the end of the output buffer
 * source       in/out: next UChar to convert
 * sourceLimit  one past the last UChar to convert
 * offsets      currently unused
 * flush        currently unused
 * err          in/out error code; nothing is done if it is NULL or already
 *              holds a failure
 *
 * Each source character is encoded into a small local buffer and copied to
 * the target only if it fits completely. When it does not fit, *source is
 * left pointing at that character and *err is set to
 * U_INDEX_OUTOFBOUNDS_ERROR.
 * NOTE(review): rewinding the source on a full target is an assumption about
 * what the caller expects - confirm against the generic converter framework.
 */
void _LMBCSFromUnicode(UConverter* _this,
    char** target,
    const char* targetLimit,
    const UChar** source,
    const UChar* sourceLimit,
    int32_t * offsets,
    bool_t flush,
    UErrorCode* err)
{
    ulmbcs_grp_t lastConverterIndex = 0;
    UChar uniChar;
    uint8_t LMBCS[ULMBCS_CHARSIZE_MAX];
    uint8_t * pLMBCS;
    int bytes_written;
    bool_t groups_tried[ULMBCS_GRP_LAST];
    UConverterDataLMBCS * extraInfo;

    /* Arguments Check */
    if (!err || U_FAILURE(*err))
    {
        return;
    }
    if (sourceLimit < *source)
    {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    extraInfo = (UConverterDataLMBCS *) _this->extraInfo;

    /* The limit is tested before every read, so an empty source is a no-op
       and nothing at or beyond sourceLimit is ever dereferenced. */
    while (*source < sourceLimit && !U_FAILURE(*err))
    {
        uniChar = *(*source)++;
        bytes_written = 0;
        pLMBCS = LMBCS;

        /* single byte matches */
        if (uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
            uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE ||
            ((uniChar >= ULMBCS_CTRLOFFSET) && (uniChar < ULMBCS_C1START)))
        {
            *pLMBCS++ = (uint8_t) uniChar;
            bytes_written = 1;
        }
        else
        {
            /* Check by UNICODE range */
            ulmbcs_grp_t group = FindLMBCSUniRange(uniChar,err);

            if (group == ULMBCS_GRP_UNICODE)
            {
                /* encode into LMBCS Unicode range */
                bytes_written = LMBCSPutUnicodeGroup(pLMBCS, uniChar);
            }
            else if (group == ULMBCS_GRP_CTRL)
            {
                /* Handle control characters here */
                if (uniChar <= ULMBCS_C0END)
                {
                    /* C0 controls are shifted up by ULMBCS_CTRLOFFSET */
                    *pLMBCS++ = ULMBCS_GRP_CTRL;
                    *pLMBCS++ = ULMBCS_CTRLOFFSET + (uint8_t) uniChar;
                }
                else if (uniChar >= ULMBCS_C1START && uniChar < ULMBCS_C1START + ULMBCS_CTRLOFFSET)
                {
                    /* C1 controls are written as is */
                    *pLMBCS++ = ULMBCS_GRP_CTRL;
                    *pLMBCS++ = (uint8_t) (uniChar & 0x00FF);
                }
                bytes_written = (int) (pLMBCS - LMBCS);
            }
            else if (group < ULMBCS_GRP_UNICODE)
            {
                /* a specific converter has been identified - use it */
                bytes_written = LMBCSConversionWorker (
                    extraInfo, group, pLMBCS, &uniChar,
                    &lastConverterIndex, groups_tried, err);
                MyAssert(bytes_written); /* table should never return unusable group */
            }
            else /* the ambiguous group cases */
            {
                ulmbcs_grp_t grp_start;
                ulmbcs_grp_t grp_end;
                ulmbcs_grp_t grp_ix;

                memset(groups_tried, 0, sizeof(groups_tried));

                /* check for non-default optimization group */
                if (extraInfo->OptGroup != ULMBCS_DEFAULTOPTGROUP
                    && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))
                {
                    bytes_written = LMBCSConversionWorker (extraInfo,
                        extraInfo->OptGroup, pLMBCS, &uniChar,
                        &lastConverterIndex, groups_tried, err);
                }
                /* check for locale optimization group */
                if (!bytes_written
                    && (extraInfo->localeConverterIndex)
                    && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex)))
                {
                    bytes_written = LMBCSConversionWorker (extraInfo,
                        extraInfo->localeConverterIndex, pLMBCS, &uniChar,
                        &lastConverterIndex, groups_tried, err);
                }
                /* check for last optimization group used for this string */
                if (!bytes_written
                    && (lastConverterIndex)
                    && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex)))
                {
                    bytes_written = LMBCSConversionWorker (extraInfo,
                        lastConverterIndex, pLMBCS, &uniChar,
                        &lastConverterIndex, groups_tried, err);
                }

                /* just check every matching converter that has not been tried yet */
                grp_start = (group == ULMBCS_AMBIGUOUS_MBCS)
                    ? ULMBCS_DOUBLEOPTGROUP
                    : ULMBCS_GRP_L1;
                grp_end = (group == ULMBCS_AMBIGUOUS_MBCS)
                    ? ULMBCS_GRP_LAST-1
                    : ULMBCS_GRP_TH;
                for (grp_ix = grp_start;
                    grp_ix <= grp_end && !bytes_written;
                    grp_ix++)
                {
                    if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix])
                    {
                        bytes_written = LMBCSConversionWorker (extraInfo,
                            grp_ix, pLMBCS, &uniChar,
                            &lastConverterIndex, groups_tried, err);
                    }
                }

                /* a final conversion fallback for sbcs to the exceptions group */
                if (!bytes_written && group == ULMBCS_AMBIGUOUS_SBCS)
                {
                    bytes_written = LMBCSConversionWorker (extraInfo,
                        ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
                        &lastConverterIndex, groups_tried, err);
                }

                /* all of our strategies failed. Fallback to Unicode. Consider adding these to table */
                if (!bytes_written)
                {
                    bytes_written = LMBCSPutUnicodeGroup(pLMBCS, uniChar);
                }
            }
        }

        /* Never write a partial character: if the encoded bytes do not fit,
           give the character back to the caller and report the full target. */
        if (targetLimit - *target < bytes_written)
        {
            (*source)--;
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        /* now that we are sure it all fits, move it in */
        for (pLMBCS = LMBCS; bytes_written > 0; bytes_written--)
        {
            *(*target)++ = (char) *pLMBCS++;
        }
    }
}
/*
 * Return the Unicode representation for the current LMBCS character.
 *
 * _this        the LMBCS converter; its extraInfo holds the sub-converters
 *              and the implicit optimization group
 * source       in/out: points at the first byte of an LMBCS character on
 *              entry; advanced past the bytes that were consumed
 * sourceLimit  one past the last available source byte
 * err          receives U_INDEX_OUTOFBOUNDS_ERROR for empty input,
 *              U_TRUNCATED_CHAR_FOUND when the character is cut off by
 *              sourceLimit, and U_INVALID_CHAR_FOUND when a group has no
 *              converter
 *
 * Returns the converted UChar. On every error path the return value is 0, so
 * callers must look at *err to tell an error from a genuine U+0000.
 */
UChar _LMBCSGetNextUChar(UConverter* _this,
    const char** source,
    const char* sourceLimit,
    UErrorCode* err)
{
    uint8_t CurByte;                     /* A byte from the input stream */
    UChar uniChar = 0;                   /* an output UNICODE char */
    UChar mbChar;                        /* an intermediate multi-byte value (mbcs or LMBCS) */
    CompactShortArray *MyCArray = NULL;
    UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
    ulmbcs_grp_t group = 0;
    UConverter* cnv = 0;

    if (*source >= sourceLimit)
    {
        /* nothing to read */
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return uniChar;
    }

    /* Opt Group (or first data byte) */
    CurByte = *((const uint8_t *) (*source)++);

    /* at entry of each if clause:
       1. 'CurByte' holds the first byte of a LMBCS character
       2. '*source' points to the next byte of the source stream after 'CurByte'
       the job of each if clause is:
       1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte)
       2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
    */

    /* First lets check the simple fixed values. */
    if (CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR ||
        CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE ||
        ((CurByte >= ULMBCS_CTRLOFFSET) && (CurByte < ULMBCS_C1START)))
    {
        uniChar = CurByte;
    }
    else if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
    {
        if (*source >= sourceLimit)
        {
            *err = U_TRUNCATED_CHAR_FOUND;
        }
        else
        {
            uint8_t C0C1byte = (uint8_t) *(*source)++;
            /* C0 controls were stored shifted up by ULMBCS_CTRLOFFSET */
            uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
        }
    }
    else if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BE as is */
    {
        if (sourceLimit - *source < 2)
        {
            *err = U_TRUNCATED_CHAR_FOUND;
        }
        else
        {
            uint8_t HighCh, LowCh;
            HighCh = (uint8_t) *(*source)++; /* Big-endian Unicode in LMBCS compatibility group*/
            LowCh = (uint8_t) *(*source)++;
            if (HighCh == ULMBCS_UNICOMPATZERO )
            {
                HighCh = LowCh;
                LowCh = 0; /* zero-byte in LSB special character */
            }
            uniChar = (HighCh << 8) | LowCh;
        }
    }
    else if (CurByte <= ULMBCS_CTRLOFFSET)
    {
        /* 0x20 itself was taken by the single-byte test above,
           so only values below ULMBCS_CTRLOFFSET arrive here */
        group = CurByte; /* group byte is in the source */
        cnv = extraInfo->OptGrpConverter[group];
        if (!cnv)
        {
            /* this is not a valid group byte - no converter*/
            *err = U_INVALID_CHAR_FOUND;
        }
        else if (group >= ULMBCS_DOUBLEOPTGROUP) /* double byte conversion */
        {
            if (sourceLimit - *source < 2)
            {
                *err = U_TRUNCATED_CHAR_FOUND;
            }
            else
            {
                uint8_t HighCh, LowCh;
                HighCh = (uint8_t) *(*source)++;
                LowCh = (uint8_t) *(*source)++;
                /* check for LMBCS doubled-group-byte case */
                mbChar = (HighCh == group) ? LowCh : (HighCh<<8) | LowCh;
                MyCArray = cnv->sharedData->table->mbcs.toUnicode;
                uniChar = (UChar) ucmp16_getu (MyCArray, mbChar);
            }
        }
        else /* single byte conversion */
        {
            if (*source >= sourceLimit)
            {
                *err = U_TRUNCATED_CHAR_FOUND;
            }
            else
            {
                CurByte = (uint8_t) *(*source)++;
                if (CurByte >= ULMBCS_C1START)
                {
                    uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
                }
                else
                {
                    /* The non-optimizable oddballs where there is an explicit byte
                     * AND the second byte is not in the upper ascii range
                     */
                    cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
                    /* Lookup value must include opt group */
                    mbChar = (UChar)(group << 8) | (UChar) CurByte;
                    MyCArray = cnv->sharedData->table->mbcs.toUnicode;
                    uniChar = (UChar) ucmp16_getu(MyCArray, mbChar);
                }
            }
        }
    }
    else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */
    {
        group = extraInfo->OptGroup;
        cnv = extraInfo->OptGrpConverter[group];
        if (!cnv)
        {
            /* the optimization group has no converter loaded */
            *err = U_INVALID_CHAR_FOUND;
        }
        else if (group >= ULMBCS_DOUBLEOPTGROUP) /* double byte conversion */
        {
            /* JSGTODO need to deal with case of single byte G1
               chars in mbcs groups */
            if (*source >= sourceLimit)
            {
                *err = U_TRUNCATED_CHAR_FOUND;
            }
            else
            {
                uint8_t HighCh, LowCh;
                HighCh = CurByte;
                LowCh = (uint8_t) *(*source)++;
                mbChar = (HighCh<<8) | LowCh;
                MyCArray = cnv->sharedData->table->mbcs.toUnicode;
                uniChar = (UChar) ucmp16_getu (MyCArray, mbChar);
                /* both bytes of the character are consumed at this point;
                   the source must not be advanced any further */
            }
        }
        else /* single byte conversion */
        {
            uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
        }
    }
    else
    {
        /* JSGTODO: assert here: we should never get here, because the
           branches above cover every possible value of CurByte. */
    }

    /* JSGTODO: need to correctly deal with partial chars */
    return uniChar;
}
/*
 * Convert LMBCS bytes to Unicode, optionally recording source offsets.
 *
 * Repeatedly calls _LMBCSGetNextUChar() and stores each resulting UChar in
 * the target buffer until the source is exhausted, the target is full, or
 * *err leaves the success range.
 *
 *   _this        the LMBCS converter; must not be NULL
 *   target       in/out: write cursor, advanced past every UChar stored
 *   targetLimit  one past the last writable UChar
 *   source       in/out: read cursor, advanced by _LMBCSGetNextUChar()
 *   sourceLimit  one past the last readable byte
 *   offsets      optional (may be NULL); for every UChar stored, receives the
 *                byte offset, relative to the initial *source, at which the
 *                character started
 *   flush        not consulted by the current implementation
 *   err          in/out error code; the call is a no-op if it is NULL or
 *                already holds a failure
 *
 * A result equal to missingUCharMarker is dropped: nothing is stored and the
 * offsets cursor does not advance.
 */
void _LMBCSToUnicodeWithOffsets(UConverter*    _this,
                                UChar**        target,
                                const UChar*   targetLimit,
                                const char**   source,
                                const char*    sourceLimit,
                                int32_t*       offsets,
                                bool_t         flush,
                                UErrorCode*    err)
{
   UChar uniChar;    /* an output UNICODE char */
   /* The next four locals are only needed by the disabled partial-character
      code below; they are kept so that code can be restored unchanged. */
   CompactShortArray *MyCArray = NULL;
   UConverterDataLMBCS * extraInfo = NULL;
   ulmbcs_grp_t group = 0;
   UConverter* cnv = 0;
   const char * pStartLMBCS;

   if (!err || U_FAILURE(*err))
   {
      return;
   }
   /* Validate every pointer BEFORE it is dereferenced. */
   if ((_this == NULL) || (target == NULL) || (source == NULL) ||
       (targetLimit < *target) || (sourceLimit < *source))
   {
      *err = U_ILLEGAL_ARGUMENT_ERROR;
      return;
   }

   /* Safe to touch the arguments only now that they have been checked. */
   extraInfo   = (UConverterDataLMBCS *) _this->extraInfo;
   pStartLMBCS = *source;

#if 0 /* JSGTODOD - restore incomplete char handling */
   /* Have we arrived here from a prior conversion ending with a partial char?
      The only possible configurations are:
      1. mode contains the group byte of SBCS LMBCS char;
      2. mode contains the group byte of MBCS LMBCS char
         For both continue with next char in input buffer
      3. mode contains group byte + 1st data byte of MBCS LMBCS char
         Partially process & get the second data byte
      4. mode contains both group bytes of double group-byte MBCS LMBCS char
         Nuke contents after setting up converter & continue with buffer data
   */
   if (_this->toUnicodeStatus)
   {
      mbChar = (UChar) _this->mode;    /* Restore the previously calculated char */
      _this->toUnicodeStatus = 0;      /* Reset other fields*/
      _this->invalidCharLength = 0;
      /* Check if this is a partial MBCS char (fall through if SBCS) */
      if (mbChar > 0xFF)
      {
         /* Select the correct converter */
         group = (mbChar >> 8) & 0x00FF;
         cnv = extraInfo->OptGrpConverter[group];
         /* Pick up the converter table */
         MyCArray = cnv->sharedData->table->mbcs.toUnicode;
         /* Use only data byte: NULL if the character has pair of group-bytes */
         /* (parenthesized: '<' binds tighter than '&') */
         if ((mbChar & 0x00FF) < ULMBCS_MAXGRPBYTE)
            CurByte = 0;
         else
            CurByte = ((mbChar & 0x00FF) << 8);
         /* Add the current char from the buffer */
         CurByte |= *((uint8_t *) (*source)++);
         goto continueWithPartialMBCSChar;
      }
      else
      {
         goto continueWithPartialChar;
      }
   }
#endif

   /* Process from source to limit.  U_SUCCESS (not !*err) so that a warning
      code, which is not a failure, does not silently stop the conversion. */
   while (U_SUCCESS(*err) && sourceLimit > *source && targetLimit > *target)
   {
      if (offsets)
      {
         /* Tentatively record where this character starts; the slot is
            only committed (cursor advanced) if a UChar is stored below. */
         *offsets = (*source) - pStartLMBCS;
      }
      uniChar = _LMBCSGetNextUChar(_this, source, sourceLimit, err);

      /* last step is always to move the new value into the buffer */
      if (U_SUCCESS(*err) && uniChar != missingUCharMarker)
      {
         /* JSGTODO deal with missingUCharMarker case for error/info reporting. */
         *(*target)++ = uniChar;
         if (offsets)
         {
            offsets++;
         }
      }
   }

#if 0
   /* JSGTODO restore partial char handling */
   /* Check to see if we've fallen through because of a partial char */
   if (*err == U_TRUNCATED_CHAR_FOUND)
   {
      _this->mode = mbChar;   /* Save current partial char */
   }
#endif
}
/*
 * Convert an LMBCS string to Unicode.
 *
 * This entry point has no logic of its own: every argument, including
 * offsets, is handed on unchanged to _LMBCSToUnicodeWithOffsets(), which
 * performs the conversion and reports errors through err.
 */
void _LMBCSToUnicode(UConverter*    _this,
                     UChar**        target,
                     const UChar*   targetLimit,
                     const char**   source,
                     const char*    sourceLimit,
                     int32_t*       offsets,
                     bool_t         flush,
                     UErrorCode*    err)
{
   _LMBCSToUnicodeWithOffsets(_this,
                              target, targetLimit,
                              source, sourceLimit,
                              offsets,
                              flush,
                              err);
}
/*
 * Common body of all the _LMBCSOpen<n> entry points.
 *
 * Allocates the per-converter UConverterDataLMBCS record, fills every
 * OptGrpConverter slot (an opened converter where OptGroupByteToCPName has
 * a name for that index, NULL where it does not), stores the requested
 * default optimization group, and attaches the record to _this->extraInfo.
 *
 *   _this     converter being opened; receives the new record (or NULL)
 *   name      not used here
 *   locale    not used here
 *   err       set to U_MEMORY_ALLOCATION_ERROR if the record cannot be
 *             allocated; also handed to every ucnv_open() call
 *   OptGroup  default optimization group for this converter
 */
static void _LMBCSOpenWorker(UConverter*  _this,
                             const char*  name,
                             const char*  locale,
                             UErrorCode*  err,
                             ulmbcs_grp_t OptGroup
                             )
{
   UConverterDataLMBCS * lmbcsData = uprv_malloc (sizeof (UConverterDataLMBCS));

   if (lmbcsData == NULL)
   {
      *err = U_MEMORY_ALLOCATION_ERROR;
   }
   else
   {
      ulmbcs_grp_t slot;
      ulmbcs_grp_t slotCount;

      /* Number of entries in the per-group converter table. */
      slotCount = sizeof(lmbcsData->OptGrpConverter)/sizeof(lmbcsData->OptGrpConverter[0]);

      for (slot = 0; slot < slotCount; slot++)
      {
         if (OptGroupByteToCPName[slot] != NULL)
         {
            lmbcsData->OptGrpConverter[slot] = ucnv_open(OptGroupByteToCPName[slot], err);
         }
         else
         {
            /* No code page is associated with this group index. */
            lmbcsData->OptGrpConverter[slot] = NULL;
         }
      }

      lmbcsData->OptGroup = OptGroup;
      /* JSGTODO: add LocaleConverterIndex logic here */
      lmbcsData->localeConverterIndex = 0;
   }

   /* Attached in both cases: NULL when the allocation failed. */
   _this->extraInfo = lmbcsData;
}
/*
 * Release everything _LMBCSOpenWorker() attached to the converter.
 *
 * Closes every non-NULL sub-converter held in OptGrpConverter, frees the
 * UConverterDataLMBCS record and clears _this->extraInfo.  Does nothing if
 * no record is attached.
 *
 * The loop covers the WHOLE OptGrpConverter array, mirroring the loop in
 * _LMBCSOpenWorker() that populates every slot; bounding it by a group
 * constant instead would leak any converter opened at or above that index.
 */
static void _LMBCSClose(UConverter *   _this)
{
   UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;

   if (extraInfo != NULL)
   {
      ulmbcs_grp_t Ix;
      ulmbcs_grp_t IxMax;

      IxMax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);
      for (Ix=0; Ix < IxMax; Ix++)
      {
         if (extraInfo->OptGrpConverter[Ix] != NULL)
         {
            ucnv_close (extraInfo->OptGrpConverter[Ix]);
         }
      }
      uprv_free (extraInfo);
      /* Do not leave a dangling pointer behind on the converter. */
      _this->extraInfo = NULL;
   }
}
/*
 * DEFINE_LMBCS_OPEN(n) emits the static function _LMBCSOpen<n>, an open
 * callback that forwards to _LMBCSOpenWorker() with n as the default
 * optimization group.
 *
 * The last line of the macro deliberately has NO trailing backslash: a
 * continuation there splices whatever line follows (here the first
 * instantiation) into the macro body.
 */
#define DEFINE_LMBCS_OPEN(n) \
static void _LMBCSOpen##n(UConverter* _this,const char* name,const char* locale,UErrorCode* err) \
{ _LMBCSOpenWorker(_this, name,locale, err, n);}

/* One open callback per supported LMBCS optimization group. */
DEFINE_LMBCS_OPEN(1)
DEFINE_LMBCS_OPEN(2)
DEFINE_LMBCS_OPEN(3)
DEFINE_LMBCS_OPEN(4)
DEFINE_LMBCS_OPEN(5)
DEFINE_LMBCS_OPEN(6)
DEFINE_LMBCS_OPEN(8)
DEFINE_LMBCS_OPEN(11)
DEFINE_LMBCS_OPEN(16)
DEFINE_LMBCS_OPEN(17)
DEFINE_LMBCS_OPEN(18)
DEFINE_LMBCS_OPEN(19)
/*
 * DECLARE_LMBCS_DATA(n) emits, for optimization group n:
 *   - _LMBCSImpl<n>, the static UConverterImpl function table, wired to
 *     _LMBCSOpen<n> and the shared LMBCS close/convert routines;
 *   - _LMBCSData<n>, the externally visible UConverterSharedData record
 *     that points at that table and is named "LMBCS_<n>".
 *
 * The name is built with "LMBCS_" #n, i.e. stringizing followed by ordinary
 * adjacent-string-literal concatenation.  The ## operator must not be used
 * here: pasting two string literals does not form a valid preprocessing
 * token and is rejected by conforming preprocessors.
 *
 * NOTE(review): the shared-data initializer uses UCNV_LMBCS_1 for every n,
 * while the function table uses UCNV_LMBCS_##n.  Left as-is; confirm
 * whether the shared record should carry the per-group type as well.
 */
#define DECLARE_LMBCS_DATA(n) \
static const UConverterImpl _LMBCSImpl##n={\
    UCNV_LMBCS_##n,\
    NULL,NULL,\
    _LMBCSOpen##n,\
    _LMBCSClose,\
    NULL,\
    _LMBCSToUnicode,\
    _LMBCSToUnicodeWithOffsets,\
    _LMBCSFromUnicode,\
    NULL,\
    _LMBCSGetNextUChar,\
    NULL\
};\
extern const UConverterSharedData _LMBCSData##n={\
    sizeof(UConverterSharedData), ~0,\
    NULL, NULL, &_LMBCSImpl##n, "LMBCS_" #n,\
    0, UCNV_IBM, UCNV_LMBCS_1, 1, 1,\
    { 0, 1, 0x3f, 0, 0, 0 }\
};

/* One implementation table and shared-data record per optimization group. */
DECLARE_LMBCS_DATA(1)
DECLARE_LMBCS_DATA(2)
DECLARE_LMBCS_DATA(3)
DECLARE_LMBCS_DATA(4)
DECLARE_LMBCS_DATA(5)
DECLARE_LMBCS_DATA(6)
DECLARE_LMBCS_DATA(8)
DECLARE_LMBCS_DATA(11)
DECLARE_LMBCS_DATA(16)
DECLARE_LMBCS_DATA(17)
DECLARE_LMBCS_DATA(18)
DECLARE_LMBCS_DATA(19)

View file

@ -60,8 +60,24 @@ typedef enum {
UCNV_UTF16_LittleEndian = 6,
UCNV_EBCDIC_STATEFUL = 7,
UCNV_ISO_2022 = 8,
UCNV_LMBCS_1 = 9,
UCNV_LMBCS_2,
UCNV_LMBCS_3,
UCNV_LMBCS_4,
UCNV_LMBCS_5,
UCNV_LMBCS_6,
UCNV_LMBCS_8,
UCNV_LMBCS_11,
UCNV_LMBCS_16,
UCNV_LMBCS_17,
UCNV_LMBCS_18,
UCNV_LMBCS_19,
UCNV_LMBCS_LAST = UCNV_LMBCS_19,
/* Number of converter types for which we have conversion routines. */
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = UCNV_LMBCS_LAST+1
} UConverterType;
/* ### move the following typedef and array into implementation files! */
@ -256,6 +272,17 @@ typedef struct
}
UConverterDataISO2022;
/*
 * Per-converter state for an LMBCS converter; the LMBCS code stores a
 * pointer to one of these in UConverter.extraInfo.
 */
typedef struct
{
/* Sub-converter for each group index.  _LMBCSOpenWorker fills every slot:
   an opened converter where a code page name exists for that index, NULL
   otherwise.  The to-Unicode code indexes this table by group byte. */
UConverter *OptGrpConverter[0x20]; /* Converter per Opt. grp. */
/* Group used when a source byte carries no explicit group byte. */
uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
/* Currently always set to 0 by _LMBCSOpenWorker (locale logic is a TODO). */
uint8_t localeConverterIndex; /* reasonable locale match for index */
}
UConverterDataLMBCS;
#define CONVERTER_FILE_EXTENSION ".cnv"
#endif /* _UCNV_BLD */

View file

@ -47,7 +47,19 @@ UTF16_PlatformEndian iso-10646-ucs-2 csUnicode utf16 utf-16 ibm-1200 ibm1200
UTF16_OppositeEndian
LATIN_1 iso-8859-1 iso_8859-1 ibm-819 ibm819 cp819 latin1 latin-1 ascii ascii-7 us-ascii 8859-1 csisolatin1 iso-ir-100 iso_8859-1:1978 #!!!!! There's whole lot of names for this - cp367 csASCII etc.
ISO_2022 iso-2022 2022 cp2022 iso2022 iso_2022
LMBCS
LMBCS-1 lmbcs
LMBCS-2
LMBCS-3
LMBCS-4
LMBCS-5
LMBCS-6
LMBCS-8
LMBCS-11
LMBCS-16
LMBCS-17
LMBCS-18
LMBCS-19
# Table-based
@ -76,10 +88,13 @@ ibm-1383 euc-cn euccn ibm-eucCN # China EUC
#ibm-1162 tis-620 cp874 windows-874 ms874 # Thai (w/ euro support) #what is the connection between this and the one below!!!
ibm-874 ibm-1161 #same as 1162 (w/o euro update) ***This is commented out in Helena's
lmb-excp # special exceptions list for LMBCS algorithm
# Platform codepages
ibm-437 ibm437 cp437 csPC8CodePage437 437 # PC US
# HSYS:
ibm-850 IBM850 cp850 850 csPC850Multilingual # PC latin1
ibm-851 IBM851 cp851 851 csPC851 # PC DOS Greek (no euro)
ibm-858 ibm858 cp858 # PC latin1 with Euro cp850 removed
ibm-9044 IBM852 852 csPCp852 cp852 # PC latin2 (w/ euro update) #where should the names go here or below - inconsistency!!!
ibm-852 # PC latin2 (w/o euro update)

View file

@ -0,0 +1,285 @@
# ******************************************************************************
# *
# * Copyright (C) 1995-2000, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# ******************************************************************************
#
# File created on Fri Feb 11 14:11:00 2000
#
# File created manually
# from source files IBM-851.TXMAP100
#
# Table Version : 1.00
#
<code_set_name> "IBM-851"
<char_name_mask> "AXXXX"
<mb_cur_max> 1
<mb_cur_min> 1
<uconv_class> "SBCS"
<subchar> \x7F
#
CHARMAP
#
#
#ISO 10646 IBM-851
#_________ _________
<U0000> \x00 # ..NUL...
<U0001> \x01 # ..SOH...
<U0002> \x02 # ..STX...
<U0003> \x03 # ..ETX...
<U0004> \x04 # ..EOT...
<U0005> \x05 # ..ENQ...
<U0006> \x06 # ..ACK...
<U0007> \x07 # ..BEL...
<U0008> \x08 # ...BS...
<U0009> \x09 # ...HT...
<U000A> \x0A # ...LF...
<U000B> \x0B # ...VT...
<U000C> \x0C # ...FF...
<U000D> \x0D # ...CR...
<U000E> \x0E # .SO/LS1.
<U000F> \x0F # .SI/LS0.
<U0010> \x10 # ..DLE...
<U0011> \x11 # ..DC1...
<U0012> \x12 # ..DC2...
<U0013> \x13 # ..DC3...
<U0014> \x14 # ..DC4...
<U0015> \x15 # ..NAK...
<U0016> \x16 # ..SYN...
<U0017> \x17 # ..ETB...
<U0018> \x18 # ..CAN...
<U0019> \x19 # ...EM...
<U001C> \x1A # ..IFS...
<U001B> \x1B # ..ESC...
<U007F> \x1C # ..DEL...
<U001D> \x1D # ...GS...
<U001E> \x1E # ...RS...
<U001F> \x1F # ...US...
<U0020> \x20 # SP010000
<U0021> \x21 # SP020000
<U0022> \x22 # SP040000
<U0023> \x23 # SM010000
<U0024> \x24 # SC030000
<U0025> \x25 # SM020000
<U0026> \x26 # SM030000
<U0027> \x27 # SP050000
<U0028> \x28 # SP060000
<U0029> \x29 # SP070000
<U002A> \x2A # SM040000
<U002B> \x2B # SA010000
<U002C> \x2C # SP080000
<U002D> \x2D # SP100000
<U002E> \x2E # SP110000
<U002F> \x2F # SP120000
<U0030> \x30 # ND100000
<U0031> \x31 # ND010000
<U0032> \x32 # ND020000
<U0033> \x33 # ND030000
<U0034> \x34 # ND040000
<U0035> \x35 # ND050000
<U0036> \x36 # ND060000
<U0037> \x37 # ND070000
<U0038> \x38 # ND080000
<U0039> \x39 # ND090000
<U003A> \x3A # SP130000
<U003B> \x3B # SP140000
<U003C> \x3C # SA030000
<U003D> \x3D # SA040000
<U003E> \x3E # SA050000
<U003F> \x3F # SP150000
<U0040> \x40 # SM050000
<U0041> \x41 # LA020000
<U0042> \x42 # LB020000
<U0043> \x43 # LC020000
<U0044> \x44 # LD020000
<U0045> \x45 # LE020000
<U0046> \x46 # LF020000
<U0047> \x47 # LG020000
<U0048> \x48 # LH020000
<U0049> \x49 # LI020000
<U004A> \x4A # LJ020000
<U004B> \x4B # LK020000
<U004C> \x4C # LL020000
<U004D> \x4D # LM020000
<U004E> \x4E # LN020000
<U004F> \x4F # LO020000
<U0050> \x50 # LP020000
<U0051> \x51 # LQ020000
<U0052> \x52 # LR020000
<U0053> \x53 # LS020000
<U0054> \x54 # LT020000
<U0055> \x55 # LU020000
<U0056> \x56 # LV020000
<U0057> \x57 # LW020000
<U0058> \x58 # LX020000
<U0059> \x59 # LY020000
<U005A> \x5A # LZ020000
<U005B> \x5B # SM060000
<U005C> \x5C # SM070000
<U005D> \x5D # SM080000
<U005E> \x5E # SD150000
<U005F> \x5F # SP090000
<U0060> \x60 # SD130000
<U0061> \x61 # LA010000
<U0062> \x62 # LB010000
<U0063> \x63 # LC010000
<U0064> \x64 # LD010000
<U0065> \x65 # LE010000
<U0066> \x66 # LF010000
<U0067> \x67 # LG010000
<U0068> \x68 # LH010000
<U0069> \x69 # LI010000
<U006A> \x6A # LJ010000
<U006B> \x6B # LK010000
<U006C> \x6C # LL010000
<U006D> \x6D # LM010000
<U006E> \x6E # LN010000
<U006F> \x6F # LO010000
<U0070> \x70 # LP010000
<U0071> \x71 # LQ010000
<U0072> \x72 # LR010000
<U0073> \x73 # LS010000
<U0074> \x74 # LT010000
<U0075> \x75 # LU010000
<U0076> \x76 # LV010000
<U0077> \x77 # LW010000
<U0078> \x78 # LX010000
<U0079> \x79 # LY010000
<U007A> \x7A # LZ010000
<U007B> \x7B # SM110000
<U007C> \x7C # SM130000
<U007D> \x7D # SM140000
<U007E> \x7E # SD190000
<U001A> \x7F # ..SUB...
<U00C7> \x80 # LC420000
<U00FC> \x81 # LU170000
<U00E9> \x82 # LE110000
<U00E2> \x83 # LA150000
<U00E4> \x84 # LA170000
<U00E0> \x85 # LA130000
<U0386> \x86 # GA120000
<U00E7> \x87 # LC410000
<U00EA> \x88 # LE150000
<U00EB> \x89 # LE170000
<U00E8> \x8A # LE130000
<U00EF> \x8B # LI170000
<U00EE> \x8C # LI150000
<U0388> \x8D # GE120000
<U00C4> \x8E # LA180000
<U0389> \x8F # GE720000
<U038A> \x90 # GI120000
<U038C> \x92 # GO120000
<U00F4> \x93 # LO150000
<U00F6> \x94 # LO170000
<U038E> \x95 # GU120000
<U00FB> \x96 # LU150000
<U00F9> \x97 # LU130000
<U038F> \x98 # GO720000
<U00D6> \x99 # LO180000
<U00DC> \x9A # LU180000
<U03AC> \x9B # GA110000
<U00A3> \x9C # SC020000
<U03AD> \x9D # GE110000
<U03AE> \x9E # GE710000
<U03AF> \x9F # GI110000
<U03CA> \xA0 # GI170000
<U0390> \xA1 # GI730000
<U03CC> \xA2 # GO110000
<U03CD> \xA3 # GU110000
<U0391> \xA4 # GA020000
<U0392> \xA5 # GB020000
<U0393> \xA6 # GG020000
<U0394> \xA7 # GD020000
<U0395> \xA8 # GE020000
<U0396> \xA9 # GZ020000
<U0397> \xAA # GE320000
<U00BD> \xAB # NF010000
<U0398> \xAC # GT620000
<U0399> \xAD # GI020000
<U00AB> \xAE # SP170000
<U00BB> \xAF # SP180000
<U2591> \xB0 # SF140000
<U2592> \xB1 # SF150000
<U2593> \xB2 # SF160000
<U2502> \xB3 # SF110000
<U2524> \xB4 # SF090000
<U039A> \xB5 # GK020000
<U039B> \xB6 # GL020000
<U039C> \xB7 # GM020000
<U039D> \xB8 # GN020000
<U2563> \xB9 # SF230000
<U2551> \xBA # SF240000
<U2557> \xBB # SF250000
<U255D> \xBC # SF260000
<U039E> \xBD # GX020000
<U039F> \xBE # GO020000
<U2510> \xBF # SF030000
<U2514> \xC0 # SF020000
<U2534> \xC1 # SF070000
<U252C> \xC2 # SF060000
<U251C> \xC3 # SF080000
<U2500> \xC4 # SF100000
<U253C> \xC5 # SF050000
<U03A0> \xC6 # GP020000
<U03A1> \xC7 # GR020000
<U255A> \xC8 # SF380000
<U2554> \xC9 # SF390000
<U2569> \xCA # SF400000
<U2566> \xCB # SF410000
<U2560> \xCC # SF420000
<U2550> \xCD # SF430000
<U256C> \xCE # SF440000
<U03A3> \xCF # GS020000
<U03A4> \xD0 # GT020000
<U03A5> \xD1 # GU020000
<U03A6> \xD2 # GF020000
<U03A7> \xD3 # GH020000
<U03A8> \xD4 # GP620000
<U03A9> \xD5 # GO320000
<U03B1> \xD6 # GA010000
<U03B2> \xD7 # GB010000
<U03B3> \xD8 # GG010000
<U2518> \xD9 # SF040000
<U250C> \xDA # SF010000
<U2588> \xDB # SF610000
<U2584> \xDC # SF570000
<U03B4> \xDD # GD010000
<U03B5> \xDE # GE010000
<U2580> \xDF # SF600000
<U03B6> \xE0 # GZ010000
<U03B7> \xE1 # GE310000
<U03B8> \xE2 # GT610000
<U03B9> \xE3 # GI010000
<U03BA> \xE4 # GK010000
<U03BB> \xE5 # GL010000
<U03BC> \xE6 # GM010000
<U03BD> \xE7 # GN010000
<U03BE> \xE8 # GX010000
<U03BF> \xE9 # GO010000
<U03C0> \xEA # GP010000
<U03C1> \xEB # GR010000
<U03C3> \xEC # GS010000
<U03C2> \xED # GS610000
<U03C4> \xEE # GT010000
<U00B4> \xEF # SD110000
<U00AD> \xF0 # SP320000
<U00B1> \xF1 # SA020000
<U03C5> \xF2 # GU010000
<U03C6> \xF3 # GF010000
<U03C7> \xF4 # GH010000
<U00A7> \xF5 # SM240000
<U03C8> \xF6 # GP610000
<U00B8> \xF7 # SD410000
<U00B0> \xF8 # SM190000
<U00A8> \xF9 # SD170000
<U03C9> \xFA # GO310000
<U03CB> \xFB # GU170000
<U03B0> \xFC # GU730000
<U03CE> \xFD # GO710000
<U25A0> \xFE # SM470000
<U00A0> \xFF # SP300000
#
END CHARMAP
#
#________________________________________________________________________

View file

@ -0,0 +1,315 @@
# *******************************************************************************
# *
# * Copyright (C) 1995-2000, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# *******************************************************************************
#
# File created on Thu Feb 10 11:47:54 2000
#
# File created manually from source file LMBCS.ALL
#
# Table Version : 1.00
#
<code_set_name> "lmb-excp"
<char_name_mask> "AXXXX"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
#
CHARMAP
#
#
#ISO 10646 LMBCS
#_________ _________
<U0027> \x01\x27
<U005E> \x01\x23
<U005E> \x01\x33
<U005E> \x01\x6D
<U0060> \x01\x24
<U0060> \x01\x34
<U007E> \x01\x21
<U007E> \x01\x31
<U007E> \x01\x6C
<U00A0> \x01\x3B
<U00A7> \x01\x15
<U00A8> \x01\x20
<U00A8> \x01\x30
<U00AF> \x01\x67
<U00B4> \x01\x25
<U00B4> \x01\x35
<U00B6> \x01\x14
<U0100> \x06\x2E
<U0101> \x06\x01
<U0108> \x06\x02
<U0109> \x06\x03
<U010A> \x06\x04
<U010B> \x06\x05
<U0112> \x06\x06
<U0113> \x06\x07
<U0116> \x06\x08
<U0117> \x06\x09
<U011C> \x06\x0A
<U011D> \x06\x0B
<U0120> \x06\x0C
<U0121> \x06\x0D
<U0122> \x06\x0E
<U0123> \x06\x0F
<U0124> \x06\x10
<U0125> \x06\x11
<U0126> \x01\x72
<U0127> \x01\x73
<U0128> \x06\x12
<U0129> \x06\x13
<U012A> \x06\x14
<U012B> \x06\x15
<U012E> \x06\x16
<U012F> \x06\x17
<U0132> \x01\x61
<U0133> \x01\x60
<U0134> \x06\x18
<U0135> \x06\x19
<U0136> \x06\x1A
<U0137> \x06\x1B
<U0138> \x01\x7A
<U013B> \x06\x1C
<U013C> \x06\x1D
<U013F> \x01\x66
<U0140> \x01\x65
<U0145> \x06\x1E
<U0146> \x06\x1F
<U0149> \x01\x64
<U014A> \x01\x78
<U014B> \x01\x79
<U014C> \x06\x20
<U014D> \x06\x21
<U0152> \x01\x40
<U0153> \x01\x41
<U0156> \x06\x22
<U0157> \x06\x23
<U015C> \x06\x24
<U015D> \x06\x25
<U0166> \x01\x74
<U0167> \x01\x75
<U0168> \x06\x26
<U0169> \x06\x27
<U016A> \x06\x28
<U016B> \x06\x29
<U016C> \x06\x2A
<U016D> \x06\x2B
<U0172> \x06\x2C
<U0173> \x06\x2D
<U0178> \x01\x42
<U02BC> \x02\x07
<U02BD> \x02\x08
<U02C7> \x01\x6B
<U02D8> \x01\x68
<U02D9> \x01\x43
<U02DA> \x01\x22
<U02DA> \x01\x32
<U02DA> \x01\x44
<U02DB> \x01\x6A
<U02DD> \x01\x69
<U037A> \x02\x01
<U0384> \x02\x06
<U0385> \x02\x02
<U03AA> \x02\x03
<U03AB> \x02\x04
<U03C6> \x02\x6D
<U2013> \x01\x29
<U2014> \x01\x2A
<U2015> \x02\x05
<U2017> \x01\x39
<U2018> \x01\x2B
<U2019> \x01\x2C
<U201A> \x01\x37
<U201C> \x01\x26
<U201D> \x01\x38
<U201E> \x01\x36
<U2020> \x01\x70
<U2021> \x01\x71
<U2022> \x01\x07
<U2026> \x01\x28
<U2030> \x02\x7A
<U2032> \x02\x69
<U2033> \x02\x6A
<U2039> \x01\x2E
<U203A> \x01\x2F
<U203C> \x01\x13
<U203E> \x02\x09
<U2044> \x02\x78
<U207F> \x02\x7C
<U20A4> \x01\x7E
<U20A7> \x01\x7F
<U2111> \x02\x52
<U2113> \x01\x77
<U211C> \x02\x53
<U2122> \x01\x76
<U2126> \x01\x4E
<U2135> \x02\x51
<U215B> \x02\x16
<U215C> \x02\x15
<U215D> \x02\x14
<U215E> \x02\x13
<U2190> \x01\x1B
<U2191> \x01\x18
<U2192> \x01\x1A
<U2193> \x01\x19
<U2194> \x01\x1D
<U2195> \x01\x12
<U21A8> \x01\x17
<U21D0> \x02\x1B
<U21D1> \x02\x18
<U21D2> \x02\x1A
<U21D3> \x02\x19
<U21D4> \x02\x1D
<U21D5> \x02\x12
<U2200> \x02\x66
<U2201> \x02\x64
<U2202> \x02\x50
<U2203> \x02\x67
<U2205> \x02\x7D
<U2207> \x02\x41
<U2208> \x02\x5C
<U2209> \x02\x5D
<U220B> \x02\x5B
<U2219> \x02\x79
<U221A> \x02\x7B
<U221D> \x02\x6C
<U221E> \x02\x6B
<U221F> \x01\x1C
<U2220> \x02\x40
<U2229> \x02\x6F
<U222A> \x02\x6E
<U222B> \x02\x65
<U2245> \x02\x71
<U2248> \x02\x77
<U2260> \x02\x76
<U2261> \x02\x70
<U2264> \x02\x73
<U2265> \x02\x72
<U2282> \x02\x7E
<U2283> \x02\x7F
<U2286> \x02\x5E
<U2287> \x02\x5F
<U2295> \x02\x61
<U2297> \x02\x60
<U22C0> \x02\x63
<U2310> \x01\x7D
<U2318> \x01\x4B
<U2320> \x02\x74
<U2321> \x02\x75
<U2552> \x01\x55
<U2553> \x01\x56
<U2555> \x01\x5C
<U2556> \x01\x5B
<U2558> \x01\x54
<U2559> \x01\x53
<U255B> \x01\x5E
<U255C> \x01\x5D
<U255E> \x01\x46
<U255F> \x01\x47
<U2561> \x01\x59
<U2562> \x01\x5A
<U2564> \x01\x51
<U2565> \x01\x52
<U2567> \x01\x5F
<U2568> \x01\x50
<U256A> \x01\x58
<U256B> \x01\x57
<U258C> \x01\x48
<U2590> \x01\x49
<U25AC> \x01\x16
<U25B2> \x01\x1E
<U25BA> \x01\x10
<U25BC> \x01\x1F
<U25C4> \x01\x11
<U25CA> \x01\x4A
<U25CB> \x01\x09
<U25D8> \x01\x08
<U25D9> \x01\x0A
<U263A> \x01\x01
<U263B> \x01\x02
<U263C> \x01\x0F
<U2640> \x01\x0C
<U2642> \x01\x0B
<U2660> \x01\x06
<U2663> \x01\x05
<U2665> \x01\x03
<U2666> \x01\x04
<U266A> \x01\x0D
<U266B> \x01\x0E
<U2713> \x02\x62
<UF862> \x02\x0E
<UF863> \x02\x0F
<UF864> \x02\x10
<UF865> \x02\x11
<UF866> \x02\x3F
<UF867> \x02\x17
<UF868> \x02\x1C
<UF869> \x02\x1E
<UF86A> \x02\x68
<UF86B> \x02\x5A
<UF86C> \x02\x59
<UF86D> \x02\x58
<UF86E> \x02\x57
<UF86F> \x02\x56
<UF870> \x02\x55
<UF871> \x02\x54
<UF872> \x02\x4F
<UF873> \x02\x4E
<UF874> \x02\x4D
<UF875> \x02\x4C
<UF876> \x02\x4B
<UF877> \x02\x4A
<UF878> \x02\x49
<UF879> \x02\x48
<UF87A> \x02\x47
<UF87B> \x02\x46
<UF87C> \x02\x45
<UF87D> \x02\x44
<UF87E> \x02\x43
<UF87F> \x02\x42
<UF880> \x02\x3E
<UF881> \x02\x3D
<UF882> \x02\x3C
<UF883> \x02\x3B
<UF884> \x02\x3A
<UF885> \x02\x39
<UF886> \x02\x38
<UF887> \x02\x37
<UF888> \x02\x36
<UF889> \x02\x35
<UF88A> \x02\x34
<UF88B> \x02\x33
<UF88C> \x02\x32
<UF88D> \x02\x31
<UF88E> \x02\x30
<UF88F> \x02\x2F
<UF890> \x02\x2E
<UF891> \x02\x2D
<UF892> \x02\x2C
<UF893> \x02\x2B
<UF894> \x02\x2A
<UF895> \x02\x29
<UF896> \x02\x28
<UF897> \x02\x27
<UF898> \x02\x26
<UF899> \x02\x25
<UF89A> \x02\x24
<UF89B> \x02\x23
<UF89C> \x02\x22
<UF89D> \x02\x21
<UF89E> \x02\x20
<UF89F> \x02\x1F
<UF8FB> \x01\x7C
<UF8FC> \x01\x63
<UF8FD> \x01\x62
<UF8FE> \x01\x4D
<UF8FF> \x01\x4C
<UFFFD> \x01\x3D
#
END CHARMAP
#
#________________________________________________________________________

View file

@ -30,6 +30,7 @@ void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
void TestConverterTypesAndStarters(void);
void TestAmbiguous(void);
void TestUTF8(void);
void TestLMBCS(void);
void TestJitterbug255(void);
#define NEW_MAX_BUFFER 999
@ -105,6 +106,7 @@ void addTestNewConvert(TestNode** root)
addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
}
@ -748,6 +750,68 @@ TestUTF8() {
ucnv_close(cnv);
}
/*
 * Exercise ucnv_getNextUChar() on an "LMBCS-1" converter.
 *
 * Walks a fixed byte sequence one character at a time and checks, for each
 * call, both the number of source bytes consumed and the code point
 * returned against the results table.  Stops at the first error or
 * mismatch and reports it through log_err().
 */
void
TestLMBCS() {
    /* test input */
    static const uint8_t in[]={
        0x61,
        0x01, 0x29,
        0x81,
        0xA0,
        0x0F, 0x27,
        0x0F, 0x91,
        0x14, 0x0a, 0x74,
        0x14, 0xF6, 0x02,
        0x10, 0x88, 0xA0
    };

    /* expected test results */
    static const uint32_t results[]={
        /* number of bytes read, code point */
        1, 0x0061,
        2, 0x2013,
        1, 0x00FC,
        1, 0x00E1,
        2, 0x0007,
        2, 0x0091,
        3, 0x0a74,
        3, 0x0200,
        3, 0x5516
    };

    const char *s=(const char *)in, *s0, *limit=(const char *)in+sizeof(in);
    const uint32_t *r=results;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t c;

    UConverter *cnv=ucnv_open("LMBCS-1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
    }
    else
    {
        while(s<limit) {
            s0=s;
            c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
            if(U_FAILURE(errorCode)) {
                log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
                break;
            } else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
                /* Cast every variadic argument to the exact type its
                   conversion specifier expects (%lx -> unsigned long,
                   %d -> int); uint32_t and pointer differences need not
                   have those types. */
                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
                    (unsigned long)c, (int)(s-s0), (unsigned long)*(r+1), (int)*r);
                break;
            }
            r+=2;
        }
        ucnv_close(cnv);
    }
}
void TestJitterbug255()
{
const char testBytes[] = { (char)0x95, (char)0xcf, (char)0x8a,

View file

@ -57,4 +57,5 @@ ibm-953.ucm ibm-955.ucm\
ibm-37-s390.ucm\
ibm-1140-s390.ucm ibm-1142-s390.ucm ibm-1143-s390.ucm ibm-1144-s390.ucm\
ibm-1145-s390.ucm ibm-1146-s390.ucm ibm-1147-s390.ucm ibm-1148-s390.ucm\
ibm-1149-s390.ucm ibm-1153-s390.ucm ibm-12712-s390.ucm ibm-16804-s390.ucm
ibm-1149-s390.ucm ibm-1153-s390.ucm ibm-12712-s390.ucm ibm-16804-s390.ucm\
lmb-excp.ucm ibm-851.ucm