ICU-2536 Tag more Java and Windows aliases.

Move ksc5601 to a better mapping table
Move some aliases into a more logical order

X-SVN-Rev: 10831
This commit is contained in:
George Rhoten 2003-01-10 01:57:15 +00:00
parent 10394346b1
commit 701dc0c6d9
2 changed files with 232 additions and 209 deletions
icu4c/source
data/mappings
test/cintltst

View file

@ -82,8 +82,8 @@
{ UTR22 # Name format specified by http://www.unicode.org/unicode/reports/tr22/
# ICU # Can also use ICU_FEATURE
IBM # The IBM CCSID number is specified by ibm-*
# WINDOWS # The Microsoft code page identifier number is specified by cp*
JAVA # Source: Sun JDK. Preferred name must be an exact match. Alias name case is ignored, but dashes are not ignored.
WINDOWS # The Microsoft code page identifier number is specified by the number. The rest are recognized IE names.
JAVA # Source: Sun JDK. Alias name case is ignored, but dashes are not ignored.
# GLIBC
# AIX
# DB2
@ -100,7 +100,7 @@
# Fully algorithmic converters
UTF-8 { IANA* MIME* } ibm-1208 { IBM* }
UTF-8 { IANA* MIME* JAVA* } ibm-1208 { IBM* }
ibm-1209 { IBM }
ibm-5304 { IBM }
ibm-5305 { IBM }
@ -108,20 +108,21 @@ UTF-8 { IANA* MIME* } ibm-1208 { IBM* }
cp65001
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
UTF-16 { IANA* MIME* } ISO-10646-UCS-2 { IANA } csUnicode ucs-2
UTF-16BE { IANA* MIME* } UTF16_BigEndian
x-utf-16be
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA } csUnicode ucs-2
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
# iso-10646-ucs-2 { JAVA } # This is ambiguous
ibm-1200 { IBM* }
ibm-1201 { IBM }
ibm-5297 { IBM }
ibm-13488 { IBM }
ibm-17584 { IBM }
UTF16_BigEndian
cp1201
UTF-16LE { IANA* MIME* } UTF16_LittleEndian
x-utf-16le
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
ibm-1202 { IBM* }
ibm-13490 { IBM }
ibm-17586 { IBM }
UTF16_LittleEndian
cp1200
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4
@ -165,46 +166,47 @@ CESU-8 { IANA* }
# Standard iso-8859-1, which does not have the Euro update.
# See iso-8859-15 (latin9) for the Euro update
ISO-8859-1 { MIME* IANA }
LATIN_1 # Old ICU name
ibm-819 { IBM* } # This is not truly ibm-819 because it's missing the fallbacks.
ISO-8859-1 { MIME* IANA JAVA* }
ibm-819 { IBM* JAVA } # This is not truly ibm-819 because it's missing the fallbacks.
IBM819 { IANA }
cp819 { IANA }
latin1 { IANA }
8859-1
csISOLatin1 { IANA }
iso-ir-100 { IANA }
ISO_8859-1:1987 { IANA* }
l1 { IANA }
819
cp819 { IANA JAVA }
latin1 { IANA JAVA }
8859_1 { JAVA }
csISOLatin1 { IANA JAVA }
iso-ir-100 { IANA JAVA }
ISO_8859-1:1987 { IANA* JAVA }
l1 { IANA JAVA }
819 { JAVA }
# 28591 { WINDOWS* } # This has odd behavior because it has the Euro update, which isn't correct.
# LATIN_1 # Old ICU name
# ANSI_X3.110-1983 # This is for a different IANA alias. This isn't iso-8859-1.
US-ASCII { MIME* IANA }
ASCII { JAVA* IANA }
ascii-7 { JAVA }
ANSI_X3.4-1968 { IANA* }
ANSI_X3.4-1986 { IANA }
ISO_646.irv:1991 { IANA }
US-ASCII { MIME* IANA JAVA WINDOWS }
ASCII { JAVA* IANA WINDOWS }
ANSI_X3.4-1968 { IANA* WINDOWS }
ANSI_X3.4-1986 { IANA WINDOWS }
ISO_646.irv:1991 { IANA WINDOWS }
iso_646.irv:1983 { JAVA }
ISO646-US { JAVA IANA }
ISO646-US { JAVA IANA WINDOWS }
us { IANA }
csASCII { IANA }
646 { JAVA }
csASCII { IANA WINDOWS }
iso-ir-6 { IANA }
cp367 { IANA }
# IBM367 { IANA } # Leave this on ibm-367 so it can have an IANA name
# Java says "default" too, but that makes no sense.
cp367 { IANA WINDOWS }
ascii7 { JAVA }
646 { JAVA }
20127 { WINDOWS* }
# IBM367 { IANA WINDOWS } # This isn't the default ibm-367.
# Partially algorithmic converters
ISO_2022 ISO-2022 { MIME* } 2022 cp2022
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA* MIME* } csISO2022JP
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA* MIME* JAVA* } csISO2022JP { JAVA }
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA* }
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA* MIME* } csISO2022JP2
ISO_2022,locale=ja,version=3 JIS7 csJISEncoding
ISO_2022,locale=ja,version=4 JIS8
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA* MIME* } csISO2022KR
ISO_2022,locale=ko,version=1 ibm-25546 ibm-25546_P100 25546
ISO_2022,locale=ko,version=1 ibm-25546
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA* MIME* } # csISO2022CN
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA* MIME* }
HZ HZ-GB-2312 { IANA* MIME* }
@ -241,48 +243,51 @@ LMBCS-19
ibm-367_P100-1995 { UTR22* } ibm-367 { IBM* } IBM367 { IANA* } # This is ASCII, but it has fallbacks
# Central Europe
ibm-912 { IBM* } iso-8859-2 { MIME* IANA }
latin2 { IANA }
# ISO8859_2 { JAVA* } # This is really the default for Java and many others.
8859-2
csISOLatin2 { IANA }
iso-ir-101 { IANA }
ISO_8859-2:1987 { IANA* }
l2 { IANA }
cp912
912
ibm-912 { IBM* JAVA } iso-8859-2 { MIME* IANA JAVA* WINDOWS }
ISO_8859-2:1987 { IANA* WINDOWS JAVA }
latin2 { IANA WINDOWS JAVA }
csISOLatin2 { IANA WINDOWS JAVA }
iso-ir-101 { IANA WINDOWS JAVA }
l2 { IANA WINDOWS JAVA }
8859_2 { JAVA }
cp912 { JAVA }
912 { JAVA }
28592 { WINDOWS* }
# Maltese Esperanto
ibm-913 { IBM* } iso-8859-3 { MIME* IANA }
latin3 { IANA }
8859-3
csISOLatin3 { IANA }
iso-ir-109
ISO_8859-3:1988 { IANA* }
l3 { IANA }
cp913
913
ibm-913 { IBM* JAVA } iso-8859-3 { MIME* IANA WINDOWS JAVA* }
ISO_8859-3:1988 { IANA* WINDOWS JAVA }
latin3 { IANA JAVA WINDOWS }
csISOLatin3 { IANA WINDOWS }
iso-ir-109 { IANA WINDOWS JAVA }
l3 { IANA WINDOWS JAVA }
8859_3 { JAVA }
cp913 { JAVA }
913 { JAVA }
28593 { WINDOWS* }
# Baltic
ibm-914 { IBM* } iso-8859-4 { MIME* IANA }
latin4 { IANA }
8859-4
csISOLatin4 { IANA }
iso-ir-110 { IANA }
ISO_8859-4:1988 { IANA* }
l4 { IANA }
cp914
914
ibm-914 { IBM* JAVA } iso-8859-4 { MIME* IANA WINDOWS JAVA* }
latin4 { IANA WINDOWS JAVA }
csISOLatin4 { IANA WINDOWS JAVA }
iso-ir-110 { IANA WINDOWS JAVA }
ISO_8859-4:1988 { IANA* WINDOWS JAVA }
l4 { IANA WINDOWS JAVA }
8859_4 { JAVA }
cp914 { JAVA }
914 { JAVA }
28594 { WINDOWS* }
# Cyrillic
ibm-915 { IBM* } iso-8859-5 { MIME* IANA }
cyrillic { IANA }
8859-5
csISOLatinCyrillic { IANA }
iso-ir-144 { IANA }
ISO_8859-5:1988 { IANA* }
cp915
915
ibm-915 { IBM* JAVA } iso-8859-5 { MIME* IANA WINDOWS JAVA* }
cyrillic { IANA WINDOWS JAVA }
csISOLatinCyrillic { IANA WINDOWS JAVA }
iso-ir-144 { IANA WINDOWS JAVA }
ISO_8859-5:1988 { IANA* WINDOWS JAVA }
8859_5 { JAVA }
cp915 { JAVA }
915 { JAVA }
28595 { WINDOWS* }
# Arabic
# ISO_8859-6-E and ISO_8859-6-I are similar to this charset, but BiDi is done differently
@ -290,138 +295,146 @@ ibm-915 { IBM* } iso-8859-5 { MIME* IANA }
# -E means explicit. -I means implicit.
# -E requires the client to handle the ISO 6429 bidirectional controls
ibm-1089_P100-1995 { UTR22* }
ibm-1089 { IBM* }
iso-8859-6 { MIME* IANA }
arabic { IANA }
8859-6
csISOLatinArabic { IANA }
iso-ir-127 { IANA }
ISO_8859-6:1987 { IANA* }
ecma-114 { IANA }
asmo-708 { IANA }
ibm-1089 { IBM* JAVA }
iso-8859-6 { MIME* IANA WINDOWS JAVA* }
arabic { IANA WINDOWS JAVA }
csISOLatinArabic { IANA WINDOWS JAVA }
iso-ir-127 { IANA WINDOWS JAVA }
ISO_8859-6:1987 { IANA* WINDOWS JAVA }
ECMA-114 { IANA JAVA }
ASMO-708 { IANA JAVA }
8859_6 { JAVA }
cp1089 { JAVA }
1089 { JAVA }
28596 { WINDOWS* }
ISO-8859-6-I { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
ISO-8859-6-E { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
cp1089
1089
# ISO Greek (w/ euro update)
ibm-4909_P100-1999 { UTR22* } ibm-4909 { IBM* }
# ISO Greek (w/o euro update)
ibm-813 iso-8859-7 { MIME* IANA }
greek { IANA }
greek8 { IANA }
elot_928 { IANA }
ecma-118 { IANA }
8859-7
csISOLatinGreek { IANA }
iso-ir-126 { IANA }
ISO_8859-7:1987 { IANA* }
cp813
813
ibm-813 { IBM* JAVA } iso-8859-7 { MIME* IANA WINDOWS JAVA* }
greek { IANA WINDOWS JAVA }
greek8 { IANA WINDOWS JAVA }
ELOT_928 { IANA WINDOWS JAVA }
ECMA-118 { IANA WINDOWS JAVA }
csISOLatinGreek { IANA WINDOWS JAVA }
iso-ir-126 { IANA WINDOWS JAVA }
ISO_8859-7:1987 { IANA* WINDOWS JAVA }
8859_7 { JAVA }
cp813 { JAVA }
813 { JAVA }
28597 { WINDOWS* }
# hebrew
# ISO_8859-8-E and ISO_8859-8-I are similar to this charset, but BiDi is done differently
# From a narrow mapping point of view, there is no difference.
# -E means explicit. -I means implicit.
# -E requires the client to handle the ISO 6429 bidirectional controls
ibm-916 { IBM* } iso-8859-8 { MIME* IANA }
hebrew { IANA }
8859-8
csISOLatinHebrew { IANA }
iso-ir-138 { IANA }
ISO_8859-8:1988 { IANA* }
ISO-8859-8-I { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
ISO-8859-8-E { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
cp916
916
ibm-916 { IBM* JAVA } iso-8859-8 { MIME* IANA WINDOWS JAVA* }
hebrew { IANA WINDOWS JAVA }
csISOLatinHebrew { IANA WINDOWS JAVA }
iso-ir-138 { IANA WINDOWS JAVA }
ISO_8859-8:1988 { IANA* WINDOWS JAVA }
ISO-8859-8-I { IANA MIME } # IANA and Windows consider this alias different and BiDi needs to be applied.
ISO-8859-8-E { IANA MIME } # IANA and Windows consider this alias different and BiDi needs to be applied.
8859_8 { JAVA }
cp916 { JAVA }
916 { JAVA }
28598 { WINDOWS* }
# Turkish
ibm-920 { IBM* } iso-8859-9 { MIME* IANA }
ibm-920 { IBM* JAVA } iso-8859-9 { MIME* IANA WINDOWS JAVA* }
latin5 { IANA WINDOWS JAVA }
csISOLatin5 { IANA JAVA }
iso-ir-148 { IANA WINDOWS JAVA }
ISO_8859-9:1989 { IANA* WINDOWS }
l5 { IANA WINDOWS JAVA }
8859_9 { JAVA }
cp920 { JAVA }
920 { JAVA }
28599 { WINDOWS* }
ECMA-128 # IANA doesn't have this alias 6/24/2002
latin5 { IANA }
8859-9
csISOLatin5 { IANA }
iso-ir-148 { IANA }
ISO_8859-9:1989 { IANA* }
l5 { IANA }
cp920
920
# PC Baltic (w/ euro update)
ibm-901 { IBM* } cp921 { MIME* } 921
# iso-8859-13, PC Baltic (w/o euro update)
ibm-921 { IBM* } iso-8859-13 { IANA* }
ibm-921 { IBM* } iso-8859-13 { IANA* MIME* JAVA* } 8859_13 { JAVA } cp921 921
# Latin 9
ibm-923 { IBM* } iso-8859-15 { IANA* MIME* } # IANA only has iso-8859-15 (6/24/2002)
# ISO8859_15 { JAVA* } # This is really the default for Java and many others.
8859-15
latin9
latin0
csisolatin0
csisolatin9
iso8859_15_fdis
cp923
923
ibm-923 { IBM* JAVA } iso-8859-15 { IANA* MIME* WINDOWS JAVA* }
Latin-9 { IANA WINDOWS }
l9 { WINDOWS }
8859_15 { JAVA }
latin0 { JAVA }
csisolatin0 { JAVA }
csisolatin9 { JAVA }
iso8859_15_fdis { JAVA }
cp923 { JAVA }
923 { JAVA }
28605 { WINDOWS* }
# CJK encodings
ibm-942_P12A-2000 { UTR22* } # The normal mapping
ibm-942_P12A-2000 { UTR22* } # ibm-942_P120 is a rarely used alternate mapping (sjis78 is already old)
ibm-942 { IBM* }
ibm-942_VSUB_VPUA
ibm-932 { IBM }
shift_jis78
sjis78
ibm-932_VSUB_VPUA # Old s_jis
ibm-942_VSUB_VPUA
ibm-932_VSUB_VPUA
# Is this "JIS_C6226-1978"?
ibm-943_P130-2000 { UTR22* }
ibm-943 { IBM* }
ibm-943_VASCII_VSUB_VPUA
ibm-943 { IBM* JAVA }
Shift_JIS # Leave untagged because this isn't the default
cp943 { JAVA* } # This is slightly different, but the backslash mapping is the same.
943 { JAVA }
ibm-943_VASCII_VSUB_VPUA
# japanese. Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe
ibm-943_P14A-2000 { UTR22* }
ibm-943 # Leave untagged because this isn't the default
ibm-943_VSUB_VPUA
Shift_JIS { IANA* MIME* }
sjis
cp943
cp932
Shift_JIS { IANA* MIME* WINDOWS JAVA }
MS_Kanji { IANA WINDOWS JAVA }
csShiftJIS { IANA WINDOWS JAVA }
windows-31j { IANA JAVA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
csWindows31J { IANA WINDOWS JAVA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
cp932 { WINDOWS } # This isn't really windows, but it's used by ICU for Windows.
932 { WINDOWS* }
x-sjis { WINDOWS JAVA }
x-ms-cp932 { WINDOWS }
cp943c { JAVA* } # This is slightly different, but the backslash mapping is the same.
pck # Probably SOLARIS
MS_Kanji { IANA }
csShiftJIS { IANA }
windows-31j { IANA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
csWindows31J { IANA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
x-sjis # Probably MIME
943
sjis # This might be for ibm-1351
ibm-943_VSUB_VPUA
# cp943 # This isn't Windows, and no one else uses it.
# 943 # This isn't Windows, and no one else uses it.
# IANA says that Windows-31J is an extension to csshiftjis ibm-932
ibm-949_P110-2000 { UTR22* }
ibm-949 { IBM* }
ibm-949_VASCII_VSUB_VPUA
ibm-949_P11A-2000 { UTR22* }
ibm-949
ibm-949_VSUB_VPUA
KS_C_5601-1987 { IANA* }
iso-ir-149 { IANA }
KS_C_5601-1989 { IANA }
csKSC56011987 { IANA }
KSC_5601 { MIME* IANA }
ks_x_1001:1992
949
korean { IANA }
ksc5601_1992 # KSC-5601-1992
ksc5601_1987 # Needed by Java
ibm-1373_P100-2002 { UTR22* }
ibm-33722_P120-2000 { UTR22* } # Japan EUC with \ <-> Yen mapping
ibm-33722 { IBM* JAVA }
ibm-5050 { IBM } # Yes this is correct
cp33722 { JAVA* }
33722 { JAVA }
ibm-33722_VASCII_VPUA
# ibm-954 seems to be a superset of ibm-33722
ibm-33722_P12A-2000 { UTR22* }
ibm-33722 # Leave untagged because this isn't the default
ibm-5050 # Leave untagged because this isn't the default, and yes this alias is correct
EUC-JP { IANA MIME* JAVA* }
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* JAVA }
csEUCPkdFmtJapanese { IANA JAVA }
X-EUC-JP { JAVA } # Japan EUC. x-euc-jp is a MIME name
eucjis { JAVA }
ibm-33722_VPUA
IBM-eucJP
ibm-1373_P100-2002 { UTR22* } # Taiwan Big-5 (w/ euro update)
ibm-1373 { IBM* }
Big5 { IANA* MIME* JAVA* }
csBig5 { IANA }
cp950 # { WINDOWS* } # This isn't really windows, but it's used by ICU for Windows.
950 { WINDOWS* }
cp950 { WINDOWS } # This isn't really windows, but it's used by ICU for Windows.
x-big5
ibm-1373_VSUB_VPUA
950 # Taiwan Big-5 (w/ euro update)
ibm-950_P110-1999 { UTR22* }
ibm-950 { IBM* } # Taiwan Big-5 (w/o euro update)
ibm-950_P110-1999 { UTR22* } # Taiwan Big-5 (w/o euro update)
ibm-950 { IBM* JAVA }
cp950 { JAVA* }
950 { JAVA }
ibm-1386_P100-2002 { UTR22* }
ibm-1386 { IBM* JAVA }
@ -431,49 +444,56 @@ ibm-1386_P100-2002 { UTR22* }
MS936 { IANA }
cp1386 { JAVA* }
ibm-1386_VSUB_VPUA
zh_cn # Chinese GBK removed
zh_cn
ibm-33722_P120-2000 { UTR22* } # Japan EUC with \ <-> Yen mapping
ibm-33722 { IBM* }
ibm-33722_VASCII_VPUA
ibm-5050 { IBM } # Yes this is correct
cp33722
33722
ibm-33722_P12A-2000 { UTR22* }
ibm-33722 # Leave untagged because this isn't the default
ibm-33722_VPUA
ibm-5050 # Leave untagged because this isn't the default, and yes this is correct
EUC-JP { IANA MIME* }
ibm-eucJP
eucjis
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* }
csEUCPkdFmtJapanese { IANA }
X-EUC-JP # Japan EUC. x-euc-jp is a MIME name
ibm-970_P110-2000 { UTR22* }
ibm-970 { IBM* }
ibm-970_VPUA
EUC-KR { IANA* MIME* }
ibm-eucKR
csEUCKR { IANA } # Korean EUC. x-euc-kr is a MIME name
ibm-964_P110-2000 { UTR22* }
ibm-964_P110-2000 { UTR22* } # Taiwan EUC. x-euc-tw is a MIME name
ibm-964 { IBM* }
ibm-964_VPUA
EUC-TW
ibm-eucTW
cns11643 # Taiwan EUC. x-euc-tw is a MIME name
cns11643
ibm-964_VPUA
ibm-1363_P110-2000 { UTR22* } # Korean KSC Korean Windows MBCS
ibm-949_P110-2000 { UTR22* }
ibm-949 { IBM* JAVA }
cp949 { JAVA* }
949 { JAVA }
ibm-949_VASCII_VSUB_VPUA
ibm-949_P11A-2000 { UTR22* }
ibm-949 # Leave untagged because this isn't the default
cp949c { JAVA* }
ibm-949_VSUB_VPUA
ibm-970_P110-2000 { UTR22* } # Java and Solaris use euc-kr to also mean ksc5601.
ibm-970 { IBM* }
EUC-KR { IANA* MIME* WINDOWS JAVA* }
51949 { WINDOWS* }
ibm-eucKR { JAVA }
csEUCKR { IANA WINDOWS } # Korean EUC. x-euc-kr is a MIME name
ibm-970_VPUA
ibm-1363_P110-2000 { UTR22* } # Korean KSC MBCS with \ <-> Won mapping
ibm-1363 { IBM* }
ibm-1363_VASCII_VSUB_VPUA
ibm-1363_P11B-2000 { UTR22* }
# Java and Solaris use euc-kr to also mean ksc5601, and _sometimes_ for Windows too.
# Johab seems to be ksc5601-1992 and windows-1361, which is not this.
ibm-1363_P11B-2000 { UTR22* }
ibm-1363 # Leave untagged because this isn't the default
ibm-1363_VSUB_VPUA
windows-949
cp949
KS_C_5601-1987 { IANA* WINDOWS JAVA }
KS_C_5601-1989 { IANA WINDOWS }
KSC_5601 { MIME* IANA WINDOWS }
csKSC56011987 { IANA WINDOWS }
korean { IANA WINDOWS }
iso-ir-149 { IANA WINDOWS }
949 { WINDOWS* }
5601 { JAVA }
windows-949 { JAVA* }
cp949 { WINDOWS } # This isn't really windows, but it's used by ICU for Windows.
cp1363
ksc
# korean # The korean alias from IANA goes to ibm-949_P11A-2000
ibm-1363_VSUB_VPUA
# ks_x_1001:1992 # This seems to mean johab
# ksc5601-1992 # This seems to mean johab
ibm-1383_P110-2000 { UTR22* }
ibm-1383 { IBM* }
@ -493,15 +513,16 @@ ibm-1383_P110-2000 { UTR22* }
ibm-1162_P100-1999 { UTR22* } # Thai (w/ euro update)
ibm-1162 { IBM* }
TIS-620 { IANA* }
cp874
windows-874
ms874
cp9066
874
# iso-8859-11 is similar to TIS-620, but it's not the same. ibm-13162 is a closer match.
TIS-620 { IANA* WINDOWS }
874 { WINDOWS* }
cp874 { WINDOWS } # This isn't really windows, but it's used by ICU for Windows.
windows-874 { JAVA* WINDOWS }
# iso-8859-11 { WINDOWS } # iso-8859-11 is similar to TIS-620, but it's not the same. ibm-13162 is a closer match.
ibm-874 { IBM* } ibm-9066 { IBM } # Thai PC (w/o euro update). Yes ibm-874 == ibm-9066. ibm-1161 has the euro update.
874 { JAVA }
cp874 { JAVA* } # This isn't really windows, but it's used by ICU for Windows.
cp9066
# Platform codepages
ibm-437_P100-1995 { UTR22* } ibm-437 { IBM* } IBM437 { IANA* } cp437 { IANA } 437 { IANA } csPC8CodePage437 { IANA } # PC US
@ -523,10 +544,12 @@ ibm-866 { IBM* } IBM866 { IANA* } cp866 { IANA MIME* } 866 { IANA } csIBM
ibm-868 { IBM* } IBM868 { IANA* } CP868 { IANA MIME* } cp-ar { IANA } csIBM868 { IANA } 868 # PC Urdu
ibm-869 { IBM* } IBM869 { IANA* } cp869 { IANA MIME* } 869 { IANA } cp-gr { IANA } csIBM869 { IANA } # PC Greek (w/o euro update)
ibm-878 { IBM* } KOI8-R { IANA* MIME* } cp878 koi8 csKOI8R { IANA } # Russian internet
ibm-902 { IBM* } # PC Estonian (w/ euro update)
ibm-901 { IBM* } # PC Baltic (w/ euro update), update of ibm-921
ibm-902 { IBM* } # PC Estonian (w/ euro update), update of ibm-922
ibm-922 { IBM* JAVA } cp922 { MIME* JAVA* } 922 { JAVA } # PC Estonian (w/o euro update)
ibm-4909_P100-1999 { UTR22* } ibm-4909 { IBM* } # ISO Greek (w/ euro update), update of ibm-813
# Consider removing these mappings
# These were removed due to age, and they are rarely used.
#ibm-942_P120-2000 { UTR22* }
# #ibm-942 { IBM* }
# ibm-942_VASCII_VSUB_VPUA
@ -537,7 +560,7 @@ ibm-922 { IBM* JAVA } cp922 { MIME* JAVA* } 922 { JAVA } # PC Estonian (w/o eu
#ibm-9238 { IBM* } cp1046 # PC Arabic Extended (w/ euro update)
#ibm-17248_X110-1999 { UTR22* } ibm-17248 { IBM* } # PC Arabic (w/ euro update) Updated version of ibm-864
# ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208 # ibm-941 is not JISX 208 code page
#ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208 # ibm-941 is not JISX 208 code page
ibm-5346_P100-1998 { UTR22* } ibm-5346 { IBM* } windows-1250 { IANA* JAVA } cp1250 { JAVA* } # Windows Latin2 (w/ euro update)
ibm-5347_P100-1998 { UTR22* } ibm-5347 { IBM* } windows-1251 { IANA* JAVA } cp1251 { JAVA* } # Windows Cyrillic (w/ euro update)
@ -581,8 +604,8 @@ ibm-1381_P110-2000 { UTR22* } ibm-1381 { IBM* JAVA } cp1381 { JAVA* } 1381 { J
# EBCDIC codepages according to the CDRA
# without Euro
ibm-37_P100-1995 { UTR22* }
ibm-37 { IBM* } # EBCDIC US
ibm-37_P100-1995 { UTR22* } # EBCDIC US
ibm-37 { IBM* }
IBM037 { IANA* }
ibm-037 { JAVA }
cpibm37

View file

@ -96,7 +96,7 @@ char convertersToCheck[][15] = {
/* "cp1131",
"cp1125",*/
"ibm-902", /* was "cp922" changed 2003jan08 */
"cp921",
"ibm-901", /* was "cp921" changed 2003jan09 */
/*"ibm-17248",*/ /* was "cp864" changed 2002nov25 */
/*"cp1008",
"cp1046",*/