mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-9673 Update EUC-JP mapping to use updated table
X-SVN-Rev: 33039
This commit is contained in:
parent
16192c32d3
commit
5794bfbf66
6 changed files with 13729 additions and 37 deletions
icu4c/source
data/mappings
test
|
@ -1,6 +1,6 @@
|
|||
# ******************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2012, International Business Machines
|
||||
# * Copyright (C) 1995-2013, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# ******************************************************************************
|
||||
|
@ -622,10 +622,6 @@ ibm-943_P130-1999 { UTR22* }
|
|||
ibm-33722_P12A_P12A-2009_U2 { UTR22* }
|
||||
ibm-33722 # Leave untagged because this isn't the default
|
||||
ibm-5050 # Leave untagged because this isn't the default, and yes this alias is correct
|
||||
EUC-JP { IANA MIME* WINDOWS }
|
||||
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* WINDOWS* }
|
||||
csEUCPkdFmtJapanese { IANA WINDOWS }
|
||||
X-EUC-JP { WINDOWS } # Japan EUC. x-euc-jp is a MIME name
|
||||
ibm-33722_VPUA
|
||||
IBM-eucJP
|
||||
windows-51932-2006 { UTR22* }
|
||||
|
@ -646,16 +642,17 @@ ibm-33722_P120-1999 { UTR22* } # Japan EUC with \ <-> Yen mapping
|
|||
# ibm-954 contains more PUA characters than the others.
|
||||
ibm-954_P101-2007 { UTR22* }
|
||||
ibm-954 { IBM* }
|
||||
EUC-JP { JAVA* } # Matches more closely with ibm-1350
|
||||
Extended_UNIX_Code_Packed_Format_for_Japanese { JAVA }
|
||||
csEUCPkdFmtJapanese { JAVA }
|
||||
X-EUC-JP { JAVA } # Japan EUC. x-euc-jp is a MIME name
|
||||
eucjis { JAVA }
|
||||
ujis # Linux sometimes uses this name. This is an unfortunately generic and rarely used name. Its use is discouraged.
|
||||
x-IBM954 { JAVA }
|
||||
x-IBM954 { JAVA* }
|
||||
x-IBM954C { JAVA }
|
||||
# eucJP # This is closest to Solaris EUC-JP.
|
||||
|
||||
euc-jp-2007 { UTR22* }
|
||||
EUC-JP { MIME* IANA JAVA* WINDOWS* }
|
||||
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* JAVA WINDOWS }
|
||||
csEUCPkdFmtJapanese { IANA JAVA WINDOWS }
|
||||
X-EUC-JP { MIME JAVA WINDOWS } # Japan EUC. x-euc-jp is a MIME name
|
||||
eucjis {JAVA}
|
||||
ujis # Linux sometimes uses this name. This is an unfortunately generic and rarely used name. Its use is discouraged.
|
||||
|
||||
aix-IBM_udcJP-4.3.6 { UTR22* }
|
||||
x-IBM-udcJP { JAVA* }
|
||||
|
||||
|
|
13700
icu4c/source/data/mappings/euc-jp-2007.ucm
Normal file
13700
icu4c/source/data/mappings/euc-jp-2007.ucm
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
# Copyright (c) 1999-2012, International Business Machines Corporation and
|
||||
# Copyright (c) 1999-2013, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
# A list of UCM's to build
|
||||
# Note:
|
||||
|
@ -116,5 +116,6 @@ ibm-5478_P100-1995.ucm\
|
|||
icu-internal-25546.ucm lmb-excp.ucm \
|
||||
icu-internal-compound-d1.ucm icu-internal-compound-d2.ucm icu-internal-compound-d3.ucm icu-internal-compound-d4.ucm\
|
||||
icu-internal-compound-d5.ucm icu-internal-compound-d6.ucm icu-internal-compound-d7.ucm \
|
||||
icu-internal-compound-s1.ucm icu-internal-compound-s2.ucm icu-internal-compound-s3.ucm icu-internal-compound-t.ucm
|
||||
icu-internal-compound-s1.ucm icu-internal-compound-s2.ucm icu-internal-compound-s3.ucm icu-internal-compound-t.ucm \
|
||||
euc-jp-2007.ucm
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2012, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/*
|
||||
|
@ -426,7 +426,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
|
|||
log_err("u-> ibm-943 with skip did not match.\n");
|
||||
|
||||
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
|
||||
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
|
||||
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
|
||||
UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
|
||||
log_err("u-> euc-jp with skip did not match.\n");
|
||||
|
||||
|
@ -857,7 +857,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
|
|||
log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
|
||||
|
||||
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
|
||||
UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
|
||||
log_err("euc-jp->u with skip did not match.\n");
|
||||
|
||||
|
@ -1195,7 +1195,7 @@ static void TestStop(int32_t inputsize, int32_t outputsize)
|
|||
log_err("u-> ibm-943 with stop did not match.\n");
|
||||
|
||||
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
|
||||
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
|
||||
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
|
||||
UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
|
||||
log_err("u-> euc-jp with stop did not match.\n");
|
||||
|
||||
|
@ -1307,7 +1307,7 @@ static void TestStop(int32_t inputsize, int32_t outputsize)
|
|||
log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
|
||||
|
||||
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
|
||||
UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
|
||||
log_err("euc-jp->u with stop did not match.\n");
|
||||
|
||||
|
@ -1420,7 +1420,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
|||
log_err("u-> ibm-943 with substitute did not match.\n");
|
||||
|
||||
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
|
||||
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
|
||||
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
|
||||
UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
|
||||
log_err("u-> euc-jp with substitute did not match.\n");
|
||||
|
||||
|
@ -1590,7 +1590,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
|||
|
||||
|
||||
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
|
||||
UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
|
||||
log_err("euc-jp->u with substitute did not match.\n");
|
||||
|
||||
|
@ -1602,7 +1602,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
|||
|
||||
|
||||
if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
|
||||
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
|
||||
UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
|
||||
log_err("euc-jp->u with substitute did not match.\n");
|
||||
}
|
||||
|
@ -2051,7 +2051,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
|
|||
log_err("u-> ibm-943 with subst with value did not match.\n");
|
||||
|
||||
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
|
||||
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
|
||||
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
|
||||
UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
|
||||
log_err("u-> euc-jp with subst with value did not match.\n");
|
||||
|
||||
|
@ -2435,7 +2435,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
|
|||
log_err("ibm-943->u with substitute with value did not match.\n");
|
||||
|
||||
if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
|
||||
EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
|
||||
EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
|
||||
UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
|
||||
log_err("euc-jp->u with substitute with value did not match.\n");
|
||||
|
||||
|
@ -2724,14 +2724,11 @@ UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t
|
|||
UChar errChars[50]; /* should be sufficient */
|
||||
int8_t errLen = 50;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
const UChar* limit= NULL;
|
||||
const UChar* start= NULL;
|
||||
ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
|
||||
if(U_FAILURE(err)){
|
||||
log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
|
||||
}
|
||||
/* src points to limit of invalid chars */
|
||||
limit = src;
|
||||
/* length of the invalid chars should be equal to the returned length */
|
||||
start = src - errLen;
|
||||
if(u_strncmp(errChars,start,errLen)!=0){
|
||||
|
@ -2922,14 +2919,11 @@ UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *e
|
|||
char errChars[50]; /* should be sufficient */
|
||||
int8_t errLen = 50;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
const char* limit= NULL;
|
||||
const char* start= NULL;
|
||||
ucnv_getInvalidChars(conv,errChars, &errLen, &err);
|
||||
if(U_FAILURE(err)){
|
||||
log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
|
||||
}
|
||||
/* src points to limit of invalid chars */
|
||||
limit = src;
|
||||
/* length of the invalid chars should be equal to the returned length */
|
||||
start = src - errLen;
|
||||
if(uprv_strncmp(errChars,start,errLen)!=0){
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2012, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/*****************************************************************************
|
||||
|
@ -434,10 +434,10 @@ static void TestErrorBehaviour(){
|
|||
log_err("u-> ibm-1363 [UCNV_MBCS] \n");
|
||||
|
||||
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
|
||||
expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
|
||||
expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
|
||||
log_err("u-> euc-jp [UCNV_MBCS] \n");
|
||||
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
|
||||
expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
|
||||
expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
|
||||
log_err("u-> euc-jp [UCNV_MBCS] \n");
|
||||
}
|
||||
|
||||
|
|
6
icu4c/source/test/testdata/conversion.txt
vendored
6
icu4c/source/test/testdata/conversion.txt
vendored
|
@ -1,6 +1,6 @@
|
|||
//*******************************************************************************
|
||||
//
|
||||
// Copyright (C) 2003-2012, International Business Machines
|
||||
// Copyright (C) 2003-2013, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// file name: conversion.txt
|
||||
|
@ -1766,8 +1766,8 @@ conversion:table(nofallback) {
|
|||
{ "UTF-8", "a\ud800b", :bin{ 61efbfbd62 }, :intvector{ 0, 1, 1, 1, 2 }, :int{1}, :int{0}, "", "", "" }
|
||||
|
||||
// Code coverage for the EUC variants.
|
||||
{ "EUC-JP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4ae618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 6, 7, 7 }, :int{1}, :int{0}, "", "0", "" }
|
||||
{ "EUC-JP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4aef4fef4fe618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7 }, :int{1}, :int{0}, "", "", "" }
|
||||
{ "IBM-eucJP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4ae618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 6, 7, 7 }, :int{1}, :int{0}, "", "0", "" }
|
||||
{ "IBM-eucJP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4aef4fef4fe618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7 }, :int{1}, :int{0}, "", "", "" }
|
||||
{ "EUC-TW", "\u0061\u2295\u5BF2\ud801\udc01\ud801\u0061\u8706\u008a", :bin{ 61a2d38ea2dce561e6ca8a }, :intvector{ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8 }, :int{1}, :int{0}, "", "0", "" }
|
||||
{ "EUC-TW", "\u0061\u2295\u5BF2\ud801\udc01\ud801\u0061\u8706\u008a", :bin{ 61a2d38ea2dce5fdfefdfe61e6ca8a }, :intvector{ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8 }, :int{1}, :int{0}, "", "", "" }
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue