ICU-9673 Update EUC-JP mapping to use updated table

X-SVN-Rev: 33039
This commit is contained in:
Michael Ow 2013-01-11 19:47:40 +00:00
parent 16192c32d3
commit 5794bfbf66
6 changed files with 13729 additions and 37 deletions

View file

@ -1,6 +1,6 @@
# ******************************************************************************
# *
# * Copyright (C) 1995-2012, International Business Machines
# * Copyright (C) 1995-2013, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# ******************************************************************************
@ -622,10 +622,6 @@ ibm-943_P130-1999 { UTR22* }
ibm-33722_P12A_P12A-2009_U2 { UTR22* }
ibm-33722 # Leave untagged because this isn't the default
ibm-5050 # Leave untagged because this isn't the default, and yes this alias is correct
EUC-JP { IANA MIME* WINDOWS }
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* WINDOWS* }
csEUCPkdFmtJapanese { IANA WINDOWS }
X-EUC-JP { WINDOWS } # Japan EUC. x-euc-jp is a MIME name
ibm-33722_VPUA
IBM-eucJP
windows-51932-2006 { UTR22* }
@ -646,16 +642,17 @@ ibm-33722_P120-1999 { UTR22* } # Japan EUC with \ <-> Yen mapping
# ibm-954 contains more PUA characters than the others.
ibm-954_P101-2007 { UTR22* }
ibm-954 { IBM* }
EUC-JP { JAVA* } # Matches more closely with ibm-1350
Extended_UNIX_Code_Packed_Format_for_Japanese { JAVA }
csEUCPkdFmtJapanese { JAVA }
X-EUC-JP { JAVA } # Japan EUC. x-euc-jp is a MIME name
eucjis { JAVA }
ujis # Linux sometimes uses this name. This is an unfortunate generic and rarely used name. Its use is discouraged.
x-IBM954 { JAVA }
x-IBM954 { JAVA* }
x-IBM954C { JAVA }
# eucJP # This is closest to Solaris EUC-JP.
euc-jp-2007 { UTR22* }
EUC-JP { MIME* IANA JAVA* WINDOWS* }
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* JAVA WINDOWS }
csEUCPkdFmtJapanese { IANA JAVA WINDOWS }
X-EUC-JP { MIME JAVA WINDOWS } # Japan EUC. x-euc-jp is a MIME name
eucjis {JAVA}
ujis # Linux sometimes uses this name. This is an unfortunate generic and rarely used name. Its use is discouraged.
aix-IBM_udcJP-4.3.6 { UTR22* }
x-IBM-udcJP { JAVA* }

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
# Copyright (c) 1999-2012, International Business Machines Corporation and
# Copyright (c) 1999-2013, International Business Machines Corporation and
# others. All Rights Reserved.
# A list of UCM's to build
# Note:
@ -116,5 +116,6 @@ ibm-5478_P100-1995.ucm\
icu-internal-25546.ucm lmb-excp.ucm \
icu-internal-compound-d1.ucm icu-internal-compound-d2.ucm icu-internal-compound-d3.ucm icu-internal-compound-d4.ucm\
icu-internal-compound-d5.ucm icu-internal-compound-d6.ucm icu-internal-compound-d7.ucm \
icu-internal-compound-s1.ucm icu-internal-compound-s2.ucm icu-internal-compound-s3.ucm icu-internal-compound-t.ucm
icu-internal-compound-s1.ucm icu-internal-compound-s2.ucm icu-internal-compound-s3.ucm icu-internal-compound-t.ucm \
euc-jp-2007.ucm

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*
@ -426,7 +426,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
log_err("u-> ibm-943 with skip did not match.\n");
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
log_err("u-> euc-jp with skip did not match.\n");
@ -857,7 +857,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
log_err("euc-jp->u with skip did not match.\n");
@ -1195,7 +1195,7 @@ static void TestStop(int32_t inputsize, int32_t outputsize)
log_err("u-> ibm-943 with stop did not match.\n");
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
log_err("u-> euc-jp with stop did not match.\n");
@ -1307,7 +1307,7 @@ static void TestStop(int32_t inputsize, int32_t outputsize)
log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
log_err("euc-jp->u with stop did not match.\n");
@ -1420,7 +1420,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
log_err("u-> ibm-943 with substitute did not match.\n");
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
log_err("u-> euc-jp with substitute did not match.\n");
@ -1590,7 +1590,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
log_err("euc-jp->u with substitute did not match.\n");
@ -1602,7 +1602,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
log_err("euc-jp->u with substitute did not match.\n");
}
@ -2051,7 +2051,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
log_err("u-> ibm-943 with subst with value did not match.\n");
if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
to_euc_jp, sizeof(to_euc_jp), "euc-jp",
to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
log_err("u-> euc-jp with subst with value did not match.\n");
@ -2435,7 +2435,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
log_err("ibm-943->u with substitute with value did not match.\n");
if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
log_err("euc-jp->u with substitute with value did not match.\n");
@ -2724,14 +2724,11 @@ UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t
UChar errChars[50]; /* should be sufficient */
int8_t errLen = 50;
UErrorCode err = U_ZERO_ERROR;
const UChar* limit= NULL;
const UChar* start= NULL;
ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
if(U_FAILURE(err)){
log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
}
/* src points to limit of invalid chars */
limit = src;
/* length of the invalid chars should be equal to the returned length */
start = src - errLen;
if(u_strncmp(errChars,start,errLen)!=0){
@ -2922,14 +2919,11 @@ UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *e
char errChars[50]; /* should be sufficient */
int8_t errLen = 50;
UErrorCode err = U_ZERO_ERROR;
const char* limit= NULL;
const char* start= NULL;
ucnv_getInvalidChars(conv,errChars, &errLen, &err);
if(U_FAILURE(err)){
log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
}
/* src points to limit of invalid chars */
limit = src;
/* length of the invalid chars should be equal to the returned length */
start = src - errLen;
if(uprv_strncmp(errChars,start,errLen)!=0){

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*****************************************************************************
@ -434,10 +434,10 @@ static void TestErrorBehaviour(){
log_err("u-> ibm-1363 [UCNV_MBCS] \n");
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
log_err("u-> euc-jp [UCNV_MBCS] \n");
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
log_err("u-> euc-jp [UCNV_MBCS] \n");
}

View file

@ -1,6 +1,6 @@
//*******************************************************************************
//
// Copyright (C) 2003-2012, International Business Machines
// Copyright (C) 2003-2013, International Business Machines
// Corporation and others. All Rights Reserved.
//
// file name: conversion.txt
@ -1766,8 +1766,8 @@ conversion:table(nofallback) {
{ "UTF-8", "a\ud800b", :bin{ 61efbfbd62 }, :intvector{ 0, 1, 1, 1, 2 }, :int{1}, :int{0}, "", "", "" }
// Code coverage for the EUC variants.
{ "EUC-JP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4ae618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 6, 7, 7 }, :int{1}, :int{0}, "", "0", "" }
{ "EUC-JP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4aef4fef4fe618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7 }, :int{1}, :int{0}, "", "", "" }
{ "IBM-eucJP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4ae618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 6, 7, 7 }, :int{1}, :int{0}, "", "0", "" }
{ "IBM-eucJP", "\u0061\u4edd\u5bec\ud801\udc01\ud801\u0061\u00a2", :bin{ 61a1b88ff4aef4fef4fe618ee0 }, :intvector{ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7 }, :int{1}, :int{0}, "", "", "" }
{ "EUC-TW", "\u0061\u2295\u5BF2\ud801\udc01\ud801\u0061\u8706\u008a", :bin{ 61a2d38ea2dce561e6ca8a }, :intvector{ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8 }, :int{1}, :int{0}, "", "0", "" }
{ "EUC-TW", "\u0061\u2295\u5BF2\ud801\udc01\ud801\u0061\u8706\u008a", :bin{ 61a2d38ea2dce5fdfefdfe61e6ca8a }, :intvector{ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8 }, :int{1}, :int{0}, "", "", "" }