mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5410 Improve testing of CharsetRecognizer::getLanguage
X-SVN-Rev: 20492
This commit is contained in:
parent
f304022ded
commit
348c2eb1ff
2 changed files with 13 additions and 11 deletions
|
@ -199,7 +199,7 @@
|
|||
</test-case>
|
||||
|
||||
<!-- No EUC-JP in this test because it detects as GB18030 -->
|
||||
<test-case id="IUC10-jp" encodings="UTF-8 UTF-32BE UTF-32LE Shift_JIS ISO-2022-JP">
|
||||
<test-case id="IUC10-jp" encodings="UTF-8 UTF-32BE UTF-32LE Shift_JIS/ja ISO-2022-JP">
|
||||
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
||||
|
||||
ヨーロッパ、ソフトウェア、そしてインターネット:
|
||||
|
@ -214,7 +214,7 @@
|
|||
|
||||
</test-case>
|
||||
|
||||
<test-case id="IUC10-ko" encodings="UTF-8 UTF-32BE UTF-32LE EUC-KR ISO-2022-KR">
|
||||
<test-case id="IUC10-ko" encodings="UTF-8 UTF-32BE UTF-32LE EUC-KR/ko ISO-2022-KR">
|
||||
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
||||
|
||||
유럽, 소프트웨어 그리고 인터넷:
|
||||
|
@ -246,8 +246,8 @@
|
|||
|
||||
</test-case>
|
||||
|
||||
<!-- No language for ISO-8859-1 in this test because no-BO is recognized as Danish... -->
|
||||
<test-case id="IUC10-no-BO" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1">
|
||||
<!-- ISO-8859-1 is expected to report language "da" in this test because no-NO is recognized as Danish... -->
|
||||
<test-case id="IUC10-no-NO" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/da">
|
||||
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
||||
|
||||
Europa, Programvare og Internet:
|
||||
|
@ -262,7 +262,7 @@
|
|||
|
||||
</test-case>
|
||||
|
||||
<test-case id="IUC10-no-NY" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/no">
|
||||
<test-case id="IUC10-no-NO-NY" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/no">
|
||||
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
||||
|
||||
Europa, programvare og Internett:
|
||||
|
@ -395,7 +395,7 @@
|
|||
<!-- /test-case -->
|
||||
|
||||
<!-- No ISO-2022-CN in this test because Java doesn't support it in both directions :-( -->
|
||||
<test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030">
|
||||
<test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030/zh">
|
||||
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
||||
|
||||
欧洲,软件+互联网
|
||||
|
|
|
@ -27,9 +27,6 @@ import org.w3c.dom.*;
|
|||
|
||||
/**
|
||||
* @author andy
|
||||
*
|
||||
* TODO To change the template for this generated type comment go to
|
||||
* Window - Preferences - Java - Code Style - Code Templates
|
||||
*/
|
||||
public class TestCharsetDetector extends TestFmwk
|
||||
{
|
||||
|
@ -101,7 +98,11 @@ public class TestCharsetDetector extends TestFmwk
|
|||
return;
|
||||
}
|
||||
|
||||
if (! (language == null || m.getLanguage().equals(language))) {
|
||||
String charsetMatchLanguage = m.getLanguage();
|
||||
if ((language != null && !charsetMatchLanguage.equals(language))
|
||||
|| (language == null && charsetMatchLanguage != null)
|
||||
|| (language != null && charsetMatchLanguage == null))
|
||||
{
|
||||
errln(id + ", " + encoding + ": language detection failure - expected " + language + ", got " + m.getLanguage());
|
||||
}
|
||||
|
||||
|
@ -179,7 +180,8 @@ public class TestCharsetDetector extends TestFmwk
|
|||
det.setText(new ByteArrayInputStream(bytes));
|
||||
checkMatch(det, testString, enc, lang, id);
|
||||
} catch (Exception e) {
|
||||
errln(id + ": " + e.toString());
|
||||
errln(id + ": " + e.toString() + "enc=" + enc);
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue