ICU-5410 Improve testing of CharsetRecognizer::getLanguage

X-SVN-Rev: 20492
This commit is contained in:
George Rhoten 2006-10-05 18:51:11 +00:00
parent f304022ded
commit 348c2eb1ff
2 changed files with 13 additions and 11 deletions

View file

@ -199,7 +199,7 @@
</test-case>
<!-- No EUC-JP in this test because it detects as GB18030 -->
<test-case id="IUC10-jp" encodings="UTF-8 UTF-32BE UTF-32LE Shift_JIS ISO-2022-JP">
<test-case id="IUC10-jp" encodings="UTF-8 UTF-32BE UTF-32LE Shift_JIS/ja ISO-2022-JP">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
ヨーロッパ、ソフトウェア、そしてインターネット:
@ -214,7 +214,7 @@
</test-case>
<test-case id="IUC10-ko" encodings="UTF-8 UTF-32BE UTF-32LE EUC-KR ISO-2022-KR">
<test-case id="IUC10-ko" encodings="UTF-8 UTF-32BE UTF-32LE EUC-KR/ko ISO-2022-KR">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
유럽, 소프트웨어 그리고 인터넷:
@ -246,8 +246,8 @@
</test-case>
<!-- No language for ISO-8859-1 in this test because no-BO is recognized as Danish... -->
<test-case id="IUC10-no-BO" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1">
<!-- No language for ISO-8859-1 in this test because no-NO is recognized as Danish... -->
<test-case id="IUC10-no-NO" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/da">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
Europa, Programvare og Internet:
@ -262,7 +262,7 @@
</test-case>
<test-case id="IUC10-no-NY" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/no">
<test-case id="IUC10-no-NO-NY" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/no">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
Europa, programvare og Internett:
@ -395,7 +395,7 @@
<!-- /test-case -->
<!-- No ISO-2022-CN in this test because Java doesn't support it in both directions :-( -->
<test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030">
<test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030/zh">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
欧洲,软件+互联网

View file

@ -27,9 +27,6 @@ import org.w3c.dom.*;
/**
* @author andy
*
* TODO To change the template for this generated type comment go to
* Window - Preferences - Java - Code Style - Code Templates
*/
public class TestCharsetDetector extends TestFmwk
{
@ -101,7 +98,11 @@ public class TestCharsetDetector extends TestFmwk
return;
}
if (! (language == null || m.getLanguage().equals(language))) {
String charsetMatchLanguage = m.getLanguage();
if ((language != null && !charsetMatchLanguage.equals(language))
|| (language == null && charsetMatchLanguage != null)
|| (language != null && charsetMatchLanguage == null))
{
errln(id + ", " + encoding + ": language detection failure - expected " + language + ", got " + m.getLanguage());
}
@ -179,7 +180,8 @@ public class TestCharsetDetector extends TestFmwk
det.setText(new ByteArrayInputStream(bytes));
checkMatch(det, testString, enc, lang, id);
} catch (Exception e) {
errln(id + ": " + e.toString());
errln(id + ": " + e.toString() + "enc=" + enc);
e.printStackTrace();
}
}