mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-17 02:37:25 +00:00
ICU-6889 Add test for CharsetDetector.detectAll() producing the same encoding multiple times.
X-SVN-Rev: 31905
This commit is contained in:
parent
cfb458d917
commit
709b0884cc
1 changed file with 25 additions and 1 deletion
|
@@ -11,6 +11,7 @@ import java.io.ByteArrayOutputStream;
|
|||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.HashSet;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
@@ -1104,7 +1105,30 @@ public class TestCharsetDetector extends TestFmwk
|
|||
|
||||
name1 = match1.getName();
|
||||
assertEquals("Wrong charset name after running a second charset detector", "windows-1252", name1);
|
||||
|
||||
}
|
||||
|
||||
public void TestBug6889() {
|
||||
// Verify that CharsetDetector.detectAll() does not return the same encoding multiple times.
|
||||
String text =
|
||||
"This is a small sample of some English text. Just enough to be sure that it detects correctly.";
|
||||
byte[] textBytes;
|
||||
try {
|
||||
textBytes = text.getBytes("ISO-8859-1");
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Unexpected exception " + e.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
CharsetDetector det = new CharsetDetector();
|
||||
det.setText(textBytes);
|
||||
CharsetMatch matches[] = det.detectAll();
|
||||
|
||||
HashSet<String> detectedEncodings = new HashSet<String>();
|
||||
for (CharsetMatch m: matches) {
|
||||
assertTrue("Charset " + m.getName() + " encountered before",
|
||||
detectedEncodings.add(m.getName()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue