ICU-4060 Add Danish, Dutch 8859-1.

X-SVN-Rev: 17591
This commit is contained in:
Eric Mader 2005-05-13 00:09:32 +00:00
parent e9f300b3b8
commit eda58c8714
2 changed files with 47 additions and 5 deletions

View file

@ -354,11 +354,13 @@ public class CharsetDetector {
recognizers.add(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_jp());
recognizers.add(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_kr());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_da());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_de());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_en());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_es());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_fr());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_it());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_nl());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_1_pt());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_2_cs());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_2_pl());

View file

@ -211,14 +211,34 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
}
/**
 * Recognizer for Danish text, built on the shared {@code CharsetRecog_8859_1} base class.
 *
 * Supplies a Danish n-gram table and the language code "da"; the matching
 * itself is done by the inherited three-argument {@code match}.
 */
static class CharsetRecog_8859_1_da extends CharsetRecog_8859_1
{
    /**
     * Danish n-gram table: 64 entries, listed in ascending numeric order.
     * Each entry packs three byte values into one int, e.g. 0x206166 is the
     * bytes 0x20 0x61 0x66 (" af") and 0x2070E5 is 0x20 0x70 0xE5 (" p" followed
     * by a-ring in ISO-8859-1).
     *
     * Declared final so the shared table reference cannot be reassigned.
     * NOTE(review): the inherited match may depend on the ascending order
     * (e.g. for a binary search) -- confirm before reordering or adding entries.
     */
    private static final int[] ngrams = {
        0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
        0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
        0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
        0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
    };

    /**
     * @return the language code this recognizer reports, "da"
     */
    public String getLanguage()
    {
        return "da";
    }

    /**
     * Matches the detector's input against the Danish n-gram table.
     *
     * @param det the detector passed through to the inherited match
     * @return whatever the inherited {@code match(det, ngrams, byteMap)} returns
     *         (presumably a confidence score -- confirm against the superclass)
     */
    public int match(CharsetDetector det)
    {
        // byteMap is not declared in this class; it is inherited or otherwise in scope.
        return match(det, ngrams, byteMap);
    }
}
static class CharsetRecog_8859_1_de extends CharsetRecog_8859_1
{
private static int[] ngrams = {
0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
};
0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
};
public String getLanguage()
{
@ -311,6 +331,26 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
}
/**
 * Recognizer for Dutch text, built on the shared {@code CharsetRecog_8859_1} base class.
 *
 * Supplies a Dutch n-gram table and the language code "nl"; the matching
 * itself is done by the inherited three-argument {@code match}.
 */
static class CharsetRecog_8859_1_nl extends CharsetRecog_8859_1
{
    /**
     * Dutch n-gram table: 64 entries, listed in ascending numeric order.
     * Each entry packs three byte values into one int, e.g. 0x206465 is the
     * bytes 0x20 0x64 0x65 (" de") and 0x76616E is 0x76 0x61 0x6E ("van").
     *
     * Declared final so the shared table reference cannot be reassigned.
     * NOTE(review): the inherited match may depend on the ascending order
     * (e.g. for a binary search) -- confirm before reordering or adding entries.
     */
    private static final int[] ngrams = {
        0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
        0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
        0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
        0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    };

    /**
     * @return the language code this recognizer reports, "nl"
     */
    public String getLanguage()
    {
        return "nl";
    }

    /**
     * Matches the detector's input against the Dutch n-gram table.
     *
     * @param det the detector passed through to the inherited match
     * @return whatever the inherited {@code match(det, ngrams, byteMap)} returns
     *         (presumably a confidence score -- confirm against the superclass)
     */
    public int match(CharsetDetector det)
    {
        // byteMap is not declared in this class; it is inherited or otherwise in scope.
        return match(det, ngrams, byteMap);
    }
}
static class CharsetRecog_8859_1_pt extends CharsetRecog_8859_1
{
private static int[] ngrams = {