ICU-4060 Clean up charset names to match what Java expects, don't emit confidence over 100.

X-SVN-Rev: 17728
This commit is contained in:
Eric Mader 2005-05-28 00:30:14 +00:00
parent e3c674aef7
commit 291516499b
3 changed files with 12 additions and 6 deletions

View file

@ -443,8 +443,8 @@ public class CharsetDetector {
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_7_el());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_he());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_he_visual());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_windows_1251());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_windows_1256());
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_KOI8_R());

View file

@ -105,7 +105,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
};
String getName() {
return "ISO2022-JP";
return "ISO-2022-JP";
}
int match(CharsetDetector det) {
@ -119,7 +119,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
};
String getName() {
return "ISO2022-KR";
return "ISO-2022-KR";
}
int match(CharsetDetector det) {
@ -144,7 +144,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
};
String getName() {
return "ISO2022-CN";
return "ISO-2022-CN";
}

View file

@ -144,6 +144,12 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
// return 0;
// }
// TODO - This is a bit of a hack to take care of a case
// where we were getting a confidence of 135...
if (rawPercent > 0.33) {
return 98;
}
return (int) (rawPercent * 300.0);
}
}
@ -757,7 +763,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
}
static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8
{
private static int[] ngrams = {
0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
@ -777,7 +783,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
}
static class CharsetRecog_8859_8_he_visual extends CharsetRecog_8859_8
static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
{
private static int[] ngrams = {
0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,