mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-4060 Clean up charset names to match what Java expects; don't emit confidence over 100.
X-SVN-Rev: 17728
This commit is contained in:
parent
e3c674aef7
commit
291516499b
3 changed files with 12 additions and 6 deletions
|
@ -443,8 +443,8 @@ public class CharsetDetector {
|
|||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_7_el());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_he());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_8859_8_he_visual());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_windows_1251());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_windows_1256());
|
||||
recognizers.add(new CharsetRecog_sbcs.CharsetRecog_KOI8_R());
|
||||
|
|
|
@ -105,7 +105,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
|
|||
};
|
||||
|
||||
String getName() {
|
||||
return "ISO2022-JP";
|
||||
return "ISO-2022-JP";
|
||||
}
|
||||
|
||||
int match(CharsetDetector det) {
|
||||
|
@ -119,7 +119,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
|
|||
};
|
||||
|
||||
String getName() {
|
||||
return "ISO2022-KR";
|
||||
return "ISO-2022-KR";
|
||||
}
|
||||
|
||||
int match(CharsetDetector det) {
|
||||
|
@ -144,7 +144,7 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer {
|
|||
};
|
||||
|
||||
String getName() {
|
||||
return "ISO2022-CN";
|
||||
return "ISO-2022-CN";
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -144,6 +144,12 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
|
|||
// return 0;
|
||||
// }
|
||||
|
||||
// TODO - This is a bit of a hack to take care of a case
|
||||
// where we were getting a confidence of 135...
|
||||
if (rawPercent > 0.33) {
|
||||
return 98;
|
||||
}
|
||||
|
||||
return (int) (rawPercent * 300.0);
|
||||
}
|
||||
}
|
||||
|
@ -757,7 +763,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
|
|||
}
|
||||
}
|
||||
|
||||
static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
|
||||
static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8
|
||||
{
|
||||
private static int[] ngrams = {
|
||||
0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
|
||||
|
@ -777,7 +783,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer {
|
|||
}
|
||||
}
|
||||
|
||||
static class CharsetRecog_8859_8_he_visual extends CharsetRecog_8859_8
|
||||
static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
|
||||
{
|
||||
private static int[] ngrams = {
|
||||
0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
|
||||
|
|
Loading…
Add table
Reference in a new issue