mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5310 White space fixes.
X-SVN-Rev: 20017
This commit is contained in:
parent
4ce961aa13
commit
70d3b36d5f
3 changed files with 183 additions and 183 deletions
|
@ -43,7 +43,7 @@ const int32_t commonChars_euc_jp[] = {
|
|||
0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
|
||||
0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
|
||||
0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
|
||||
0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
|
||||
0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
|
||||
|
||||
const int32_t commonChars_euc_kr[] = {
|
||||
// TODO: This set of data comes from the character frequency-
|
||||
|
|
|
@ -105,7 +105,7 @@ int32_t NGramParser::parse(InputText *det)
|
|||
// TODO: 0x20 might not be a space in all character sets...
|
||||
if (mb != 0) {
|
||||
if (!(mb == 0x20 && ignoreSpace)) {
|
||||
addByte(mb);
|
||||
addByte(mb);
|
||||
}
|
||||
|
||||
ignoreSpace = (mb == 0x20);
|
||||
|
@ -672,12 +672,12 @@ CharsetRecog_8859_1::~CharsetRecog_8859_1()
|
|||
|
||||
const char *CharsetRecog_8859_1::getName() const
|
||||
{
|
||||
return haveC1Bytes? "windows-1252" : "ISO-8859-1";
|
||||
return haveC1Bytes? "windows-1252" : "ISO-8859-1";
|
||||
}
|
||||
|
||||
const char *CharsetRecog_8859_1_en::getLanguage() const
|
||||
{
|
||||
return "en";
|
||||
return "en";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_en::~CharsetRecog_8859_1_en()
|
||||
|
@ -690,7 +690,7 @@ int32_t CharsetRecog_8859_1_en::match(InputText *textIn)
|
|||
int32_t result = match_sbcs(textIn, ngrams_8859_1_en, charMap_8859_1);
|
||||
|
||||
// printf("8859_1_en: result = %d\n", result);
|
||||
return result; //match_sbcs(textIn, ngrams, charMap);
|
||||
return result; //match_sbcs(textIn, ngrams, charMap);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
|
||||
|
@ -700,24 +700,24 @@ CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
|
|||
|
||||
const char *CharsetRecog_8859_1_da::getLanguage() const
|
||||
{
|
||||
return "da";
|
||||
return "da";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_da::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_da, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_da, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_de::~CharsetRecog_8859_1_de() {}
|
||||
|
||||
const char *CharsetRecog_8859_1_de::getLanguage() const
|
||||
{
|
||||
return "de";
|
||||
return "de";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_de::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_de, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_de, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
|
||||
|
@ -727,12 +727,12 @@ CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
|
|||
|
||||
const char *CharsetRecog_8859_1_es::getLanguage() const
|
||||
{
|
||||
return "es";
|
||||
return "es";
|
||||
}
|
||||
|
||||
int32_t CharsetRecog_8859_1_es::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_es, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_es, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
|
||||
|
@ -742,12 +742,12 @@ CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
|
|||
|
||||
const char *CharsetRecog_8859_1_fr::getLanguage() const
|
||||
{
|
||||
return "fr";
|
||||
return "fr";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_fr::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_fr, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_fr, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
|
||||
|
@ -757,12 +757,12 @@ CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
|
|||
|
||||
const char *CharsetRecog_8859_1_it::getLanguage() const
|
||||
{
|
||||
return "it";
|
||||
return "it";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_it::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_it, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_it, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
|
||||
|
@ -772,24 +772,24 @@ CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
|
|||
|
||||
const char *CharsetRecog_8859_1_nl::getLanguage() const
|
||||
{
|
||||
return "nl";
|
||||
return "nl";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_nl::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_nl, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_nl, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_no::~CharsetRecog_8859_1_no() {}
|
||||
|
||||
const char *CharsetRecog_8859_1_no::getLanguage() const
|
||||
{
|
||||
return "no";
|
||||
return "no";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_no::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_no, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_no, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
|
||||
|
@ -799,24 +799,24 @@ CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
|
|||
|
||||
const char *CharsetRecog_8859_1_pt::getLanguage() const
|
||||
{
|
||||
return "pt";
|
||||
return "pt";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_pt::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_pt, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_pt, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_1_sv::~CharsetRecog_8859_1_sv() {}
|
||||
|
||||
const char *CharsetRecog_8859_1_sv::getLanguage() const
|
||||
{
|
||||
return "sv";
|
||||
return "sv";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_1_sv::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_1_sv, charMap_8859_1);
|
||||
return match_sbcs(textIn, ngrams_8859_1_sv, charMap_8859_1);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_2::~CharsetRecog_8859_2()
|
||||
|
@ -826,7 +826,7 @@ CharsetRecog_8859_2::~CharsetRecog_8859_2()
|
|||
|
||||
const char *CharsetRecog_8859_2::getName() const
|
||||
{
|
||||
return haveC1Bytes? "windows-1250" : "ISO-8859-2";
|
||||
return haveC1Bytes? "windows-1250" : "ISO-8859-2";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
|
||||
|
@ -836,12 +836,12 @@ CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
|
|||
|
||||
const char *CharsetRecog_8859_2_cs::getLanguage() const
|
||||
{
|
||||
return "cs";
|
||||
return "cs";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_2_cs::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_2_cs, charMap_8859_2);
|
||||
return match_sbcs(textIn, ngrams_8859_2_cs, charMap_8859_2);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
|
||||
|
@ -851,12 +851,12 @@ CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
|
|||
|
||||
const char *CharsetRecog_8859_2_hu::getLanguage() const
|
||||
{
|
||||
return "hu";
|
||||
return "hu";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_2_hu::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_2_hu, charMap_8859_2);
|
||||
return match_sbcs(textIn, ngrams_8859_2_hu, charMap_8859_2);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
|
||||
|
@ -866,12 +866,12 @@ CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
|
|||
|
||||
const char *CharsetRecog_8859_2_pl::getLanguage() const
|
||||
{
|
||||
return "pl";
|
||||
return "pl";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_2_pl::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_2_pl, charMap_8859_2);
|
||||
return match_sbcs(textIn, ngrams_8859_2_pl, charMap_8859_2);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
|
||||
|
@ -881,12 +881,12 @@ CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
|
|||
|
||||
const char *CharsetRecog_8859_2_ro::getLanguage() const
|
||||
{
|
||||
return "ro";
|
||||
return "ro";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_2_ro::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_2_ro, charMap_8859_2);
|
||||
return match_sbcs(textIn, ngrams_8859_2_ro, charMap_8859_2);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_5::~CharsetRecog_8859_5()
|
||||
|
@ -896,7 +896,7 @@ CharsetRecog_8859_5::~CharsetRecog_8859_5()
|
|||
|
||||
const char *CharsetRecog_8859_5::getName() const
|
||||
{
|
||||
return "ISO-8859-5";
|
||||
return "ISO-8859-5";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
|
||||
|
@ -906,12 +906,12 @@ CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
|
|||
|
||||
const char *CharsetRecog_8859_5_ru::getLanguage() const
|
||||
{
|
||||
return "ru";
|
||||
return "ru";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_5_ru::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
|
||||
return match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_6::~CharsetRecog_8859_6()
|
||||
|
@ -921,7 +921,7 @@ CharsetRecog_8859_6::~CharsetRecog_8859_6()
|
|||
|
||||
const char *CharsetRecog_8859_6::getName() const
|
||||
{
|
||||
return "ISO-8859-6";
|
||||
return "ISO-8859-6";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
|
||||
|
@ -931,12 +931,12 @@ CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
|
|||
|
||||
const char *CharsetRecog_8859_6_ar::getLanguage() const
|
||||
{
|
||||
return "ar";
|
||||
return "ar";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_6_ar::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
|
||||
return match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_7::~CharsetRecog_8859_7()
|
||||
|
@ -946,7 +946,7 @@ CharsetRecog_8859_7::~CharsetRecog_8859_7()
|
|||
|
||||
const char *CharsetRecog_8859_7::getName() const
|
||||
{
|
||||
return haveC1Bytes? "windows-1253" : "ISO-8859-7";
|
||||
return haveC1Bytes? "windows-1253" : "ISO-8859-7";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
|
||||
|
@ -956,12 +956,12 @@ CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
|
|||
|
||||
const char *CharsetRecog_8859_7_el::getLanguage() const
|
||||
{
|
||||
return "el";
|
||||
return "el";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_7_el::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
|
||||
return match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_8::~CharsetRecog_8859_8()
|
||||
|
@ -971,7 +971,7 @@ CharsetRecog_8859_8::~CharsetRecog_8859_8()
|
|||
|
||||
const char *CharsetRecog_8859_8::getName() const
|
||||
{
|
||||
return haveC1Bytes? "windows-1255" : "ISO-8859-8";
|
||||
return haveC1Bytes? "windows-1255" : "ISO-8859-8";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
|
||||
|
@ -986,12 +986,12 @@ const char *CharsetRecog_8859_8_I_he::getName() const
|
|||
|
||||
const char *CharsetRecog_8859_8_I_he::getLanguage() const
|
||||
{
|
||||
return "he";
|
||||
return "he";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_8_I_he::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
|
||||
return match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
|
||||
|
@ -1001,12 +1001,12 @@ CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
|
|||
|
||||
const char *CharsetRecog_8859_8_he::getLanguage() const
|
||||
{
|
||||
return "he";
|
||||
return "he";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_8_he::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
|
||||
return match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
|
||||
}
|
||||
|
||||
CharsetRecog_8859_9::~CharsetRecog_8859_9()
|
||||
|
@ -1016,7 +1016,7 @@ CharsetRecog_8859_9::~CharsetRecog_8859_9()
|
|||
|
||||
const char *CharsetRecog_8859_9::getName() const
|
||||
{
|
||||
return haveC1Bytes? "windows-1254" : "ISO-8859-9";
|
||||
return haveC1Bytes? "windows-1254" : "ISO-8859-9";
|
||||
}
|
||||
|
||||
CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
|
||||
|
@ -1026,12 +1026,12 @@ CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
|
|||
|
||||
const char *CharsetRecog_8859_9_tr::getLanguage() const
|
||||
{
|
||||
return "tr";
|
||||
return "tr";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_8859_9_tr::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
|
||||
return match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
|
||||
}
|
||||
|
||||
CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
|
||||
|
@ -1041,17 +1041,17 @@ CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
|
|||
|
||||
const char *CharsetRecog_windows_1256::getName() const
|
||||
{
|
||||
return "windows-1256";
|
||||
return "windows-1256";
|
||||
}
|
||||
|
||||
|
||||
const char *CharsetRecog_windows_1256::getLanguage() const
|
||||
{
|
||||
return "ar";
|
||||
return "ar";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_windows_1256::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
|
||||
return match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
|
||||
}
|
||||
|
||||
CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
|
||||
|
@ -1061,17 +1061,17 @@ CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
|
|||
|
||||
const char *CharsetRecog_windows_1251::getName() const
|
||||
{
|
||||
return "windows-1251";
|
||||
return "windows-1251";
|
||||
}
|
||||
|
||||
|
||||
const char *CharsetRecog_windows_1251::getLanguage() const
|
||||
{
|
||||
return "ru";
|
||||
return "ru";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_windows_1251::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
|
||||
return match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
|
||||
}
|
||||
|
||||
CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
|
||||
|
@ -1081,17 +1081,17 @@ CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
|
|||
|
||||
const char *CharsetRecog_KOI8_R::getName() const
|
||||
{
|
||||
return "KOI8-R";
|
||||
return "KOI8-R";
|
||||
}
|
||||
|
||||
|
||||
const char *CharsetRecog_KOI8_R::getLanguage() const
|
||||
{
|
||||
return "ru";
|
||||
return "ru";
|
||||
}
|
||||
|
||||
|
||||
int32_t CharsetRecog_KOI8_R::match(InputText *textIn)
|
||||
{
|
||||
return match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
|
||||
return match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -18,94 +18,94 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
class NGramParser : public UMemory
|
||||
{
|
||||
private:
|
||||
private:
|
||||
int32_t byteIndex;
|
||||
int32_t ngram;
|
||||
|
||||
|
||||
const int32_t *ngramList;
|
||||
const uint8_t *charMap;
|
||||
|
||||
|
||||
int32_t ngramCount;
|
||||
int32_t hitCount;
|
||||
|
||||
public:
|
||||
|
||||
public:
|
||||
NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
/*
|
||||
* Binary search for value in table, which must have exactly 64 entries.
|
||||
*/
|
||||
* Binary search for value in table, which must have exactly 64 entries.
|
||||
*/
|
||||
int32_t search(const int32_t *table, int32_t value);
|
||||
|
||||
void lookup(int32_t thisNgram);
|
||||
void addByte(int32_t b);
|
||||
void lookup(int32_t thisNgram);
|
||||
void addByte(int32_t b);
|
||||
int32_t nextByte(InputText *det);
|
||||
|
||||
public:
|
||||
|
||||
public:
|
||||
int32_t parse(InputText *det);
|
||||
|
||||
};
|
||||
|
||||
class CharsetRecog_sbcs : public CharsetRecognizer
|
||||
{
|
||||
protected:
|
||||
protected:
|
||||
UBool haveC1Bytes;
|
||||
|
||||
public:
|
||||
public:
|
||||
CharsetRecog_sbcs();
|
||||
|
||||
virtual ~CharsetRecog_sbcs();
|
||||
|
||||
|
||||
virtual const char *getName() const = 0;
|
||||
|
||||
virtual int32_t match(InputText *det) = 0;
|
||||
|
||||
|
||||
int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_2 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_2();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_5 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_5();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_6 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_6();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_7 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_7();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_8 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_8();
|
||||
|
||||
virtual const char *getName() const;
|
||||
|
@ -113,77 +113,77 @@ class CharsetRecog_8859_8 : public CharsetRecog_sbcs
|
|||
|
||||
class CharsetRecog_8859_9 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_9();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_en : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_en();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_da : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_da();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_de : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_de();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_es : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_es();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_fr();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_it : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_it();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_nl();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
|
@ -191,170 +191,170 @@ class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1
|
|||
|
||||
class CharsetRecog_8859_1_no : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_no();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_pt();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_1_sv();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_2_cs();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_2_hu();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_2_pl();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_2_ro();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_5_ru();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_6_ar();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_7_el();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_8_I_he();
|
||||
|
||||
const char *getName() const;
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_8_he ();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_8859_9_tr ();
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_windows_1256();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_windows_1251();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
|
||||
|
||||
class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CharsetRecog_KOI8_R();
|
||||
|
||||
|
||||
const char *getName() const;
|
||||
|
||||
|
||||
const char *getLanguage() const;
|
||||
|
||||
|
||||
int32_t match(InputText *textIn);
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue