Remove diacritics after transliteration. Don't transliterate the Thai language.

This commit is contained in:
Daria Volvenkova 2017-04-21 18:40:10 +03:00
parent 315897aa9c
commit 1907335b06
2 changed files with 9 additions and 5 deletions

View file

@ -25,7 +25,7 @@ StringUtf8Multilang::Languages const g_languages = {
{"be", "Беларуская", "Belarusian-Latin/BGN"},
{"ka", "ქართული", "Georgian-Latin"},
{"ko", "한국어", "Hangul-Latin/BGN"},
{"he", "עברית", "Hebrew-Latin/BGN"},
{"he", "עברית", "Hebrew-Latin"},
{"nl", "Nederlands", ""},
{"ga", "Gaeilge", ""},
{"ja_rm", "Japanese (Romanized)", "Any-Latin"},
@ -33,7 +33,7 @@ StringUtf8Multilang::Languages const g_languages = {
{"it", "Italiano", ""},
{"es", "Español", ""},
{"zh_pinyin", "Chinese (Pinyin)", "Any-Latin"},
{"th", "ไทย", "Thai-Latin"},
{"th", "ไทย", ""}, // Thai-Latin
{"cy", "Cymraeg", ""},
{"sr", "Српски", "Serbian-Latin/BGN"},
{"uk", "Українська", "Ukrainian-Latin/BGN"},

View file

@ -63,7 +63,7 @@ bool Transliteration::Transliterate(std::string const & str, int8_t langCode, st
if (str.empty())
return false;
std::string const transliteratorId(StringUtf8Multilang::GetTransliteratorIdByCode(langCode));
std::string transliteratorId(StringUtf8Multilang::GetTransliteratorIdByCode(langCode));
if (transliteratorId.empty())
return false;
@ -81,12 +81,16 @@ bool Transliteration::Transliterate(std::string const & str, int8_t langCode, st
if (!it->second->m_initialized)
{
UErrorCode status = U_ZERO_ERROR;
UnicodeString translitId(it->first.c_str());
std::string const removeDiacriticRule = ";NFD;[\u02B9-\u02D3\u0301-\u0358]Remove;NFC";
transliteratorId.append(removeDiacriticRule);
UnicodeString translitId(transliteratorId.c_str());
it->second->m_transliterator.reset(Transliterator::createInstance(translitId, UTRANS_FORWARD, status));
if (it->second->m_transliterator == nullptr)
LOG(LWARNING, ("Cannot create transliterator \"", it->first, "\", icu error =", status));
LOG(LWARNING, ("Cannot create transliterator \"", transliteratorId, "\", icu error =", status));
it->second->m_initialized = true;
}