From 1907335b06f4bfd5a8755701ffcd0600ad370a3f Mon Sep 17 00:00:00 2001 From: Daria Volvenkova Date: Fri, 21 Apr 2017 18:40:10 +0300 Subject: [PATCH] Remove diacritics after transliteration. Don't transliterate Thai language. --- coding/multilang_utf8_string.cpp | 4 ++-- coding/transliteration.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/coding/multilang_utf8_string.cpp b/coding/multilang_utf8_string.cpp index 0a2fce644b..333bfdcc5f 100644 --- a/coding/multilang_utf8_string.cpp +++ b/coding/multilang_utf8_string.cpp @@ -25,7 +25,7 @@ StringUtf8Multilang::Languages const g_languages = { {"be", "Беларуская", "Belarusian-Latin/BGN"}, {"ka", "ქართული", "Georgian-Latin"}, {"ko", "한국어", "Hangul-Latin/BGN"}, - {"he", "עברית", "Hebrew-Latin/BGN"}, + {"he", "עברית", "Hebrew-Latin"}, {"nl", "Nederlands", ""}, {"ga", "Gaeilge", ""}, {"ja_rm", "Japanese (Romanized)", "Any-Latin"}, @@ -33,7 +33,7 @@ StringUtf8Multilang::Languages const g_languages = { {"it", "Italiano", ""}, {"es", "Español", ""}, {"zh_pinyin", "Chinese (Pinyin)", "Any-Latin"}, - {"th", "ไทย", "Thai-Latin"}, + {"th", "ไทย", ""}, // Thai-Latin {"cy", "Cymraeg", ""}, {"sr", "Српски", "Serbian-Latin/BGN"}, {"uk", "Українська", "Ukrainian-Latin/BGN"}, diff --git a/coding/transliteration.cpp b/coding/transliteration.cpp index 618c602471..84cc8fca75 100644 --- a/coding/transliteration.cpp +++ b/coding/transliteration.cpp @@ -63,7 +63,7 @@ bool Transliteration::Transliterate(std::string const & str, int8_t langCode, st if (str.empty()) return false; - std::string const transliteratorId(StringUtf8Multilang::GetTransliteratorIdByCode(langCode)); + std::string transliteratorId(StringUtf8Multilang::GetTransliteratorIdByCode(langCode)); if (transliteratorId.empty()) return false; @@ -81,12 +81,16 @@ bool Transliteration::Transliterate(std::string const & str, int8_t langCode, st if (!it->second->m_initialized) { UErrorCode status = U_ZERO_ERROR; - UnicodeString translitId(it->first.c_str()); + + std::string const removeDiacriticRule = ";NFD;[\u02B9-\u02D3\u0301-\u0358]Remove;NFC"; + transliteratorId.append(removeDiacriticRule); + + UnicodeString translitId(transliteratorId.c_str()); it->second->m_transliterator.reset(Transliterator::createInstance(translitId, UTRANS_FORWARD, status)); if (it->second->m_transliterator == nullptr) - LOG(LWARNING, ("Cannot create transliterator \"", it->first, "\", icu error =", status)); + LOG(LWARNING, ("Cannot create transliterator \"", transliteratorId, "\", icu error =", status)); it->second->m_initialized = true; }