// Patch summary (text before the first "diff --git" header is not part of any hunk).
//
// coding/coding_tests/multilang_utf8_string_test.cpp
//   * Includes utfcpp's utf8.h and copies every test input into a local string `src`,
//     asserting utf8::is_valid(src.begin(), src.end()) before it is passed to AddString().
//   * The round-trip comparison is made against `src` instead of the raw array member.
//   * Smoke data gains a {"def", "default"} entry; the entry formerly tagged "omim" is
//     now tagged "be".
//
// coding/multilang_utf8_string.cpp
//   * StringUtf8Multilang::GetLangIndex: the hard-coded if/else chain
//     (empty or "en" -> 0, "ru" -> 1, "be" -> 2) is replaced by a linear search over a
//     table of language codes. The returned value is the position of the code in that
//     table ("def" is 0, "en" is 1, ...), or -1 when the code is not listed.
//     STATIC_ASSERT keeps the table at no more than 64 entries.
//     The cast target is char, matching the declared return type of the function.
//     NOTE(review): an empty language code used to map to index 0; no table entry is
//     empty, so it now yields -1. Confirm callers do not rely on the old behaviour.
//   * StringUtf8Multilang::GetNextIndex: the lead-byte mask checks are reordered from the
//     most specific mask (0xFE) down to the least specific (0xC0). In the old order the
//     0xC0 test matched every byte with both top bits set, so the branches below it could
//     never be taken.
diff --git a/coding/coding_tests/multilang_utf8_string_test.cpp b/coding/coding_tests/multilang_utf8_string_test.cpp index eed1fa4965..2b76949e09 100644 --- a/coding/coding_tests/multilang_utf8_string_test.cpp +++ b/coding/coding_tests/multilang_utf8_string_test.cpp @@ -2,6 +2,9 @@ #include "../multilang_utf8_string.hpp" +#include "../../3party/utfcpp/source/utf8.h" + + namespace { struct lang_string @@ -16,11 +19,14 @@ namespace for (size_t i = 0; i < count; ++i) { - s.AddString(arr[i].m_lang, arr[i].m_str); + string src(arr[i].m_str); + TEST(utf8::is_valid(src.begin(), src.end()), ()); + + s.AddString(arr[i].m_lang, src); string comp; TEST(s.GetString(arr[i].m_lang, comp), ()); - TEST_EQUAL(arr[i].m_str, comp, ()); + TEST_EQUAL(src, comp, ()); } for (size_t i = 0; i < count; ++i) @@ -39,8 +45,10 @@ UNIT_TEST(MultilangString_Smoke) { StringUtf8Multilang s; - lang_string arr[] = { {"en", "abcd"}, {"ru", "\xD0\xA0\xD0\xB0\xD1\x88\xD0\xBA\xD0\xB0"}, - {"omim", "\xE2\x82\xAC\xF0\xA4\xAD\xA2"} }; + lang_string arr[] = { {"def", "default"}, + {"en", "abcd"}, + {"ru", "\xD0\xA0\xD0\xB0\xD1\x88\xD0\xBA\xD0\xB0"}, + {"be", "\xE2\x82\xAC\xF0\xA4\xAD\xA2"} }; TestMultilangString(arr, ARRAY_SIZE(arr)); } diff --git a/coding/multilang_utf8_string.cpp b/coding/multilang_utf8_string.cpp index 678f16e6a2..fbeeb2f27d 100644 --- a/coding/multilang_utf8_string.cpp +++ b/coding/multilang_utf8_string.cpp @@ -4,12 +4,19 @@ char StringUtf8Multilang::GetLangIndex(string const & lang) const { - if (lang.empty() || lang == "en") - return 0; - else if (lang == "ru") - return 1; - else if (lang == "be") - return 2; + char const * arr[] = { "def", + "en", "ja", "fr", "ko_rm", "ar", "de", "ru", "sv", "zh", "fi", + "ko", "ka", "he", "be", "nl", "ga", "ja_rm", "el", "it", "es", + "th", "statcan_rbuid", "zh_pinyin", "ca", "cy", "hu", "hsb", "sr", "fa", "eu", + "pl", "br", "uk", "sl", "ro", "sq", "am", "fy", "gd", "cs", + "sk", "af", "hr", "hy", "tr", "kn", "pt", "lt", "lb", "bg", + "eo", "kk", 
"la", "et", "vi", "mn", "mk", "lv", "fur", "gsw" }; + + STATIC_ASSERT(ARRAY_SIZE(arr) <= 64); + + for (size_t i = 0; i < ARRAY_SIZE(arr); ++i) + if (lang == arr[i]) + return static_cast<char>(i); return -1; } @@ -23,18 +30,18 @@ size_t StringUtf8Multilang::GetNextIndex(size_t i) const { if ((m_s[i] & 0x80) == 0) i += 1; - else if ((m_s[i] & 0xC0) == 0xC0) - i += 2; - else if ((m_s[i] & 0xE0) == 0xE0) - i += 3; - else if ((m_s[i] & 0xF0) == 0xF0) - i += 4; - else if ((m_s[i] & 0xF8) == 0xF8) - i += 5; - else if ((m_s[i] & 0xFC) == 0xFC) - i += 6; else if ((m_s[i] & 0xFE) == 0xFE) i += 7; + else if ((m_s[i] & 0xFC) == 0xFC) + i += 6; + else if ((m_s[i] & 0xF8) == 0xF8) + i += 5; + else if ((m_s[i] & 0xF0) == 0xF0) + i += 4; + else if ((m_s[i] & 0xE0) == 0xE0) + i += 3; + else if ((m_s[i] & 0xC0) == 0xC0) + i += 2; } return i;