diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index 8278d87ab8..26ed3b52e7 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -18,14 +18,20 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) "Iiİı", "iiii", // Famous turkish "I" letter bug. "ЙЁйёШКИЙй", "йейешкийй", // Better handling of Russian й letter. "ØøÆ挜", "ooaeaeoeoe", - "バス", "ハス" + "バス", "ハス", + "âàáạăốợồôểềệếỉđưựứửýĂÂĐÊÔƠƯ", + "aaaaaooooeeeeiduuuuyaadeoou", // Vietnamese + "ăâț", "aat" // Romanian }; */ string const arr[] = {"ÜbërÅłłęšß", "uberallesss", // Basic test case. "Iiİı", "iiii", // Famous turkish "I" letter bug. "ЙЁйёШКИЙй", "иеиешкиии", // Better handling of Russian й letter. - "ØøÆ挜", "ooaeaeoeoe", - "バス", "ハス" + "ØøÆ挜", "ooaeaeoeoe", // Dansk + "バス", "ハス", + "âàáạăốợồôểềệếỉđưựứửýĂÂĐÊÔƠƯ", + "aaaaaooooeeeeiduuuuyaadeoou", // Vietnamese + "ăâț", "aat" // Romanian }; for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2) diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index d27e43991a..86c9b10092 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -17,6 +17,10 @@ inline strings::UniString NormalizeAndSimplifyString(string const & s) UniChar & c = uniString[i]; switch (c) { + // Replace "d with stroke" (U+0110, U+0111) with a plain 'd' letter. Used in Vietnamese. + // (Unicode-compliant normalization leaves these letters unchanged.) + case 0x0110: + case 0x0111: c = 'd'; break; // Replace small turkish dotless 'ı' with dotted 'i'. // Our own invented hack to avoid well-known Turkish I-letter bug. case 0x0131: c = 'i'; break;