diff --git a/coding/coding_tests/string_utf8_multilang_tests.cpp b/coding/coding_tests/string_utf8_multilang_tests.cpp index b002ad134f..81a99321e8 100644 --- a/coding/coding_tests/string_utf8_multilang_tests.cpp +++ b/coding/coding_tests/string_utf8_multilang_tests.cpp @@ -147,3 +147,50 @@ UNIT_TEST(MultilangString_HasString) TEST(!s.HasString(1), ()); TEST(!s.HasString(32), ()); } + +UNIT_TEST(MultilangString_ForEachLanguage) +{ + using Translations = vector>; + StringUtf8Multilang s; + Translations const scotlandTranslations = { + {"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"}, {"da", "Skotland"}, + {"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"}, {"eu", "Eskozia"}, + {"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"}, {"gd", "Alba"}, + {"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"}, {"ja", "スコットランド"}, + {"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"}, {"pl", "Szkocja"}, + {"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"}, {"sr", "Шкотска"}, + {"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"}, {"uk", "Шотландія"}, + {"vo", "Skotän"}, {"zh", "苏格兰"}}; + + Translations const usedTranslations = { + {"be", "Шатландыя"}, {"cs", "Skotsko"}, {"eu", "Eskozia"}, {"zh", "苏格兰"}}; + + for (auto const & langAndTranslation : scotlandTranslations) + { + s.AddString(langAndTranslation.first, langAndTranslation.second); + } + + set testAccumulator; + vector const preferredLanguages = {"cs", "eu", "be", "zh"}; + vector const preferredTranslations = {"Skotsko", "Eskozia", "Шатландыя", "苏格兰"}; + + auto const fn = [&testAccumulator, &usedTranslations](int8_t code, string const & name) { + testAccumulator.insert(name); + if (usedTranslations.size() > testAccumulator.size()) + return base::ControlFlow::Continue; + return base::ControlFlow::Break; + }; + + TEST(s.ForEachLanguage(preferredLanguages, fn), ()); + TEST_EQUAL(testAccumulator.size(), preferredTranslations.size(), ()); + + for (string const & translation : preferredTranslations) + { + TEST(testAccumulator.find(translation) != testAccumulator.end(), ()); + } + + testAccumulator.clear(); + vector const corruptedLanguages = {"Матерный", "Детский", "BirdLanguage"}; + TEST(!s.ForEachLanguage(corruptedLanguages, fn), ()); + TEST_EQUAL(testAccumulator.size(), 0, ()); +} diff --git a/coding/string_utf8_multilang.cpp b/coding/string_utf8_multilang.cpp index 734b7d380c..8c50cdc9f2 100644 --- a/coding/string_utf8_multilang.cpp +++ b/coding/string_utf8_multilang.cpp @@ -229,6 +229,30 @@ bool StringUtf8Multilang::GetString(int8_t lang, string & utf8s) const return false; } +StringUtf8Multilang::TranslationPositions StringUtf8Multilang::GenerateTranslationPositions() const +{ + TranslationPositions result; + size_t i = 0; + size_t const sz = m_s.size(); + while (i < sz) + { + size_t const next = GetNextIndex(i); + int8_t const code = m_s[i] & 0x3F; + if (GetLangByCode(code) != kReservedLang) + result[code] = Position{i + 1, next - i - 1}; + + i = next; + } + + return result; +} + +std::string StringUtf8Multilang::GetTranslation( + StringUtf8Multilang::Position const & position) const +{ + return m_s.substr(position.m_begin, position.m_length); +} + bool StringUtf8Multilang::HasString(int8_t lang) const { if (!IsSupportedLangCode(lang)) diff --git a/coding/string_utf8_multilang.hpp b/coding/string_utf8_multilang.hpp index 68163d6e5e..70824a33f3 100644 --- a/coding/string_utf8_multilang.hpp +++ b/coding/string_utf8_multilang.hpp @@ -73,6 +73,14 @@ public: char const * m_transliteratorId; }; + struct Position + { + size_t m_begin = 0; + size_t m_length = 0; + }; + + using TranslationPositions = std::map; + static int8_t constexpr kUnsupportedLanguageCode = -1; static int8_t constexpr kDefaultCode = 0; static int8_t constexpr kEnglishCode = 1; @@ -129,6 +137,33 @@ public: } } + /// Used for ordered languages, if you want to do something with priority of that order. + /// \param languages ordered languages names. + /// \param fn function or functor, using base::ControlFlow as return value. + /// \return true if ForEachLanguage was stopped by base::ControlFlow::Break, false otherwise. + template + bool ForEachLanguage(std::vector const & languages, Fn && fn) const + { + auto const & translationPositions = GenerateTranslationPositions(); + + base::ControlFlowWrapper wrapper(std::forward(fn)); + for (std::string const & language : languages) + { + int8_t const languageCode = GetLangIndex(language); + if (GetLangByCode(languageCode) != kReservedLang) + { + auto const & translationPositionsIt = translationPositions.find(languageCode); + if (translationPositionsIt != translationPositions.end() && + wrapper(languageCode, GetTranslation(translationPositionsIt->second)) == + base::ControlFlow::Break) + { + return true; + } + } + } + return false; + }; + bool GetString(int8_t lang, std::string & utf8s) const; bool GetString(std::string const & lang, std::string & utf8s) const { @@ -157,6 +192,9 @@ public: } private: + TranslationPositions GenerateTranslationPositions() const; + std::string GetTranslation(Position const & position) const; + size_t GetNextIndex(size_t i) const; std::string m_s; diff --git a/generator/generator_tests/regions_tests.cpp b/generator/generator_tests/regions_tests.cpp index 2061e95f9f..f19f0e85b8 100644 --- a/generator/generator_tests/regions_tests.cpp +++ b/generator/generator_tests/regions_tests.cpp @@ -10,6 +10,8 @@ #include "platform/platform.hpp" +#include "coding/transliteration.hpp" + #include "base/file_name_utils.hpp" #include "base/macros.hpp" @@ -18,8 +20,8 @@ #include #include #include -#include #include +#include using namespace generator_tests; using namespace generator::regions; @@ -33,7 +35,7 @@ using Tags = std::vector>; FeatureBuilder const kEmptyFeature; OsmElement CreateOsmRelation(uint64_t id, std::string const & adminLevel, - std::string const & place = "") + std::string const & place = "") { OsmElement el; el.m_id = id; @@ -67,7 +69,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector) FeatureBuilder fb; fb.AddName("default", "Country_1"); fb.SetOsmId(MakeOsmRelation(1 /* id */)); - vector poly = {{2, 8}, {3, 12}, {8, 15}, {13, 12}, {15, 7}, {11, 2}, {4, 4}, {2, 8}}; + vector poly = {{2, 8}, {3, 12}, {8, 15}, {13, 12}, + {15, 7}, {11, 2}, {4, 4}, {2, 8}}; fb.AddPolygon(poly); fb.SetHoles({{{5, 8}, {7, 10}, {10, 10}, {11, 7}, {10, 4}, {7, 5}, {5, 8}}}); fb.SetArea(); @@ -108,8 +111,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector) FeatureBuilder fb; fb.AddName("default", "Country_1_Region_4"); fb.SetOsmId(MakeOsmRelation(4 /* id */)); - vector poly = {{7, 10}, {9, 12}, {8, 15}, {13, 12}, {15, 7}, {12, 9}, - {11, 7}, {10, 10}, {7, 10}}; + vector poly = {{7, 10}, {9, 12}, {8, 15}, {13, 12}, {15, 7}, + {12, 9}, {11, 7}, {10, 10}, {7, 10}}; fb.AddPolygon(poly); fb.SetArea(); regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(4 /* id */)))); @@ -119,8 +122,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector) FeatureBuilder fb; fb.AddName("default", "Country_1_Region_5"); fb.SetOsmId(MakeOsmRelation(5 /* id */)); - vector poly = {{4, 4}, {2, 8}, {3, 12}, {8, 15}, {9, 12}, {7, 10}, {5, 8}, - {7, 5}, {4, 4}}; + vector poly = {{4, 4}, {2, 8}, {3, 12}, {8, 15}, {9, 12}, + {7, 10}, {5, 8}, {7, 5}, {4, 4}}; fb.AddPolygon(poly); fb.SetArea(); regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(5 /* id */)))); @@ -140,7 +143,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector) FeatureBuilder fb; fb.AddName("default", "Country_1_Region_5_Subregion_7"); fb.SetOsmId(MakeOsmRelation(7 /* id */)); - vector poly = {{3, 12}, {8, 15}, {9, 12}, {7, 10}, {5, 8}, {5, 10}, {4, 10}, {3, 12}}; + vector poly = {{3, 12}, {8, 15}, {9, 12}, {7, 10}, + {5, 8}, {5, 10}, {4, 10}, {3, 12}}; fb.AddPolygon(poly); fb.SetArea(); regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(7 /* id */)))); @@ -176,8 +180,7 @@ bool NameExists(std::vector const & coll, std::string const & name) { auto const end = std::end(coll); return std::find(std::begin(coll), end, name) != end; -} -; +}; } // namespace UNIT_TEST(RegionsBuilderTest_GetCountryNames) @@ -199,9 +202,11 @@ UNIT_TEST(RegionsBuilderTest_GetCountries) auto const & countries = builder.GetCountriesOuters(); TEST_EQUAL(countries.size(), 3, ()); TEST_EQUAL(std::count_if(std::begin(countries), std::end(countries), - [](const Region & r) {return r.GetName() == "Country_1"; }), 1, ()); + [](const Region & r) { return r.GetName() == "Country_1"; }), + 1, ()); TEST_EQUAL(std::count_if(std::begin(countries), std::end(countries), - [](const Region & r) {return r.GetName() == "Country_2"; }), 2, ()); + [](const Region & r) { return r.GetName() == "Country_2"; }), + 2, ()); } UNIT_TEST(RegionsBuilderTest_GetCountryTrees) @@ -230,3 +235,44 @@ UNIT_TEST(RegionsBuilderTest_GetCountryTrees) TEST(NameExists(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_6"), ()); TEST(NameExists(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_7"), ()); } + +using Translations = std::vector>; +bool TestTransliteration(Translations const & translations, + std::string const & expectedTransliteration) +{ + StringUtf8Multilang regionName; + for (auto const & langAndTranslation : translations) + { + regionName.AddString(langAndTranslation.first, langAndTranslation.second); + } + RegionWithName region(regionName); + return region.GetEnglishOrTransliteratedName() == expectedTransliteration; +} + +UNIT_TEST(RegionTransliteration) +{ + Transliteration & translit = Transliteration::Instance(); + translit.Init(GetPlatform().ResourcesDir()); + + Translations const scotlandTranslations = { + {"default", "Scotland"}, {"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"}, + {"da", "Skotland"}, {"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"}, + {"eu", "Eskozia"}, {"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"}, + {"gd", "Alba"}, {"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"}, + {"ja", "スコットランド"}, {"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"}, + {"pl", "Szkocja"}, {"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"}, + {"sr", "Шкотска"}, {"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"}, + {"uk", "Шотландія"}, {"vo", "Skotän"}, {"zh", "苏格兰"}}; + + Translations const michiganTranslations = { + {"default", "Michigan"}, {"ar", "ميشيغان"}, {"az", "Miçiqan"}, {"be", "Мічыган"}, + {"bg", "Мичиган"}, {"br", "Michigan"}, {"en", "Michigan"}, {"eo", "Miĉigano"}, + {"es", "Míchigan"}, {"fa", "میشیگان"}, {"haw", "Mikikana"}, {"he", "מישיגן"}, + {"hy", "Միչիգան"}, {"ja", "ミシガン州"}, {"ko", "미시간"}, {"lt", "Mičiganas"}, + {"lv", "Mičigana"}, {"nv", "Míshigin"}, {"pl", "Michigan"}, {"ru", "Мичиган"}, + {"sr", "Мичиген"}, {"ta", "மிச்சிகன்"}, {"th", "รัฐมิชิแกน"}, {"tl", "Misigan"}, + {"uk", "Мічиган"}, {"yi", "מישיגן"}, {"zh", "密歇根州"}}; + + TEST(TestTransliteration(scotlandTranslations, "Shotlandiya"), ()); + TEST(TestTransliteration(michiganTranslations, "Michigan"), ()); +} diff --git a/generator/regions/region_base.cpp b/generator/regions/region_base.cpp index 797ff56ff7..447ef5c66b 100644 --- a/generator/regions/region_base.cpp +++ b/generator/regions/region_base.cpp @@ -4,11 +4,22 @@ #include "base/assert.hpp" #include "base/control_flow.hpp" +#include "base/string_utils.hpp" namespace generator { namespace regions { +namespace +{ +// Languages in order for better transliterations for Russian. This is kind +// of workaround before real made translations. +const std::vector kRuPreferredLanguagesForTransliterate = { + "en" /*English*/, + "ru" /*Русский*/, +}; +} // namespace + std::string RegionWithName::GetName(int8_t lang) const { std::string s; @@ -19,12 +30,16 @@ std::string RegionWithName::GetName(int8_t lang) const std::string RegionWithName::GetEnglishOrTransliteratedName() const { std::string s = GetName(StringUtf8Multilang::kEnglishCode); - if (!s.empty()) + if (!s.empty() && strings::IsASCIIString(s)) + return s; + + s = GetName(StringUtf8Multilang::kInternationalCode); + if (!s.empty() && strings::IsASCIIString(s)) return s; auto const fn = [&s](int8_t code, std::string const & name) { if (code != StringUtf8Multilang::kDefaultCode && - Transliteration::Instance().Transliterate(name, code, s)) + Transliteration::Instance().Transliterate(name, code, s) && strings::IsASCIIString(s)) { return base::ControlFlow::Break; } @@ -32,33 +47,20 @@ std::string RegionWithName::GetEnglishOrTransliteratedName() const return base::ControlFlow::Continue; }; - m_name.ForEach(fn); + if (!m_name.ForEachLanguage(kRuPreferredLanguagesForTransliterate, fn)) + m_name.ForEach(fn); + return s; } -StringUtf8Multilang const & RegionWithName::GetMultilangName() const -{ - return m_name; -} +StringUtf8Multilang const & RegionWithName::GetMultilangName() const { return m_name; } -void RegionWithName::SetMultilangName(StringUtf8Multilang const & name) -{ - m_name = name; -} +void RegionWithName::SetMultilangName(StringUtf8Multilang const & name) { m_name = name; } -base::GeoObjectId RegionWithData::GetId() const -{ - return m_regionData.GetOsmId(); -} +base::GeoObjectId RegionWithData::GetId() const { return m_regionData.GetOsmId(); } -bool RegionWithData::HasIsoCode() const -{ - return m_regionData.HasIsoCodeAlpha2(); -} +bool RegionWithData::HasIsoCode() const { return m_regionData.HasIsoCodeAlpha2(); } -std::string RegionWithData::GetIsoCode() const -{ - return m_regionData.GetIsoCodeAlpha2(); -} +std::string RegionWithData::GetIsoCode() const { return m_regionData.GetIsoCodeAlpha2(); } } // namespace regions } // namespace generator