forked from organicmaps/organicmaps
[generator] Use reasonable order for transliteration of region names
This commit is contained in:
parent
9b149eef29
commit
529bdebbe7
5 changed files with 192 additions and 35 deletions
|
@ -147,3 +147,50 @@ UNIT_TEST(MultilangString_HasString)
|
|||
TEST(!s.HasString(1), ());
|
||||
TEST(!s.HasString(32), ());
|
||||
}
|
||||
|
||||
// Checks StringUtf8Multilang::ForEachLanguage():
//  - translations are visited in the order of the requested languages;
//  - the call returns true when the callback asks to stop;
//  - unknown language names are skipped and the callback is never invoked for them.
UNIT_TEST(MultilangString_ForEachLanguage)
{
  using Translations = vector<pair<string, string>>;

  Translations const scotlandTranslations = {
      {"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"}, {"da", "Skotland"},
      {"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"}, {"eu", "Eskozia"},
      {"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"}, {"gd", "Alba"},
      {"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"}, {"ja", "スコットランド"},
      {"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"}, {"pl", "Szkocja"},
      {"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"}, {"sr", "Шкотска"},
      {"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"}, {"uk", "Шотландія"},
      {"vo", "Skotän"}, {"zh", "苏格兰"}};

  StringUtf8Multilang s;
  for (auto const & langAndTranslation : scotlandTranslations)
    s.AddString(langAndTranslation.first, langAndTranslation.second);

  // The requested order intentionally differs from the insertion order above ("be" was added
  // before "cs" and "eu"), so an implementation that ignores the requested order is detected.
  vector<string> const preferredLanguages = {"cs", "eu", "be", "zh"};
  vector<string> const preferredTranslations = {"Skotsko", "Eskozia", "Шатландыя", "苏格兰"};

  // A vector (rather than a set) preserves the order in which the callback was invoked.
  vector<string> visited;
  auto const fn = [&visited, &preferredTranslations](int8_t /* code */, string const & name) {
    visited.push_back(name);
    // Stop as soon as every expected translation has been seen.
    if (visited.size() < preferredTranslations.size())
      return base::ControlFlow::Continue;
    return base::ControlFlow::Break;
  };

  TEST(s.ForEachLanguage(preferredLanguages, fn), ());
  TEST(visited == preferredTranslations, ());

  visited.clear();
  vector<string> const corruptedLanguages = {"Матерный", "Детский", "BirdLanguage"};
  TEST(!s.ForEachLanguage(corruptedLanguages, fn), ());
  TEST(visited.empty(), ());
}
|
||||
|
|
|
@ -229,6 +229,30 @@ bool StringUtf8Multilang::GetString(int8_t lang, string & utf8s) const
|
|||
return false;
|
||||
}
|
||||
|
||||
// Scans the packed buffer |m_s| record by record and builds a map from language code to the
// location of that language's text inside |m_s|.
//
// For a record starting at index |cur|, the low six bits of m_s[cur] are taken as the language
// code and the text is taken to span from cur + 1 up to (not including) GetNextIndex(cur).
// Records whose code maps to kReservedLang are left out. If the same code occurs more than once,
// the last occurrence wins.
StringUtf8Multilang::TranslationPositions StringUtf8Multilang::GenerateTranslationPositions() const
{
  TranslationPositions positions;

  size_t const total = m_s.size();
  for (size_t cur = 0; cur < total;)
  {
    size_t const nextRecord = GetNextIndex(cur);
    int8_t const langCode = m_s[cur] & 0x3F;

    if (GetLangByCode(langCode) != kReservedLang)
      positions[langCode] = Position{cur + 1, nextRecord - cur - 1};

    // Advance manually: records have variable length.
    cur = nextRecord;
  }

  return positions;
}
|
||||
|
||||
// Returns a copy of the |position.m_length| characters of |m_s| starting at |position.m_begin|.
std::string StringUtf8Multilang::GetTranslation(
    StringUtf8Multilang::Position const & position) const
{
  return std::string(m_s, position.m_begin, position.m_length);
}
|
||||
|
||||
bool StringUtf8Multilang::HasString(int8_t lang) const
|
||||
{
|
||||
if (!IsSupportedLangCode(lang))
|
||||
|
|
|
@ -73,6 +73,14 @@ public:
|
|||
char const * m_transliteratorId;
|
||||
};
|
||||
|
||||
// Location of a substring: where it starts and how many characters it spans.
struct Position
{
  size_t m_begin{0};   // Index of the first character.
  size_t m_length{0};  // Number of characters.
};
|
||||
|
||||
using TranslationPositions = std::map<int8_t, Position>;
|
||||
|
||||
static int8_t constexpr kUnsupportedLanguageCode = -1;
|
||||
static int8_t constexpr kDefaultCode = 0;
|
||||
static int8_t constexpr kEnglishCode = 1;
|
||||
|
@ -129,6 +137,33 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
/// Used for ordered languages, if you want to do something with priority of that order.
|
||||
/// \param languages ordered languages names.
|
||||
/// \param fn function or functor, using base::ControlFlow as return value.
|
||||
/// \return true if ForEachLanguage was stopped by base::ControlFlow::Break, false otherwise.
|
||||
template <typename Fn>
|
||||
bool ForEachLanguage(std::vector<std::string> const & languages, Fn && fn) const
|
||||
{
|
||||
auto const & translationPositions = GenerateTranslationPositions();
|
||||
|
||||
base::ControlFlowWrapper<Fn> wrapper(std::forward<Fn>(fn));
|
||||
for (std::string const & language : languages)
|
||||
{
|
||||
int8_t const languageCode = GetLangIndex(language);
|
||||
if (GetLangByCode(languageCode) != kReservedLang)
|
||||
{
|
||||
auto const & translationPositionsIt = translationPositions.find(languageCode);
|
||||
if (translationPositionsIt != translationPositions.end() &&
|
||||
wrapper(languageCode, GetTranslation(translationPositionsIt->second)) ==
|
||||
base::ControlFlow::Break)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
bool GetString(int8_t lang, std::string & utf8s) const;
|
||||
bool GetString(std::string const & lang, std::string & utf8s) const
|
||||
{
|
||||
|
@ -157,6 +192,9 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
TranslationPositions GenerateTranslationPositions() const;
|
||||
std::string GetTranslation(Position const & position) const;
|
||||
|
||||
size_t GetNextIndex(size_t i) const;
|
||||
|
||||
std::string m_s;
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "coding/transliteration.hpp"
|
||||
|
||||
#include "base/file_name_utils.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
|
@ -18,8 +20,8 @@
|
|||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace generator_tests;
|
||||
using namespace generator::regions;
|
||||
|
@ -33,7 +35,7 @@ using Tags = std::vector<std::pair<std::string, std::string>>;
|
|||
FeatureBuilder const kEmptyFeature;
|
||||
|
||||
OsmElement CreateOsmRelation(uint64_t id, std::string const & adminLevel,
|
||||
std::string const & place = "")
|
||||
std::string const & place = "")
|
||||
{
|
||||
OsmElement el;
|
||||
el.m_id = id;
|
||||
|
@ -67,7 +69,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector)
|
|||
FeatureBuilder fb;
|
||||
fb.AddName("default", "Country_1");
|
||||
fb.SetOsmId(MakeOsmRelation(1 /* id */));
|
||||
vector<m2::PointD> poly = {{2, 8}, {3, 12}, {8, 15}, {13, 12}, {15, 7}, {11, 2}, {4, 4}, {2, 8}};
|
||||
vector<m2::PointD> poly = {{2, 8}, {3, 12}, {8, 15}, {13, 12},
|
||||
{15, 7}, {11, 2}, {4, 4}, {2, 8}};
|
||||
fb.AddPolygon(poly);
|
||||
fb.SetHoles({{{5, 8}, {7, 10}, {10, 10}, {11, 7}, {10, 4}, {7, 5}, {5, 8}}});
|
||||
fb.SetArea();
|
||||
|
@ -108,8 +111,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector)
|
|||
FeatureBuilder fb;
|
||||
fb.AddName("default", "Country_1_Region_4");
|
||||
fb.SetOsmId(MakeOsmRelation(4 /* id */));
|
||||
vector<m2::PointD> poly = {{7, 10}, {9, 12}, {8, 15}, {13, 12}, {15, 7}, {12, 9},
|
||||
{11, 7}, {10, 10}, {7, 10}};
|
||||
vector<m2::PointD> poly = {{7, 10}, {9, 12}, {8, 15}, {13, 12}, {15, 7},
|
||||
{12, 9}, {11, 7}, {10, 10}, {7, 10}};
|
||||
fb.AddPolygon(poly);
|
||||
fb.SetArea();
|
||||
regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(4 /* id */))));
|
||||
|
@ -119,8 +122,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector)
|
|||
FeatureBuilder fb;
|
||||
fb.AddName("default", "Country_1_Region_5");
|
||||
fb.SetOsmId(MakeOsmRelation(5 /* id */));
|
||||
vector<m2::PointD> poly = {{4, 4}, {2, 8}, {3, 12}, {8, 15}, {9, 12}, {7, 10}, {5, 8},
|
||||
{7, 5}, {4, 4}};
|
||||
vector<m2::PointD> poly = {{4, 4}, {2, 8}, {3, 12}, {8, 15}, {9, 12},
|
||||
{7, 10}, {5, 8}, {7, 5}, {4, 4}};
|
||||
fb.AddPolygon(poly);
|
||||
fb.SetArea();
|
||||
regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(5 /* id */))));
|
||||
|
@ -140,7 +143,8 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector)
|
|||
FeatureBuilder fb;
|
||||
fb.AddName("default", "Country_1_Region_5_Subregion_7");
|
||||
fb.SetOsmId(MakeOsmRelation(7 /* id */));
|
||||
vector<m2::PointD> poly = {{3, 12}, {8, 15}, {9, 12}, {7, 10}, {5, 8}, {5, 10}, {4, 10}, {3, 12}};
|
||||
vector<m2::PointD> poly = {{3, 12}, {8, 15}, {9, 12}, {7, 10},
|
||||
{5, 8}, {5, 10}, {4, 10}, {3, 12}};
|
||||
fb.AddPolygon(poly);
|
||||
fb.SetArea();
|
||||
regions.emplace_back(Region(fb, collector.Get(MakeOsmRelation(7 /* id */))));
|
||||
|
@ -176,8 +180,7 @@ bool NameExists(std::vector<std::string> const & coll, std::string const & name)
|
|||
{
|
||||
auto const end = std::end(coll);
|
||||
return std::find(std::begin(coll), end, name) != end;
|
||||
}
|
||||
;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
UNIT_TEST(RegionsBuilderTest_GetCountryNames)
|
||||
|
@ -199,9 +202,11 @@ UNIT_TEST(RegionsBuilderTest_GetCountries)
|
|||
auto const & countries = builder.GetCountriesOuters();
|
||||
TEST_EQUAL(countries.size(), 3, ());
|
||||
TEST_EQUAL(std::count_if(std::begin(countries), std::end(countries),
|
||||
[](const Region & r) {return r.GetName() == "Country_1"; }), 1, ());
|
||||
[](const Region & r) { return r.GetName() == "Country_1"; }),
|
||||
1, ());
|
||||
TEST_EQUAL(std::count_if(std::begin(countries), std::end(countries),
|
||||
[](const Region & r) {return r.GetName() == "Country_2"; }), 2, ());
|
||||
[](const Region & r) { return r.GetName() == "Country_2"; }),
|
||||
2, ());
|
||||
}
|
||||
|
||||
UNIT_TEST(RegionsBuilderTest_GetCountryTrees)
|
||||
|
@ -230,3 +235,44 @@ UNIT_TEST(RegionsBuilderTest_GetCountryTrees)
|
|||
TEST(NameExists(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_6"), ());
|
||||
TEST(NameExists(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_7"), ());
|
||||
}
|
||||
|
||||
using Translations = std::vector<std::pair<std::string, std::string>>;
|
||||
// Builds a RegionWithName from the (language, name) pairs in |translations| and reports whether
// its GetEnglishOrTransliteratedName() equals |expectedTransliteration|.
bool TestTransliteration(Translations const & translations,
                         std::string const & expectedTransliteration)
{
  StringUtf8Multilang multilangName;
  for (auto const & entry : translations)
    multilangName.AddString(entry.first, entry.second);

  RegionWithName region(multilangName);
  std::string const actual = region.GetEnglishOrTransliteratedName();
  return actual == expectedTransliteration;
}
|
||||
|
||||
// Checks the name chosen by RegionWithName::GetEnglishOrTransliteratedName() for two real-world
// sets of translations.
UNIT_TEST(RegionTransliteration)
{
  // Initialize the shared transliterator from the platform resources directory.
  Transliteration & transliterator = Transliteration::Instance();
  transliterator.Init(GetPlatform().ResourcesDir());

  // This set has no "en" entry.
  Translations const scotland = {
      {"default", "Scotland"}, {"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"},
      {"da", "Skotland"}, {"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"},
      {"eu", "Eskozia"}, {"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"},
      {"gd", "Alba"}, {"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"},
      {"ja", "スコットランド"}, {"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"},
      {"pl", "Szkocja"}, {"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"},
      {"sr", "Шкотска"}, {"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"},
      {"uk", "Шотландія"}, {"vo", "Skotän"}, {"zh", "苏格兰"}};

  // This set contains an ASCII "en" entry.
  Translations const michigan = {
      {"default", "Michigan"}, {"ar", "ميشيغان"}, {"az", "Miçiqan"}, {"be", "Мічыган"},
      {"bg", "Мичиган"}, {"br", "Michigan"}, {"en", "Michigan"}, {"eo", "Miĉigano"},
      {"es", "Míchigan"}, {"fa", "میشیگان"}, {"haw", "Mikikana"}, {"he", "מישיגן"},
      {"hy", "Միչիգան"}, {"ja", "ミシガン州"}, {"ko", "미시간"}, {"lt", "Mičiganas"},
      {"lv", "Mičigana"}, {"nv", "Míshigin"}, {"pl", "Michigan"}, {"ru", "Мичиган"},
      {"sr", "Мичиген"}, {"ta", "மிச்சிகன்"}, {"th", "รัฐมิชิแกน"}, {"tl", "Misigan"},
      {"uk", "Мічиган"}, {"yi", "מישיגן"}, {"zh", "密歇根州"}};

  // NOTE(review): "Shotlandiya" looks like the transliteration of the "ru" name; confirm that
  // "ru" is the intended fallback when no English name is present.
  TEST(TestTransliteration(scotland, "Shotlandiya"), ());
  TEST(TestTransliteration(michigan, "Michigan"), ());
}
|
||||
|
|
|
@ -4,11 +4,22 @@
|
|||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/control_flow.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace regions
|
||||
{
|
||||
namespace
{
// Languages to try first, in priority order, when picking a name to transliterate.
// The order is chosen to give better results for Russian data; this is a stopgap until
// proper translations are available.
std::vector<std::string> const kRuPreferredLanguagesForTransliterate = {
    "en" /* English */,
    "ru" /* Russian */,
};
}  // namespace
|
||||
|
||||
std::string RegionWithName::GetName(int8_t lang) const
|
||||
{
|
||||
std::string s;
|
||||
|
@ -19,12 +30,16 @@ std::string RegionWithName::GetName(int8_t lang) const
|
|||
std::string RegionWithName::GetEnglishOrTransliteratedName() const
|
||||
{
|
||||
std::string s = GetName(StringUtf8Multilang::kEnglishCode);
|
||||
if (!s.empty())
|
||||
if (!s.empty() && strings::IsASCIIString(s))
|
||||
return s;
|
||||
|
||||
s = GetName(StringUtf8Multilang::kInternationalCode);
|
||||
if (!s.empty() && strings::IsASCIIString(s))
|
||||
return s;
|
||||
|
||||
auto const fn = [&s](int8_t code, std::string const & name) {
|
||||
if (code != StringUtf8Multilang::kDefaultCode &&
|
||||
Transliteration::Instance().Transliterate(name, code, s))
|
||||
Transliteration::Instance().Transliterate(name, code, s) && strings::IsASCIIString(s))
|
||||
{
|
||||
return base::ControlFlow::Break;
|
||||
}
|
||||
|
@ -32,33 +47,20 @@ std::string RegionWithName::GetEnglishOrTransliteratedName() const
|
|||
return base::ControlFlow::Continue;
|
||||
};
|
||||
|
||||
m_name.ForEach(fn);
|
||||
if (!m_name.ForEachLanguage(kRuPreferredLanguagesForTransliterate, fn))
|
||||
m_name.ForEach(fn);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
StringUtf8Multilang const & RegionWithName::GetMultilangName() const
|
||||
{
|
||||
return m_name;
|
||||
}
|
||||
StringUtf8Multilang const & RegionWithName::GetMultilangName() const { return m_name; }
|
||||
|
||||
void RegionWithName::SetMultilangName(StringUtf8Multilang const & name)
|
||||
{
|
||||
m_name = name;
|
||||
}
|
||||
void RegionWithName::SetMultilangName(StringUtf8Multilang const & name) { m_name = name; }
|
||||
|
||||
base::GeoObjectId RegionWithData::GetId() const
|
||||
{
|
||||
return m_regionData.GetOsmId();
|
||||
}
|
||||
base::GeoObjectId RegionWithData::GetId() const { return m_regionData.GetOsmId(); }
|
||||
|
||||
bool RegionWithData::HasIsoCode() const
|
||||
{
|
||||
return m_regionData.HasIsoCodeAlpha2();
|
||||
}
|
||||
bool RegionWithData::HasIsoCode() const { return m_regionData.HasIsoCodeAlpha2(); }
|
||||
|
||||
std::string RegionWithData::GetIsoCode() const
|
||||
{
|
||||
return m_regionData.GetIsoCodeAlpha2();
|
||||
}
|
||||
std::string RegionWithData::GetIsoCode() const { return m_regionData.GetIsoCodeAlpha2(); }
|
||||
} // namespace regions
|
||||
} // namespace generator
|
||||
|
|
Loading…
Add table
Reference in a new issue