From a4e34462452758e667cf3899fd664884e035e3c9 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Tue, 12 Dec 2017 12:20:42 +0300 Subject: [PATCH] [search] Translated names in results. --- search/CMakeLists.txt | 2 + search/engine.cpp | 26 ++--- search/engine.hpp | 3 + search/intermediate_result.cpp | 33 ++++--- search/intermediate_result.hpp | 17 ++-- search/processor.cpp | 4 +- search/processor.hpp | 1 + search/ranker.cpp | 43 ++++++-- search/ranker.hpp | 10 +- search/region_info_getter.cpp | 98 +++++++++++++++++++ search/region_info_getter.hpp | 26 +++++ search/search_tests/CMakeLists.txt | 1 + .../search_tests/region_info_getter_tests.cpp | 65 ++++++++++++ storage/country_tree.hpp | 2 + 14 files changed, 287 insertions(+), 44 deletions(-) create mode 100644 search/region_info_getter.cpp create mode 100644 search/region_info_getter.hpp create mode 100644 search/search_tests/region_info_getter_tests.cpp diff --git a/search/CMakeLists.txt b/search/CMakeLists.txt index 3832eb9be1..6b171db7f2 100644 --- a/search/CMakeLists.txt +++ b/search/CMakeLists.txt @@ -120,6 +120,8 @@ set( ranking_info.hpp ranking_utils.cpp ranking_utils.hpp + region_info_getter.cpp + region_info_getter.hpp result.cpp result.hpp retrieval.cpp diff --git a/search/engine.cpp b/search/engine.cpp index 322d1a0150..2f026ee7fc 100644 --- a/search/engine.cpp +++ b/search/engine.cpp @@ -112,6 +112,7 @@ Engine::Engine(Index & index, CategoriesHolder const & categories, m_threads.emplace_back(&Engine::MainLoop, this, ref(m_contexts[i])); LoadCitiesBoundaries(); + LoadCountriesTree(); } Engine::~Engine() @@ -138,42 +139,43 @@ weak_ptr Engine::Search(SearchParams const & params) void Engine::SetLocale(string const & locale) { - PostMessage(Message::TYPE_BROADCAST, [this, locale](Processor & processor) - { - processor.SetPreferredLocale(locale); - }); + PostMessage(Message::TYPE_BROADCAST, + [locale](Processor & processor) { processor.SetPreferredLocale(locale); }); } void Engine::ClearCaches() { - 
PostMessage(Message::TYPE_BROADCAST, [this](Processor & processor) - { - processor.ClearCaches(); - }); + PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ClearCaches(); }); } void Engine::LoadCitiesBoundaries() { PostMessage(Message::TYPE_BROADCAST, - [this](Processor & processor) { processor.LoadCitiesBoundaries(); }); + [](Processor & processor) { processor.LoadCitiesBoundaries(); }); +} + +void Engine::LoadCountriesTree() +{ + PostMessage(Message::TYPE_BROADCAST, + [](Processor & processor) { processor.LoadCountriesTree(); }); } void Engine::OnBookmarksCreated(vector> const & marks) { PostMessage(Message::TYPE_BROADCAST, - [this, marks](Processor & processor) { processor.OnBookmarksCreated(marks); }); + [marks](Processor & processor) { processor.OnBookmarksCreated(marks); }); } void Engine::OnBookmarksUpdated(vector> const & marks) { PostMessage(Message::TYPE_BROADCAST, - [this, marks](Processor & processor) { processor.OnBookmarksUpdated(marks); }); + [marks](Processor & processor) { processor.OnBookmarksUpdated(marks); }); } void Engine::OnBookmarksDeleted(vector const & marks) { PostMessage(Message::TYPE_BROADCAST, - [this, marks](Processor & processor) { processor.OnBookmarksDeleted(marks); }); + [marks](Processor & processor) { processor.OnBookmarksDeleted(marks); }); } void Engine::MainLoop(Context & context) diff --git a/search/engine.hpp b/search/engine.hpp index b90859fe47..387f083d19 100644 --- a/search/engine.hpp +++ b/search/engine.hpp @@ -106,6 +106,9 @@ public: // Posts request to reload cities boundaries tables. void LoadCitiesBoundaries(); + // Posts request to load countries tree. 
+ void LoadCountriesTree(); + void OnBookmarksCreated(std::vector> const & marks); void OnBookmarksUpdated(std::vector> const & marks); void OnBookmarksDeleted(std::vector const & marks); diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index dca4c26dad..10c5267fa8 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -112,16 +112,13 @@ RankerResult::RankerResult(double lat, double lon) m_region.SetParams(string(), MercatorBounds::FromLatLon(lat, lon)); } -string RankerResult::GetRegionName(storage::CountryInfoGetter const & infoGetter, - uint32_t ftype) const +bool RankerResult::GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype, + storage::TCountryId & countryId) const { static SkipRegionInfo const checker; if (checker.IsSkip(ftype)) - return string(); - - storage::CountryInfo info; - m_region.GetRegion(infoGetter, info); - return info.m_name; + return false; + return m_region.GetCountryId(infoGetter, countryId); } bool RankerResult::IsEqualCommon(RankerResult const & r) const @@ -153,13 +150,23 @@ uint32_t RankerResult::GetBestType(set const * pPrefferedTypes) const } // RankerResult::RegionInfo ------------------------------------------------------------------------ -void RankerResult::RegionInfo::GetRegion(storage::CountryInfoGetter const & infoGetter, - storage::CountryInfo & info) const +bool RankerResult::RegionInfo::GetCountryId(storage::CountryInfoGetter const & infoGetter, + storage::TCountryId & countryId) const { - if (!m_file.empty()) - infoGetter.GetRegionInfo(m_file, info); - else - infoGetter.GetRegionInfo(m_point, info); + if (!m_countryId.empty()) + { + countryId = m_countryId; + return true; + } + + auto const id = infoGetter.GetRegionCountryId(m_point); + if (id != storage::kInvalidCountryId) + { + countryId = id; + return true; + } + + return false; } // Functions --------------------------------------------------------------------------------------- diff --git 
a/search/intermediate_result.hpp b/search/intermediate_result.hpp index 989c1e91d8..15f0c97658 100644 --- a/search/intermediate_result.hpp +++ b/search/intermediate_result.hpp @@ -5,6 +5,8 @@ #include "search/ranking_utils.hpp" #include "search/result.hpp" +#include "storage/index.hpp" + #include "indexer/feature_data.hpp" #include "std/set.hpp" @@ -86,7 +88,8 @@ public: double GetDistanceToPivot() const { return m_info.m_distanceToPivot; } double GetLinearModelRank() const { return m_info.GetLinearModelRank(); } - string GetRegionName(storage::CountryInfoGetter const & infoGetter, uint32_t ftype) const; + bool GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype, + storage::TCountryId & countryId) const; bool IsEqualCommon(RankerResult const & r) const; @@ -97,17 +100,17 @@ private: struct RegionInfo { - string m_file; + storage::TCountryId m_countryId; m2::PointD m_point; - inline void SetParams(string const & file, m2::PointD const & pt) + void SetParams(storage::TCountryId const & countryId, m2::PointD const & point) { - m_file = file; - m_point = pt; + m_countryId = countryId; + m_point = point; } - void GetRegion(storage::CountryInfoGetter const & infoGetter, - storage::CountryInfo & info) const; + bool GetCountryId(storage::CountryInfoGetter const & infoGetter, + storage::TCountryId & countryId) const; }; RegionInfo m_region; diff --git a/search/processor.cpp b/search/processor.cpp index 6b9d3e75da..b8c65ef069 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -194,7 +194,7 @@ void Processor::SetPreferredLocale(string const & locale) // Default initialization. // If you want to reset input language, call SetInputLocale before search. 
SetInputLocale(locale); - m_ranker.SetLocalityLanguage(code); + m_ranker.SetLocale(locale); } void Processor::SetInputLocale(string const & locale) @@ -316,6 +316,8 @@ void Processor::LoadCitiesBoundaries() LOG(LWARNING, ("Can't load cities boundaries")); } +void Processor::LoadCountriesTree() { m_ranker.LoadCountriesTree(); } + void Processor::OnBookmarksCreated(vector> const & /* marks */) { // TODO(@y): do something useful with marks. diff --git a/search/processor.hpp b/search/processor.hpp index 4e3a77d0aa..ee8c99a3fa 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -93,6 +93,7 @@ public: void ClearCaches(); void LoadCitiesBoundaries(); + void LoadCountriesTree(); void OnBookmarksCreated(std::vector> const & marks); void OnBookmarksUpdated(std::vector> const & marks); diff --git a/search/ranker.cpp b/search/ranker.cpp index 65a41d9c99..3fba665e21 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -12,12 +12,15 @@ #include "indexer/feature_algo.hpp" #include "indexer/search_string_utils.hpp" +#include "platform/preferred_languages.hpp" + +#include "coding/multilang_utf8_string.hpp" + #include "base/logging.hpp" #include "base/string_utils.hpp" #include #include -#include #include @@ -42,7 +45,7 @@ void UpdateNameScores(string const & name, TSlice const & slice, NameScores & be template void UpdateNameScores(vector const & tokens, TSlice const & slice, - NameScores & bestScores) + NameScores & bestScores) { bestScores.m_nameScore = max(bestScores.m_nameScore, GetNameScore(tokens, slice)); bestScores.m_errorsMade = ErrorsMade::Min(bestScores.m_errorsMade, GetErrorsMade(tokens, slice)); @@ -368,6 +371,7 @@ Ranker::Ranker(Index const & index, CitiesBoundariesTable const & boundariesTabl , m_categories(categories) , m_suggests(suggests) { + SetLocale("default"); } void Ranker::Init(Params const & params, Geocoder::Params const & geocoderParams) @@ -387,7 +391,6 @@ void Ranker::Finish(bool cancelled) Result Ranker::MakeResult(RankerResult const & 
rankerResult, bool needAddress, bool needHighlighting) const { - uint32_t const type = rankerResult.GetBestType(&m_params.m_preferredTypes); string name = rankerResult.GetName(); string address; @@ -403,7 +406,8 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress, name = FormatStreetAndHouse(addr); } - address = rankerResult.GetRegionName(m_infoGetter, type); + address = GetLocalizedRegionInfoForResult(rankerResult); + // Format full address only for suitable results. if (ftypes::IsAddressObjectChecker::Instance()(rankerResult.GetTypes())) address = FormatFullAddress(addressGetter.GetAddress(), address); @@ -415,11 +419,13 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress, { case RankerResult::Type::TYPE_FEATURE: case RankerResult::Type::TYPE_BUILDING: + { + auto const type = rankerResult.GetBestType(&m_params.m_preferredTypes); return Result(r.GetID(), r.GetCenter(), name, address, m_categories.GetReadableFeatureType(type, m_params.m_currentLocaleCode), type, r.GetMetadata()); - case RankerResult::Type::TYPE_LATLON: - return Result(r.GetCenter(), name, address); + } + case RankerResult::Type::TYPE_LATLON: return Result(r.GetCenter(), name, address); } ASSERT(false, ("Bad RankerResult type:", static_cast(r.GetResultType()))); }; @@ -431,7 +437,7 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress, { m_localities.GetLocality(res.GetFeatureCenter(), [&](LocalityItem const & item) { string city; - if (item.GetSpecifiedOrDefaultName(m_localityLang, city)) + if (item.GetSpecifiedOrDefaultName(m_localeCode, city)) res.AppendCity(city); }); } @@ -523,6 +529,14 @@ void Ranker::UpdateResults(bool lastUpdate) void Ranker::ClearCaches() { m_localities.ClearCache(); } +void Ranker::SetLocale(string const & locale) +{ + m_localeCode = StringUtf8Multilang::GetLangIndex(languages::Normalize(locale)); + m_regionInfoGetter.SetLocale(locale); +} + +void Ranker::LoadCountriesTree() { 
m_regionInfoGetter.LoadCountriesTree(); } + void Ranker::MakeRankerResults(Geocoder::Params const & geocoderParams, vector & results) { @@ -564,8 +578,8 @@ void Ranker::MatchForSuggestions(strings::UniString const & token, int8_t locale for (auto const & suggest : m_suggests) { strings::UniString const & s = suggest.m_name; - if (suggest.m_prefixLength <= token.size() - && token != s // do not push suggestion if it already equals to token + if (suggest.m_prefixLength <= token.size() && + token != s // do not push suggestion if it already equals to token && suggest.m_locale == locale // push suggestions only for needed language && strings::StartsWith(s, token)) { @@ -608,4 +622,15 @@ void Ranker::ProcessSuggestions(vector & vec) const ++i; } } + +string Ranker::GetLocalizedRegionInfoForResult(RankerResult const & result) const +{ + auto const type = result.GetBestType(&m_params.m_preferredTypes); + + storage::TCountryId id; + if (!result.GetCountryId(m_infoGetter, type, id)) + return {}; + + return m_regionInfoGetter.GetLocalizedFullName(id); +} } // namespace search diff --git a/search/ranker.hpp b/search/ranker.hpp index cb24c4c6da..a61c213239 100644 --- a/search/ranker.hpp +++ b/search/ranker.hpp @@ -6,6 +6,7 @@ #include "search/keyword_lang_matcher.hpp" #include "search/locality_finder.hpp" #include "search/mode.hpp" +#include "search/region_info_getter.hpp" #include "search/result.hpp" #include "search/reverse_geocoder.hpp" #include "search/search_params.hpp" @@ -107,7 +108,9 @@ public: void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); } - void SetLocalityLanguage(int8_t code) { m_localityLang = code; } + void SetLocale(std::string const & locale); + + void LoadCountriesTree(); private: friend class RankerResultMaker; @@ -119,6 +122,8 @@ private: std::string const & prolog); void ProcessSuggestions(std::vector & vec) const; + std::string GetLocalizedRegionInfoForResult(RankerResult const & result) const; + Params m_params; Geocoder::Params 
m_geocoderParams; ReverseGeocoder const m_reverseGeocoder; @@ -126,7 +131,8 @@ private: KeywordLangMatcher & m_keywordsScorer; mutable LocalityFinder m_localities; - int8_t m_localityLang = StringUtf8Multilang::kDefaultCode; + int8_t m_localeCode; + RegionInfoGetter m_regionInfoGetter; Index const & m_index; storage::CountryInfoGetter const & m_infoGetter; diff --git a/search/region_info_getter.cpp b/search/region_info_getter.cpp new file mode 100644 index 0000000000..f0c758532d --- /dev/null +++ b/search/region_info_getter.cpp @@ -0,0 +1,98 @@ +#include "search/region_info_getter.hpp" + +#include "storage/country_decl.hpp" + +#include "base/stl_helpers.hpp" +#include "base/string_utils.hpp" + +#include +#include + +using namespace std; +using namespace storage; + +namespace search +{ +namespace +{ +// Calls |fn| on each node name on the way from |id| to the root of +// the |countries| tree, except the root. Does nothing if there are +// multiple ways from |id| to the |root|. +template +void GetPathToRoot(storage::TCountryId const & id, storage::TCountryTree const & countries, + Fn && fn) +{ + vector nodes; + countries.Find(id, nodes); + + if (nodes.size() != 1 || nodes[0]->IsRoot()) + return; + + auto const * cur = nodes[0]; + do + { + fn(cur->Value().Name()); + cur = &cur->Parent(); + } while (!cur->IsRoot()); +} +} // namespace + +void RegionInfoGetter::LoadCountriesTree() +{ + storage::TMappingAffiliations affiliations; + storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations); +} + +void RegionInfoGetter::SetLocale(string const & locale) +{ + m_nameGetter = platform::GetTextByIdFactory(platform::TextSource::Countries, locale); +} + +string RegionInfoGetter::GetLocalizedFullName(storage::TCountryId const & id) const +{ + size_t const kMaxNumParts = 2; + + vector parts; + GetPathToRoot(id, m_countries, [&](storage::TCountryId const & id) { + parts.push_back(GetLocalizedCountryName(id)); + }); + + if (parts.size() > kMaxNumParts) + 
parts.erase(parts.begin(), parts.end() - kMaxNumParts); + + my::EraseIf(parts, [&](string const & s) { return s.empty(); }); + + if (!parts.empty()) + return strings::JoinStrings(parts, ", "); + + // Tries to get at least localized name for |id|, if |id| is a + // disputed territory. + auto name = GetLocalizedCountryName(id); + if (!name.empty()) + return name; + + // Tries to transform map name to the full name. + name = id; + storage::CountryInfo::FileName2FullName(name); + if (!name.empty()) + return name; + + return {}; +} + +string RegionInfoGetter::GetLocalizedCountryName(storage::TCountryId const & id) const +{ + if (!m_nameGetter) + return {}; + + auto const shortName = (*m_nameGetter)(id + " Short"); + if (!shortName.empty()) + return shortName; + + auto const officialName = (*m_nameGetter)(id); + if (!officialName.empty()) + return officialName; + + return {}; +} +} // namespace search diff --git a/search/region_info_getter.hpp b/search/region_info_getter.hpp new file mode 100644 index 0000000000..19c9ad475b --- /dev/null +++ b/search/region_info_getter.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include "storage/country.hpp" +#include "storage/index.hpp" + +#include "platform/get_text_by_id.hpp" + +namespace search +{ +class RegionInfoGetter +{ +public: + void LoadCountriesTree(); + void SetLocale(std::string const & locale); + + std::string GetLocalizedFullName(storage::TCountryId const & id) const; + std::string GetLocalizedCountryName(storage::TCountryId const & id) const; + +private: + storage::TCountryTree m_countries; + std::unique_ptr m_nameGetter; +}; +} // namespace search diff --git a/search/search_tests/CMakeLists.txt b/search/search_tests/CMakeLists.txt index bf74dd5663..b5cb7eef37 100644 --- a/search/search_tests/CMakeLists.txt +++ b/search/search_tests/CMakeLists.txt @@ -20,6 +20,7 @@ set( point_rect_matcher_tests.cpp query_saver_tests.cpp ranking_tests.cpp + region_info_getter_tests.cpp segment_tree_tests.cpp 
string_match_test.cpp ) diff --git a/search/search_tests/region_info_getter_tests.cpp b/search/search_tests/region_info_getter_tests.cpp new file mode 100644 index 0000000000..35c9266c08 --- /dev/null +++ b/search/search_tests/region_info_getter_tests.cpp @@ -0,0 +1,65 @@ +#include "testing/testing.hpp" + +#include "search/region_info_getter.hpp" + +using namespace search; + +namespace +{ +class RegionInfoGetterTest +{ +public: + RegionInfoGetterTest() + { + m_regionInfoGetter.LoadCountriesTree(); + SetLocale("default"); + } + + void SetLocale(std::string const & locale) { m_regionInfoGetter.SetLocale(locale); } + + std::string GetLocalizedFullName(storage::TCountryId const & id) const + { + return m_regionInfoGetter.GetLocalizedFullName(id); + } + + std::string GetLocalizedCountryName(storage::TCountryId const & id) const + { + return m_regionInfoGetter.GetLocalizedCountryName(id); + } + +protected: + RegionInfoGetter m_regionInfoGetter; +}; + +UNIT_CLASS_TEST(RegionInfoGetterTest, CountryName) +{ + SetLocale("en"); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Moscow Oblast", ()); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Moscow", ()); + TEST_EQUAL(GetLocalizedCountryName("United States of America"), "USA", ()); + + SetLocale("ru"); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Московская область", ()); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Москва", ()); + TEST_EQUAL(GetLocalizedCountryName("United States of America"), "США", ()); + TEST_EQUAL(GetLocalizedCountryName("Crimea"), "Крым", ()); + + // En locale should actually be used. 
+ SetLocale("broken locale"); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Moscow Oblast", ()); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Moscow", ()); + TEST_EQUAL(GetLocalizedCountryName("United States of America"), "USA", ()); + + SetLocale("zh-Hans"); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "莫斯科州", ()); + TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "莫斯科", ()); + TEST_EQUAL(GetLocalizedCountryName("United States of America"), "美国", ()); +} + +UNIT_CLASS_TEST(RegionInfoGetterTest, FullName) +{ + SetLocale("ru"); + TEST_EQUAL(GetLocalizedFullName("Russia_Moscow Oblast_East"), "Московская область, Россия", ()); + TEST_EQUAL(GetLocalizedFullName("Crimea"), "Крым", ()); +} +} // namespace diff --git a/storage/country_tree.hpp b/storage/country_tree.hpp index b3faa71191..dc72e6b84c 100644 --- a/storage/country_tree.hpp +++ b/storage/country_tree.hpp @@ -68,6 +68,8 @@ public: bool HasParent() const { return m_parent != nullptr; } + bool IsRoot() const { return !HasParent(); } + Node const & Parent() const { CHECK(HasParent(), ());