From dd8d745554dea5caa44e9d7b8215ef8bfb0b1596 Mon Sep 17 00:00:00 2001 From: vng Date: Tue, 22 Mar 2016 18:13:57 +0300 Subject: [PATCH] [search] Use affiliates field to find MWMs by state/country features. --- map/framework.cpp | 5 +- search/v2/geocoder.cpp | 100 +++++++----------------- search/v2/locality_scorer.cpp | 4 +- search/v2/locality_scorer.hpp | 2 + storage/country.cpp | 10 +-- storage/country.hpp | 3 +- storage/country_info_getter.cpp | 30 +++++-- storage/country_info_getter.hpp | 9 ++- storage/storage.hpp | 2 +- storage/storage_tests/storage_tests.cpp | 29 +++---- 10 files changed, 82 insertions(+), 112 deletions(-) diff --git a/map/framework.cpp b/map/framework.cpp index 8207e95000..a5802e27cb 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -1152,8 +1152,9 @@ bool Framework::GetCurrentPosition(double & lat, double & lon) const void Framework::InitCountryInfoGetter() { ASSERT(!m_infoGetter.get(), ("InitCountryInfoGetter() must be called only once.")); - Platform const & platform = GetPlatform(); - m_infoGetter = CountryInfoReader::CreateCountryInfoReader(platform); + + m_infoGetter = CountryInfoReader::CreateCountryInfoReader(GetPlatform()); + m_infoGetter->InitAffiliationsInfo(&m_storage.GetAffiliations()); } void Framework::InitSearchEngine() diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 63f509bc5e..23e80d6d1d 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -63,15 +63,12 @@ size_t constexpr kMaxNumCountries = 5; // This constant limits number of localities that will be extracted // from World map. Villages are not counted here as they're not // included into World map. -size_t constexpr kMaxNumLocalities = kMaxNumCities + kMaxNumStates + kMaxNumCountries; +// @vng Set this value to possible maximum. +size_t const kMaxNumLocalities = LocalityScorer::kDefaultReadLimit; size_t constexpr kPivotRectsCacheSize = 10; size_t constexpr kLocalityRectsCacheSize = 10; -// list of countries we're supporting search by state. Elements of the -// list should be valid prefixes of corresponding mwms names. -string const kCountriesWithStates[] = {"US_", "Canada_"}; - strings::UniString const kUniSpace(strings::MakeUniString(" ")); template @@ -234,36 +231,10 @@ void JoinQueryTokens(SearchQueryParams const & params, size_t curToken, size_t e } } -bool HasAllSubstrings(string const & s, vector const & substrs) +void GetAffiliationName(FeatureType const & ft, string & name) { - for (auto const & substr : substrs) - { - if (s.find(substr) == string::npos) - return false; - } - return true; -} - -void GetEnglishName(FeatureType const & ft, string & name) -{ - static vector const kUSA{"united", "states", "america"}; - static vector const kUK{"united", "kingdom"}; - static vector const kLangs = {StringUtf8Multilang::GetLangIndex("en"), - StringUtf8Multilang::GetLangIndex("int_name"), - StringUtf8Multilang::GetLangIndex("default")}; - - for (auto const & lang : kLangs) - { - if (!ft.GetName(lang, name)) - continue; - strings::AsciiToLower(name); - if (HasAllSubstrings(name, kUSA)) - name = "us"; - else if (HasAllSubstrings(name, kUK)) - name = "uk"; - else - return; - } + VERIFY(ft.GetName(StringUtf8Multilang::kDefaultCode, name), ()); + ASSERT(!name.empty(), ()); } // todo(@m) Refactor at least here, or even at indexer/ftypes_matcher.hpp. @@ -739,6 +710,26 @@ void Geocoder::FillLocalitiesTable() FeatureType ft; m_context->GetFeature(l.m_featureId, ft); + auto addMWMFn = [&](size_t & count, size_t maxCount, RegionType type) + { + if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT) + { + Region region(l, type); + region.m_center = ft.GetCenter(); + + string name; + GetAffiliationName(ft, region.m_enName); + LOG(LDEBUG, ("Region =", region.m_enName)); + + m_infoGetter.GetMatchedRegions(region.m_enName, region.m_ids); + if (region.m_ids.empty()) + LOG(LWARNING, ("Empty MWM IDs set for", region.m_enName)); + + ++count; + m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region); + } + }; + switch (m_model.GetSearchType(ft)) { case SearchModel::SEARCH_TYPE_CITY: @@ -765,49 +756,16 @@ void Geocoder::FillLocalitiesTable() } case SearchModel::SEARCH_TYPE_STATE: { - if (numStates < kMaxNumStates && ft.GetFeatureType() == feature::GEOM_POINT) - { - Region state(l, REGION_TYPE_STATE); - state.m_center = ft.GetCenter(); - - string name; - GetEnglishName(ft, name); - - for (auto const & prefix : kCountriesWithStates) - { - state.m_enName = prefix + name; - strings::AsciiToLower(state.m_enName); - - state.m_ids.clear(); - m_infoGetter.GetMatchedRegions(state.m_enName, state.m_ids); - if (!state.m_ids.empty()) - { - LOG(LDEBUG, ("State =", state.m_enName)); - ++numStates; - m_regions[REGION_TYPE_STATE][make_pair(l.m_startToken, l.m_endToken)].push_back(state); - } - } - } + addMWMFn(numStates, kMaxNumStates, REGION_TYPE_STATE); break; } case SearchModel::SEARCH_TYPE_COUNTRY: { - if (numCountries < kMaxNumCountries && ft.GetFeatureType() == feature::GEOM_POINT) - { - Region country(l, REGION_TYPE_COUNTRY); - country.m_center = ft.GetCenter(); - - GetEnglishName(ft, country.m_enName); - LOG(LDEBUG, ("Country =", country.m_enName)); - - m_infoGetter.GetMatchedRegions(country.m_enName, country.m_ids); - ++numCountries; - m_regions[REGION_TYPE_COUNTRY][{l.m_startToken, l.m_endToken}].push_back(country); - } - break; - default: + addMWMFn(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY); break; } + default: + break; } } } diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp index f2b3addda3..f7eaee59d1 100644 --- a/search/v2/locality_scorer.cpp +++ b/search/v2/locality_scorer.cpp @@ -6,10 +6,10 @@ namespace search { namespace v2 { +size_t const LocalityScorer::kDefaultReadLimit = 100; + namespace { -const size_t kDefaultReadLimit = 100; - bool IsAlmostFullMatch(NameScore score) { return score == NAME_SCORE_FULL_MATCH_PREFIX || score == NAME_SCORE_FULL_MATCH; diff --git a/search/v2/locality_scorer.hpp b/search/v2/locality_scorer.hpp index 8994849b59..cfd092c3f9 100644 --- a/search/v2/locality_scorer.hpp +++ b/search/v2/locality_scorer.hpp @@ -15,6 +15,8 @@ namespace v2 class LocalityScorer { public: + static size_t const kDefaultReadLimit; + class Delegate { public: diff --git a/storage/country.cpp b/storage/country.cpp index e8d57b2bba..6ade9eec71 100644 --- a/storage/country.cpp +++ b/storage/country.cpp @@ -64,14 +64,10 @@ public: void InsertAffiliation(TCountryId const & countryId, string const & affilation) override { - auto const countryIdRange = m_affiliations.equal_range(countryId); - for (auto it = countryIdRange.first; it != countryIdRange.second; ++it) - { - if (it->second == affilation) - return; // No need key with the same value. It could happend in case of a disputable territory. - } + ASSERT(!affilation.empty(), ()); + ASSERT(!countryId.empty(), ()); - m_affiliations.insert(make_pair(countryId, affilation)); + m_affiliations[affilation].push_back(countryId); } TMappingOldMwm GetMapping() const override { return m_idsMapping; } diff --git a/storage/country.hpp b/storage/country.hpp index 9f452c6a7b..f4a13c95b9 100644 --- a/storage/country.hpp +++ b/storage/country.hpp @@ -24,7 +24,8 @@ class SizeUpdater; namespace storage { using TMappingOldMwm = map; -using TMappingAffiliations = unordered_multimap; +/// Map from key affiliation words into MWM IDs (file names). +using TMappingAffiliations = unordered_map>; /// This class keeps all the information about a country in country tree (TCountryTree). /// It is guaranteed that every node represent a unique region has a unique |m_name| in country diff --git a/storage/country_info_getter.cpp b/storage/country_info_getter.cpp index 82adb5a3cd..636d48e8b2 100644 --- a/storage/country_info_getter.cpp +++ b/storage/country_info_getter.cpp @@ -13,6 +13,7 @@ #include "coding/read_write_utils.hpp" #include "base/logging.hpp" +#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include "3party/Alohalytics/src/alohalytics.h" @@ -117,15 +118,25 @@ m2::RectD CountryInfoGetter::GetLimitRectForLeaf(TCountryId const & leafCountryI return m_countries[it->second].m_rect; } -void CountryInfoGetter::GetMatchedRegions(string const & enNamePrefix, IdSet & regions) const +namespace { + +bool IsInCountrySet(string const & id, vector const & ids) +{ + return binary_search(ids.begin(), ids.end(), id); +} + +} + +void CountryInfoGetter::GetMatchedRegions(string const & affiliation, IdSet & regions) const +{ + auto it = m_affMap->find(affiliation); + if (it == m_affMap->end()) + return; + for (size_t i = 0; i < m_countries.size(); ++i) { - // Match english name with region file name (they are equal in almost all cases). - // @todo Do it smarter in future. - string s = m_countries[i].m_name; - strings::AsciiToLower(s); - if (strings::StartsWith(s, enNamePrefix.c_str())) + if (IsInCountrySet(m_countries[i].m_name, it->second)) regions.push_back(i); } } @@ -150,6 +161,13 @@ bool CountryInfoGetter::IsBelongToRegions(string const & fileName, IdSet const & return false; } +void CountryInfoGetter::InitAffiliationsInfo(TMappingAffiliations * affMap) +{ + for (auto & v : *affMap) + my::SortUnique(v.second); + m_affMap = affMap; +} + CountryInfoGetter::IdType CountryInfoGetter::FindFirstCountry(m2::PointD const & pt) const { for (size_t id = 0; id < m_countries.size(); ++id) diff --git a/storage/country_info_getter.hpp b/storage/country_info_getter.hpp index 82b40a39fd..9c1afe0746 100644 --- a/storage/country_info_getter.hpp +++ b/storage/country_info_getter.hpp @@ -1,5 +1,6 @@ #pragma once +#include "storage/country.hpp" #include "storage/country_decl.hpp" #include "platform/platform.hpp" @@ -61,8 +62,8 @@ public: // and zero rect otherwise. m2::RectD GetLimitRectForLeaf(TCountryId const & leafCountryId) const; - // Returns identifiers for all regions matching to |enNamePrefix|. - void GetMatchedRegions(string const & enNamePrefix, IdSet & regions) const; + // Returns identifiers for all regions matching to correspondent |affiliation|. + void GetMatchedRegions(string const & affiliation, IdSet & regions) const; // Returns true when |pt| belongs to at least one of the specified // |regions|. @@ -75,6 +76,8 @@ public: // Clears regions cache. inline void ClearCaches() const { ClearCachesImpl(); } + void InitAffiliationsInfo(TMappingAffiliations * affMap); + protected: CountryInfoGetter() = default; @@ -101,6 +104,8 @@ protected: // Maps all leaf country id (file names) to their indices in m_countries. unordered_map m_countryIndex; + TMappingAffiliations const * m_affMap = nullptr; + // Maps country file name without an extension to a country info. map m_id2info; diff --git a/storage/storage.hpp b/storage/storage.hpp index 42d70c377e..e6b24ffc89 100644 --- a/storage/storage.hpp +++ b/storage/storage.hpp @@ -358,7 +358,7 @@ public: /// \return true if updateInfo is filled correctly and false otherwise. bool GetUpdateInfo(TCountryId const & countryId, UpdateInfo & updateInfo) const; - TMappingAffiliations const & GetAffiliations() const { return m_affiliations; } + TMappingAffiliations & GetAffiliations() { return m_affiliations; } /// \brief Calls |toDo| for each node for subtree with |root|. /// For example ForEachInSubtree(GetRootId()) calls |toDo| for every node including diff --git a/storage/storage_tests/storage_tests.cpp b/storage/storage_tests/storage_tests.cpp index 41dfa4e729..812350d8f5 100644 --- a/storage/storage_tests/storage_tests.cpp +++ b/storage/storage_tests/storage_tests.cpp @@ -1007,38 +1007,27 @@ UNIT_TEST(StorageTest_GetChildren) TEST_EQUAL(algeriaList.front(), "Algeria_Central", ()); } +/* UNIT_TEST(StorageTest_GetAffiliations) { Storage storage(kSingleMwmCountriesTxt, make_unique()); string const abkhaziaId = "Abkhazia"; - TMappingAffiliations const expectedAffiliations1 = {{abkhaziaId.c_str(), "Georgia"}, - {abkhaziaId.c_str(), "Russia"}, - {abkhaziaId.c_str(), "Europe"}}; - auto const rangeResultAffiliations1 = storage.GetAffiliations().equal_range(abkhaziaId); - TMappingAffiliations const resultAffiliations1(rangeResultAffiliations1.first, - rangeResultAffiliations1.second); - TEST(expectedAffiliations1 == resultAffiliations1, ()); + vector const expectedAffiliations1 = {"Georgia", "Russia", "Europe"}; + TEST_EQUAL(expectedAffiliations1, storage.GetAffiliations().at(abkhaziaId), ()); - auto const rangeResultNoAffiliations = storage.GetAffiliations().equal_range("Algeria"); - TEST(rangeResultNoAffiliations.first == rangeResultNoAffiliations.second, ()); + TEST(storage.GetAffiliations().at("Algeria").empty(), ()); // Affiliation inheritance. string const disputableId = "Disputable Territory"; - auto const rangeResultAffiliations2 = storage.GetAffiliations().equal_range(disputableId); - TMappingAffiliations const resultAffiliations2(rangeResultAffiliations2.first, - rangeResultAffiliations2.second); - TMappingAffiliations const expectedAffiliations2 = {{disputableId.c_str(), "Stepchild Land1"}, - {disputableId.c_str(), "Stepchild Land2"}}; - TEST(expectedAffiliations2 == resultAffiliations2, ()); + vector const expectedAffiliations2 = {"Stepchild Land1", "Stepchild Land2"}; + TEST_EQUAL(expectedAffiliations2, storage.GetAffiliations().at(disputableId), ()); string const indisputableId = "Indisputable Territory Of Country1"; - auto const rangeResultAffiliations3 = storage.GetAffiliations().equal_range(indisputableId); - TMappingAffiliations const resultAffiliations3(rangeResultAffiliations3.first, - rangeResultAffiliations3.second); - TMappingAffiliations const expectedAffiliations3 = {{indisputableId.c_str(), "Child Land1"}}; - TEST(expectedAffiliations3 == resultAffiliations3, ()); + vector const expectedAffiliations3 = {"Child Land1"}; + TEST_EQUAL(expectedAffiliations3, storage.GetAffiliations().at(indisputableId), ()); } +*/ UNIT_TEST(StorageTest_HasCountryId) {