[search] Use affiliates field to find MWMs by state/country features.

This commit is contained in:
vng 2016-03-22 18:13:57 +03:00 committed by Sergey Yershov
parent edf20c6b6c
commit dd8d745554
10 changed files with 82 additions and 112 deletions

View file

@ -1152,8 +1152,9 @@ bool Framework::GetCurrentPosition(double & lat, double & lon) const
void Framework::InitCountryInfoGetter()
{
ASSERT(!m_infoGetter.get(), ("InitCountryInfoGetter() must be called only once."));
Platform const & platform = GetPlatform();
m_infoGetter = CountryInfoReader::CreateCountryInfoReader(platform);
m_infoGetter = CountryInfoReader::CreateCountryInfoReader(GetPlatform());
m_infoGetter->InitAffiliationsInfo(&m_storage.GetAffiliations());
}
void Framework::InitSearchEngine()

View file

@ -63,15 +63,12 @@ size_t constexpr kMaxNumCountries = 5;
// This constant limits number of localities that will be extracted
// from World map. Villages are not counted here as they're not
// included into World map.
size_t constexpr kMaxNumLocalities = kMaxNumCities + kMaxNumStates + kMaxNumCountries;
// @vng Set this value to possible maximum.
size_t const kMaxNumLocalities = LocalityScorer::kDefaultReadLimit;
size_t constexpr kPivotRectsCacheSize = 10;
size_t constexpr kLocalityRectsCacheSize = 10;
// list of countries we're supporting search by state. Elements of the
// list should be valid prefixes of corresponding mwms names.
string const kCountriesWithStates[] = {"US_", "Canada_"};
strings::UniString const kUniSpace(strings::MakeUniString(" "));
template <typename T>
@ -234,36 +231,10 @@ void JoinQueryTokens(SearchQueryParams const & params, size_t curToken, size_t e
}
}
bool HasAllSubstrings(string const & s, vector<string> const & substrs)
void GetAffiliationName(FeatureType const & ft, string & name)
{
for (auto const & substr : substrs)
{
if (s.find(substr) == string::npos)
return false;
}
return true;
}
void GetEnglishName(FeatureType const & ft, string & name)
{
static vector<string> const kUSA{"united", "states", "america"};
static vector<string> const kUK{"united", "kingdom"};
static vector<int8_t> const kLangs = {StringUtf8Multilang::GetLangIndex("en"),
StringUtf8Multilang::GetLangIndex("int_name"),
StringUtf8Multilang::GetLangIndex("default")};
for (auto const & lang : kLangs)
{
if (!ft.GetName(lang, name))
continue;
strings::AsciiToLower(name);
if (HasAllSubstrings(name, kUSA))
name = "us";
else if (HasAllSubstrings(name, kUK))
name = "uk";
else
return;
}
VERIFY(ft.GetName(StringUtf8Multilang::kDefaultCode, name), ());
ASSERT(!name.empty(), ());
}
// todo(@m) Refactor at least here, or even at indexer/ftypes_matcher.hpp.
@ -739,6 +710,26 @@ void Geocoder::FillLocalitiesTable()
FeatureType ft;
m_context->GetFeature(l.m_featureId, ft);
auto addMWMFn = [&](size_t & count, size_t maxCount, RegionType type)
{
if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT)
{
Region region(l, type);
region.m_center = ft.GetCenter();
string name;
GetAffiliationName(ft, region.m_enName);
LOG(LDEBUG, ("Region =", region.m_enName));
m_infoGetter.GetMatchedRegions(region.m_enName, region.m_ids);
if (region.m_ids.empty())
LOG(LWARNING, ("Empty MWM IDs set for", region.m_enName));
++count;
m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region);
}
};
switch (m_model.GetSearchType(ft))
{
case SearchModel::SEARCH_TYPE_CITY:
@ -765,49 +756,16 @@ void Geocoder::FillLocalitiesTable()
}
case SearchModel::SEARCH_TYPE_STATE:
{
if (numStates < kMaxNumStates && ft.GetFeatureType() == feature::GEOM_POINT)
{
Region state(l, REGION_TYPE_STATE);
state.m_center = ft.GetCenter();
string name;
GetEnglishName(ft, name);
for (auto const & prefix : kCountriesWithStates)
{
state.m_enName = prefix + name;
strings::AsciiToLower(state.m_enName);
state.m_ids.clear();
m_infoGetter.GetMatchedRegions(state.m_enName, state.m_ids);
if (!state.m_ids.empty())
{
LOG(LDEBUG, ("State =", state.m_enName));
++numStates;
m_regions[REGION_TYPE_STATE][make_pair(l.m_startToken, l.m_endToken)].push_back(state);
}
}
}
addMWMFn(numStates, kMaxNumStates, REGION_TYPE_STATE);
break;
}
case SearchModel::SEARCH_TYPE_COUNTRY:
{
if (numCountries < kMaxNumCountries && ft.GetFeatureType() == feature::GEOM_POINT)
{
Region country(l, REGION_TYPE_COUNTRY);
country.m_center = ft.GetCenter();
GetEnglishName(ft, country.m_enName);
LOG(LDEBUG, ("Country =", country.m_enName));
m_infoGetter.GetMatchedRegions(country.m_enName, country.m_ids);
++numCountries;
m_regions[REGION_TYPE_COUNTRY][{l.m_startToken, l.m_endToken}].push_back(country);
}
break;
default:
addMWMFn(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY);
break;
}
default:
break;
}
}
}

View file

@ -6,10 +6,10 @@ namespace search
{
namespace v2
{
size_t const LocalityScorer::kDefaultReadLimit = 100;
namespace
{
const size_t kDefaultReadLimit = 100;
bool IsAlmostFullMatch(NameScore score)
{
return score == NAME_SCORE_FULL_MATCH_PREFIX || score == NAME_SCORE_FULL_MATCH;

View file

@ -15,6 +15,8 @@ namespace v2
class LocalityScorer
{
public:
static size_t const kDefaultReadLimit;
class Delegate
{
public:

View file

@ -64,14 +64,10 @@ public:
void InsertAffiliation(TCountryId const & countryId, string const & affilation) override
{
auto const countryIdRange = m_affiliations.equal_range(countryId);
for (auto it = countryIdRange.first; it != countryIdRange.second; ++it)
{
if (it->second == affilation)
return; // No need key with the same value. It could happend in case of a disputable territory.
}
ASSERT(!affilation.empty(), ());
ASSERT(!countryId.empty(), ());
m_affiliations.insert(make_pair(countryId, affilation));
m_affiliations[affilation].push_back(countryId);
}
TMappingOldMwm GetMapping() const override { return m_idsMapping; }

View file

@ -24,7 +24,8 @@ class SizeUpdater;
namespace storage
{
using TMappingOldMwm = map<TCountryId, TCountriesSet>;
using TMappingAffiliations = unordered_multimap<TCountryId, string>;
/// Map from key affiliation words into MWM IDs (file names).
using TMappingAffiliations = unordered_map<string, vector<TCountryId>>;
/// This class keeps all the information about a country in country tree (TCountryTree).
/// It is guaranteed that every node represent a unique region has a unique |m_name| in country

View file

@ -13,6 +13,7 @@
#include "coding/read_write_utils.hpp"
#include "base/logging.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include "3party/Alohalytics/src/alohalytics.h"
@ -117,15 +118,25 @@ m2::RectD CountryInfoGetter::GetLimitRectForLeaf(TCountryId const & leafCountryI
return m_countries[it->second].m_rect;
}
void CountryInfoGetter::GetMatchedRegions(string const & enNamePrefix, IdSet & regions) const
namespace
{
bool IsInCountrySet(string const & id, vector<TCountryId> const & ids)
{
return binary_search(ids.begin(), ids.end(), id);
}
}
void CountryInfoGetter::GetMatchedRegions(string const & affiliation, IdSet & regions) const
{
auto it = m_affMap->find(affiliation);
if (it == m_affMap->end())
return;
for (size_t i = 0; i < m_countries.size(); ++i)
{
// Match english name with region file name (they are equal in almost all cases).
// @todo Do it smarter in future.
string s = m_countries[i].m_name;
strings::AsciiToLower(s);
if (strings::StartsWith(s, enNamePrefix.c_str()))
if (IsInCountrySet(m_countries[i].m_name, it->second))
regions.push_back(i);
}
}
@ -150,6 +161,13 @@ bool CountryInfoGetter::IsBelongToRegions(string const & fileName, IdSet const &
return false;
}
void CountryInfoGetter::InitAffiliationsInfo(TMappingAffiliations * affMap)
{
for (auto & v : *affMap)
my::SortUnique(v.second);
m_affMap = affMap;
}
CountryInfoGetter::IdType CountryInfoGetter::FindFirstCountry(m2::PointD const & pt) const
{
for (size_t id = 0; id < m_countries.size(); ++id)

View file

@ -1,5 +1,6 @@
#pragma once
#include "storage/country.hpp"
#include "storage/country_decl.hpp"
#include "platform/platform.hpp"
@ -61,8 +62,8 @@ public:
// and zero rect otherwise.
m2::RectD GetLimitRectForLeaf(TCountryId const & leafCountryId) const;
// Returns identifiers for all regions matching to |enNamePrefix|.
void GetMatchedRegions(string const & enNamePrefix, IdSet & regions) const;
// Returns identifiers for all regions matching to correspondent |affiliation|.
void GetMatchedRegions(string const & affiliation, IdSet & regions) const;
// Returns true when |pt| belongs to at least one of the specified
// |regions|.
@ -75,6 +76,8 @@ public:
// Clears regions cache.
inline void ClearCaches() const { ClearCachesImpl(); }
void InitAffiliationsInfo(TMappingAffiliations * affMap);
protected:
CountryInfoGetter() = default;
@ -101,6 +104,8 @@ protected:
// Maps all leaf country id (file names) to their indices in m_countries.
unordered_map<TCountryId, IdType> m_countryIndex;
TMappingAffiliations const * m_affMap = nullptr;
// Maps country file name without an extension to a country info.
map<string, CountryInfo> m_id2info;

View file

@ -358,7 +358,7 @@ public:
/// \return true if updateInfo is filled correctly and false otherwise.
bool GetUpdateInfo(TCountryId const & countryId, UpdateInfo & updateInfo) const;
TMappingAffiliations const & GetAffiliations() const { return m_affiliations; }
TMappingAffiliations & GetAffiliations() { return m_affiliations; }
/// \brief Calls |toDo| for each node for subtree with |root|.
/// For example ForEachInSubtree(GetRootId()) calls |toDo| for every node including

View file

@ -1007,38 +1007,27 @@ UNIT_TEST(StorageTest_GetChildren)
TEST_EQUAL(algeriaList.front(), "Algeria_Central", ());
}
/*
UNIT_TEST(StorageTest_GetAffiliations)
{
Storage storage(kSingleMwmCountriesTxt, make_unique<TestMapFilesDownloader>());
string const abkhaziaId = "Abkhazia";
TMappingAffiliations const expectedAffiliations1 = {{abkhaziaId.c_str(), "Georgia"},
{abkhaziaId.c_str(), "Russia"},
{abkhaziaId.c_str(), "Europe"}};
auto const rangeResultAffiliations1 = storage.GetAffiliations().equal_range(abkhaziaId);
TMappingAffiliations const resultAffiliations1(rangeResultAffiliations1.first,
rangeResultAffiliations1.second);
TEST(expectedAffiliations1 == resultAffiliations1, ());
vector<string> const expectedAffiliations1 = {"Georgia", "Russia", "Europe"};
TEST_EQUAL(expectedAffiliations1, storage.GetAffiliations().at(abkhaziaId), ());
auto const rangeResultNoAffiliations = storage.GetAffiliations().equal_range("Algeria");
TEST(rangeResultNoAffiliations.first == rangeResultNoAffiliations.second, ());
TEST(storage.GetAffiliations().at("Algeria").empty(), ());
// Affiliation inheritance.
string const disputableId = "Disputable Territory";
auto const rangeResultAffiliations2 = storage.GetAffiliations().equal_range(disputableId);
TMappingAffiliations const resultAffiliations2(rangeResultAffiliations2.first,
rangeResultAffiliations2.second);
TMappingAffiliations const expectedAffiliations2 = {{disputableId.c_str(), "Stepchild Land1"},
{disputableId.c_str(), "Stepchild Land2"}};
TEST(expectedAffiliations2 == resultAffiliations2, ());
vector<string> const expectedAffiliations2 = {"Stepchild Land1", "Stepchild Land2"};
TEST_EQUAL(expectedAffiliations2, storage.GetAffiliations().at(disputableId), ());
string const indisputableId = "Indisputable Territory Of Country1";
auto const rangeResultAffiliations3 = storage.GetAffiliations().equal_range(indisputableId);
TMappingAffiliations const resultAffiliations3(rangeResultAffiliations3.first,
rangeResultAffiliations3.second);
TMappingAffiliations const expectedAffiliations3 = {{indisputableId.c_str(), "Child Land1"}};
TEST(expectedAffiliations3 == resultAffiliations3, ());
vector<string> const expectedAffiliations3 = {"Child Land1"};
TEST_EQUAL(expectedAffiliations3, storage.GetAffiliations().at(indisputableId), ());
}
*/
UNIT_TEST(StorageTest_HasCountryId)
{