[search] All token categories are exposed to PreRankingInfo.

This commit is contained in:
Yuri Gorshenin 2017-02-07 17:29:22 +03:00
parent 803e85ea59
commit 6fec36a795
18 changed files with 234 additions and 419 deletions

View file

@ -27,16 +27,18 @@ set(
feature_offset_match.hpp
features_filter.cpp
features_filter.hpp
features_layer.cpp
features_layer.hpp
features_layer_matcher.cpp
features_layer_matcher.hpp
features_layer_path_finder.cpp
features_layer_path_finder.hpp
features_layer.cpp
features_layer.hpp
geocoder_context.cpp
geocoder_context.hpp
geocoder.cpp
geocoder.hpp
geocoder_context.cpp
geocoder_context.hpp
geocoder_locality.cpp
geocoder_locality.hpp
geometry_cache.cpp
geometry_cache.hpp
geometry_utils.cpp
@ -68,8 +70,6 @@ set(
locality_finder.hpp
locality_scorer.cpp
locality_scorer.hpp
locality.cpp
locality.hpp
mode.cpp
mode.hpp
model.cpp
@ -84,9 +84,9 @@ set(
pre_ranker.hpp
pre_ranking_info.cpp
pre_ranking_info.hpp
processor_factory.hpp
processor.cpp
processor.hpp
processor_factory.hpp
projection_on_street.cpp
projection_on_street.hpp
query_params.cpp
@ -101,8 +101,6 @@ set(
ranking_info.hpp
ranking_utils.cpp
ranking_utils.hpp
region.cpp
region.hpp
result.cpp
result.hpp
retrieval.cpp

View file

@ -349,7 +349,6 @@ Geocoder::Geocoder(Index const & index, storage::CountryInfoGetter const & infoG
, m_filter(nullptr)
, m_matcher(nullptr)
, m_finder(m_cancellable)
, m_lastMatchedRegion(nullptr)
, m_preRanker(preRanker)
{
}
@ -529,8 +528,7 @@ void Geocoder::GoImpl(vector<shared_ptr<MwmInfo>> & infos, bool inViewport)
});
m_lastMatchedRegion = nullptr;
MatchRegions(ctx, REGION_TYPE_COUNTRY);
MatchRegions(ctx, Region::TYPE_COUNTRY);
if (index < numIntersectingMaps || m_preRanker.NumSentResults() == 0)
MatchAroundPivot(ctx);
@ -614,7 +612,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx)
if (!m_context->GetFeature(l.m_featureId, ft))
continue;
auto addRegionMaps = [&](size_t & count, size_t maxCount, RegionType type)
auto addRegionMaps = [&](size_t & count, size_t maxCount, Region::Type type)
{
if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT)
{
@ -666,12 +664,12 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx)
}
case SearchModel::SEARCH_TYPE_STATE:
{
addRegionMaps(numStates, kMaxNumStates, REGION_TYPE_STATE);
addRegionMaps(numStates, kMaxNumStates, Region::TYPE_STATE);
break;
}
case SearchModel::SEARCH_TYPE_COUNTRY:
{
addRegionMaps(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY);
addRegionMaps(numCountries, kMaxNumCountries, Region::TYPE_COUNTRY);
break;
}
default: break;
@ -736,21 +734,21 @@ void Geocoder::ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn &&
}
}
void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type)
{
switch (type)
{
case REGION_TYPE_STATE:
case Region::TYPE_STATE:
// Tries to skip state matching and go to cities matching.
// Then, performs states matching.
MatchCities(ctx);
break;
case REGION_TYPE_COUNTRY:
case Region::TYPE_COUNTRY:
// Tries to skip country matching and go to states matching.
// Then, performs countries matching.
MatchRegions(ctx, REGION_TYPE_STATE);
MatchRegions(ctx, Region::TYPE_STATE);
break;
case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return;
case Region::TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return;
}
auto const & regions = m_regions[type];
@ -777,8 +775,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
// mwm that is currently being processed belongs to region.
if (isWorld)
{
matches = m_lastMatchedRegion == nullptr ||
m_infoGetter.IsBelongToRegions(region.m_center, m_lastMatchedRegion->m_ids);
matches = ctx.m_regions.empty() ||
m_infoGetter.IsBelongToRegions(region.m_center, ctx.m_regions.back()->m_ids);
}
else
{
@ -788,6 +786,9 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
if (!matches)
continue;
ctx.m_regions.push_back(&region);
MY_SCOPE_GUARD(cleanup, [&ctx]() { ctx.m_regions.pop_back(); });
ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange);
if (ctx.AllTokensUsed())
{
@ -796,16 +797,11 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
continue;
}
m_lastMatchedRegion = &region;
MY_SCOPE_GUARD(cleanup, [this]()
{
m_lastMatchedRegion = nullptr;
});
switch (type)
{
case REGION_TYPE_STATE: MatchCities(ctx); break;
case REGION_TYPE_COUNTRY: MatchRegions(ctx, REGION_TYPE_STATE); break;
case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break;
case Region::TYPE_STATE: MatchCities(ctx); break;
case Region::TYPE_COUNTRY: MatchRegions(ctx, Region::TYPE_STATE); break;
case Region::TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break;
}
}
}
@ -813,6 +809,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
void Geocoder::MatchCities(BaseContext & ctx)
{
ASSERT(!ctx.m_city, ());
// Localities are ordered by (m_startToken, m_endToken) pairs.
for (auto const & p : m_cities)
{
@ -824,13 +822,16 @@ void Geocoder::MatchCities(BaseContext & ctx)
{
BailIfCancelled();
if (m_lastMatchedRegion &&
!m_infoGetter.IsBelongToRegions(city.m_rect.Center(), m_lastMatchedRegion->m_ids))
if (!ctx.m_regions.empty() &&
!m_infoGetter.IsBelongToRegions(city.m_rect.Center(), ctx.m_regions.back()->m_ids))
{
continue;
}
ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange);
ctx.m_city = &city;
MY_SCOPE_GUARD(cleanup, [&ctx]() { ctx.m_city = nullptr; });
if (ctx.AllTokensUsed())
{
// City matches to search query, we need to emit it as is.
@ -934,12 +935,13 @@ void Geocoder::GreedilyMatchStreets(BaseContext & ctx)
void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
StreetsMatcher::Prediction const & prediction)
{
ASSERT(m_layers.empty(), ());
auto & layers = ctx.m_layers;
ASSERT(layers.empty(), ());
m_layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &layers));
auto & layer = m_layers.back();
auto & layer = layers.back();
InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_tokenRange, layer);
vector<uint32_t> sortedFeatures;
@ -955,6 +957,8 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
{
BailIfCancelled();
auto & layers = ctx.m_layers;
curToken = ctx.SkipUsedTokens(curToken);
if (curToken == ctx.m_numTokens)
{
@ -965,7 +969,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
// When there are no layers but user entered a postcode, we have
// to emit all features matching to the postcode.
if (m_layers.size() == 0)
if (layers.size() == 0)
{
CBV filtered = m_postcodes.m_features;
if (m_filter->NeedToFilter(m_postcodes.m_features))
@ -978,7 +982,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
return;
}
if (!(m_layers.size() == 1 && m_layers[0].m_type == SearchModel::SEARCH_TYPE_STREET))
if (!(layers.size() == 1 && layers[0].m_type == SearchModel::SEARCH_TYPE_STREET))
return FindPaths(ctx);
// If there's only one street layer but the user also entered a
@ -989,21 +993,21 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
// GreedilyMatchStreets() doesn't (and shouldn't) perform
// postcodes matching.
{
for (auto const & id : *m_layers.back().m_sortedFeatures)
for (auto const & id : *layers.back().m_sortedFeatures)
{
if (!m_postcodes.m_features.HasBit(id))
continue;
EmitResult(ctx, m_context->GetId(), id, SearchModel::SEARCH_TYPE_STREET,
m_layers.back().m_tokenRange);
layers.back().m_tokenRange);
}
}
// Following code creates a fake layer with buildings and
// intersects it with the streets layer.
m_layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &layers));
auto & layer = m_layers.back();
auto & layer = layers.back();
InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_tokenRange, layer);
vector<uint32_t> features;
@ -1012,8 +1016,8 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
return FindPaths(ctx);
}
m_layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &layers));
// Clusters of features by search type. Each cluster is a sorted
// list of ids.
@ -1051,7 +1055,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
BailIfCancelled();
{
auto & layer = m_layers.back();
auto & layer = layers.back();
InitLayer(layer.m_type, TokenRange(curToken, curToken + n), layer);
}
@ -1062,7 +1066,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
filtered = m_filter->Filter(features);
bool const looksLikeHouseNumber = house_numbers::LooksLikeHouseNumber(
m_layers.back().m_subQuery, m_layers.back().m_lastTokenIsPrefix);
layers.back().m_subQuery, layers.back().m_lastTokenIsPrefix);
if (filtered.IsEmpty() && !looksLikeHouseNumber)
break;
@ -1107,7 +1111,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
{
// ATTENTION: DO NOT USE layer after recursive calls to
// MatchPOIsAndBuildings(). This may lead to use-after-free.
auto & layer = m_layers.back();
auto & layer = layers.back();
layer.m_sortedFeatures = &clusters[i];
if (i == SearchModel::SEARCH_TYPE_BUILDING)
@ -1121,26 +1125,26 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
}
layer.m_type = static_cast<SearchModel::SearchType>(i);
if (IsLayerSequenceSane())
if (IsLayerSequenceSane(layers))
MatchPOIsAndBuildings(ctx, curToken + n);
}
}
}
bool Geocoder::IsLayerSequenceSane() const
bool Geocoder::IsLayerSequenceSane(vector<FeaturesLayer> const & layers) const
{
ASSERT(!m_layers.empty(), ());
ASSERT(!layers.empty(), ());
static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32,
"Select a wider type to represent search types mask.");
uint32_t mask = 0;
size_t buildingIndex = m_layers.size();
size_t streetIndex = m_layers.size();
size_t buildingIndex = layers.size();
size_t streetIndex = layers.size();
// Following loop returns false iff there're two different layers
// of the same search type.
for (size_t i = 0; i < m_layers.size(); ++i)
for (size_t i = 0; i < layers.size(); ++i)
{
auto const & layer = m_layers[i];
auto const & layer = layers[i];
ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ());
// TODO (@y): probably it's worth to check belongs-to-locality here.
@ -1155,14 +1159,14 @@ bool Geocoder::IsLayerSequenceSane() const
streetIndex = i;
}
bool const hasBuildings = buildingIndex != m_layers.size();
bool const hasStreets = streetIndex != m_layers.size();
bool const hasBuildings = buildingIndex != layers.size();
bool const hasStreets = streetIndex != layers.size();
// Checks that building and street layers are neighbours.
if (hasBuildings && hasStreets)
{
auto const & buildings = m_layers[buildingIndex];
auto const & streets = m_layers[streetIndex];
auto const & buildings = layers[buildingIndex];
auto const & streets = layers[streetIndex];
if (!buildings.m_tokenRange.AdjacentTo(streets.m_tokenRange))
return false;
}
@ -1172,13 +1176,15 @@ bool Geocoder::IsLayerSequenceSane() const
void Geocoder::FindPaths(BaseContext const & ctx)
{
if (m_layers.empty())
auto const & layers = ctx.m_layers;
if (layers.empty())
return;
// Layers ordered by search type.
vector<FeaturesLayer const *> sortedLayers;
sortedLayers.reserve(m_layers.size());
for (auto & layer : m_layers)
sortedLayers.reserve(layers.size());
for (auto & layer : layers)
sortedLayers.push_back(&layer);
sort(sortedLayers.begin(), sortedLayers.end(), my::LessBy(&FeaturesLayer::m_type));
@ -1212,19 +1218,27 @@ void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId,
// TODO (@y, @m): need to skip zero rank features that are too
// distant from the pivot when there're enough results close to the
// pivot.
m_preRanker.Emplace(id, PreRankingInfo(type, tokenRange));
PreRankingInfo info(type, tokenRange);
for (auto const & layer : ctx.m_layers)
info.m_tokenRange[layer.m_type] = layer.m_tokenRange;
for (auto const * region : ctx.m_regions)
{
auto const regionType = Region::ToSearchType(region->m_type);
ASSERT(regionType != SearchModel::SEARCH_TYPE_COUNT, ());
info.m_tokenRange[regionType] = region->m_tokenRange;
}
if (ctx.m_city)
info.m_tokenRange[SearchModel::SEARCH_TYPE_CITY] = ctx.m_city->m_tokenRange;
m_preRanker.Emplace(id, info);
}
void Geocoder::EmitResult(BaseContext const & ctx, Region const & region,
TokenRange const & tokenRange)
{
SearchModel::SearchType type;
switch (region.m_type)
{
case REGION_TYPE_STATE: type = SearchModel::SEARCH_TYPE_STATE; break;
case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break;
case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break;
}
auto const type = Region::ToSearchType(region.m_type);
EmitResult(ctx, region.m_countryId, region.m_featureId, type, tokenRange);
}
@ -1235,7 +1249,7 @@ void Geocoder::EmitResult(BaseContext const & ctx, City const & city, TokenRange
void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken)
{
ASSERT(m_layers.empty(), ());
ASSERT(ctx.m_layers.empty(), ());
// We need to match all unused tokens to UNCLASSIFIED features,
// therefore unused tokens must be adjacent to each other. For
@ -1310,16 +1324,4 @@ bool Geocoder::GetSearchTypeInGeocoding(BaseContext const & ctx, uint32_t featur
return false;
}
string DebugPrint(Geocoder::Locality const & locality)
{
ostringstream os;
os << "Locality [ ";
os << "m_countryId=" << DebugPrint(locality.m_countryId) << ", ";
os << "m_featureId=" << locality.m_featureId << ", ";
os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", ";
os << "m_prob=" << locality.m_prob;
os << " ]";
return os.str();
}
} // namespace search

View file

@ -7,6 +7,7 @@
#include "search/features_layer.hpp"
#include "search/features_layer_path_finder.hpp"
#include "search/geocoder_context.hpp"
#include "search/geocoder_locality.hpp"
#include "search/geometry_cache.hpp"
#include "search/hotels_filter.hpp"
#include "search/mode.hpp"
@ -86,64 +87,6 @@ public:
shared_ptr<hotels_filter::Rule> m_hotelsFilter;
};
enum RegionType
{
REGION_TYPE_STATE,
REGION_TYPE_COUNTRY,
REGION_TYPE_COUNT
};
struct Locality
{
Locality() = default;
Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange,
double prob)
: m_countryId(countryId)
, m_featureId(featureId)
, m_tokenRange(tokenRange)
, m_prob(prob)
{
}
MwmSet::MwmId m_countryId;
uint32_t m_featureId = 0;
TokenRange m_tokenRange;
// Measures our belief in the fact that tokens in the range
// [m_startToken, m_endToken) indeed specify a locality. Currently
// it is set only for villages.
double m_prob = 0.0;
};
// This struct represents a country or US- or Canadian- state. It
// is used to filter maps before search.
struct Region : public Locality
{
Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {}
storage::CountryInfoGetter::TRegionIdSet m_ids;
string m_defaultName;
m2::PointD m_center;
RegionType m_type;
};
// This struct represents a city or a village. It is used to filter features
// during search.
// todo(@m) It works well as is, but consider a new naming scheme
// when counties etc. are added. E.g., Region for countries and
// states and Locality for smaller settlements.
struct City : public Locality
{
City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {}
m2::RectD m_rect;
SearchModel::SearchType m_type;
#if defined(DEBUG)
string m_defaultName;
#endif
};
Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter,
PreRanker & preRanker, VillagesCache & villagesCache,
my::Cancellable const & cancellable);
@ -210,7 +153,7 @@ private:
// Tries to find all countries and states in a search query and then
// performs matching of cities in found maps.
void MatchRegions(BaseContext & ctx, RegionType type);
void MatchRegions(BaseContext & ctx, Region::Type type);
// Tries to find all cities in a search query and then performs
// matching of streets in found cities.
@ -245,7 +188,7 @@ private:
// Returns true if current path in the search tree (see comment for
// MatchPOIsAndBuildings()) looks sane. This method is used as a fast
// pre-check to cut off unnecessary work.
bool IsLayerSequenceSane() const;
bool IsLayerSequenceSane(vector<FeaturesLayer> const & layers) const;
// Finds all paths through layers and emits reachable features from
// the lowest layer.
@ -302,7 +245,7 @@ private:
// m_cities stores both big cities that are visible at World.mwm
// and small villages and hamlets that are not.
LocalitiesCache<City> m_cities;
LocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];
LocalitiesCache<Region> m_regions[Region::TYPE_COUNT];
// Caches of features in rects. These caches are separated from
// TLocalitiesCache because the latter are quite lightweight and not
@ -327,14 +270,6 @@ private:
vector<SearchTrieRequest<strings::LevenshteinDFA>> m_tokenRequests;
SearchTrieRequest<strings::PrefixDFAModifier<strings::LevenshteinDFA>> m_prefixTokenRequest;
// Pointer to the most nested region filled during geocoding.
Region const * m_lastMatchedRegion;
// Stack of layers filled during geocoding.
vector<FeaturesLayer> m_layers;
PreRanker & m_preRanker;
};
string DebugPrint(Geocoder::Locality const & locality);
} // namespace search

View file

@ -5,7 +5,7 @@
#include "base/assert.hpp"
#include "base/stl_add.hpp"
#include "std/algorithm.hpp"
#include <algorithm>
namespace search
{
@ -18,14 +18,14 @@ size_t BaseContext::SkipUsedTokens(size_t curToken) const
bool BaseContext::AllTokensUsed() const
{
return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
return std::all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
}
bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const
{
ASSERT(range.IsValid(), (range));
return any_of(m_usedTokens.begin() + range.Begin(), m_usedTokens.begin() + range.End(),
IdFunctor());
return std::any_of(m_usedTokens.begin() + range.Begin(), m_usedTokens.begin() + range.End(),
IdFunctor());
}
size_t BaseContext::NumUnusedTokenGroups() const

View file

@ -1,10 +1,13 @@
#pragma once
#include "search/cbv.hpp"
#include "search/features_layer.hpp"
#include "search/geocoder_locality.hpp"
#include "search/hotels_filter.hpp"
#include "std/unique_ptr.hpp"
#include "std/vector.hpp"
#include <cstddef>
#include <memory>
#include <vector>
namespace search
{
@ -28,17 +31,25 @@ struct BaseContext
// List of bit-vectors of features, where i-th element of the list
// corresponds to the i-th token in the search query.
vector<CBV> m_features;
std::vector<CBV> m_features;
CBV m_villages;
CBV m_streets;
// Stack of layers filled during geocoding.
std::vector<FeaturesLayer> m_layers;
// Stack of regions filled during geocoding.
std::vector<Region const *> m_regions;
City const * m_city = nullptr;
// This vector is used to indicate what tokens were already matched
// and can't be re-used during the geocoding process.
vector<bool> m_usedTokens;
std::vector<bool> m_usedTokens;
// Number of tokens in the query.
size_t m_numTokens = 0;
unique_ptr<hotels_filter::HotelsFilter::ScopedFilter> m_hotelsFilter;
std::unique_ptr<hotels_filter::HotelsFilter::ScopedFilter> m_hotelsFilter;
};
} // namespace search

View file

@ -0,0 +1,29 @@
#include "search/geocoder_locality.hpp"
#include <sstream>
namespace search
{
// static
// Converts a region type to the matching SearchModel search type.
// TYPE_COUNT is not a real region type and is mapped to
// SEARCH_TYPE_COUNT; the same value is returned for any |type| that
// is not one of the enumerators.
SearchModel::SearchType Region::ToSearchType(Type type)
{
  switch (type)
  {
  case Region::TYPE_STATE: return SearchModel::SEARCH_TYPE_STATE;
  case Region::TYPE_COUNTRY: return SearchModel::SEARCH_TYPE_COUNTRY;
  case Region::TYPE_COUNT: return SearchModel::SEARCH_TYPE_COUNT;
  }

  // There is intentionally no default label above, so the compiler
  // can still warn when a new enumerator is not handled. This return
  // keeps the function well-defined when |type| holds a value outside
  // of the enumeration instead of flowing off the end.
  return SearchModel::SEARCH_TYPE_COUNT;
}
// Returns a one-line textual dump of |locality| listing its mwm id,
// feature id, token range and probability.
std::string DebugPrint(Locality const & locality)
{
  std::ostringstream out;
  out << "Locality [ "
      << "m_countryId=" << DebugPrint(locality.m_countryId) << ", "
      << "m_featureId=" << locality.m_featureId << ", "
      << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", "
      << "m_prob=" << locality.m_prob << " ]";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,79 @@
#pragma once
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "indexer/mwm_set.hpp"
#include "storage/country_info_getter.hpp"
#include "geometry/rect2d.hpp"
#include <cstdint>
#include <string>
namespace search
{
// Common part of regions and cities: identifies a feature and the
// range of query tokens it is associated with.
struct Locality
{
Locality() = default;
// Stores all arguments in the corresponding fields as is.
Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange,
double prob)
: m_countryId(countryId), m_featureId(featureId), m_tokenRange(tokenRange), m_prob(prob)
{
}
// Id of the mwm; together with |m_featureId| it identifies the feature.
MwmSet::MwmId m_countryId;
// Id of the feature inside the mwm - presumably local to |m_countryId|, confirm.
uint32_t m_featureId = 0;
// Range of query tokens associated with this locality.
TokenRange m_tokenRange;
// Measures our belief in the fact that tokens in the range
// |m_tokenRange| indeed specify a locality. Currently it is set
// only for villages.
double m_prob = 0.0;
};
// This struct represents a country or a US or Canadian state. It
// is used to filter maps before search.
struct Region : public Locality
{
// Kind of the region. TYPE_COUNT is not a real kind, it is the
// number of kinds.
enum Type
{
TYPE_STATE,
TYPE_COUNTRY,
TYPE_COUNT
};
// Copies |locality| into the base part; the center is set to (0, 0),
// |m_ids| and |m_defaultName| are left empty.
Region(Locality const & locality, Type type) : Locality(locality), m_center(0, 0), m_type(type) {}
// Maps region type to the corresponding search type.
static SearchModel::SearchType ToSearchType(Type type);
// Set of region ids as defined by storage::CountryInfoGetter.
storage::CountryInfoGetter::TRegionIdSet m_ids;
// Name of the region - presumably the default-language name, confirm.
std::string m_defaultName;
// Center point of the region.
m2::PointD m_center;
Type m_type;
};
// This struct represents a city or a village. It is used to filter features
// during search.
// todo(@m) It works well as is, but consider a new naming scheme
// when counties etc. are added. E.g., Region for countries and
// states and Locality for smaller settlements.
struct City : public Locality
{
// Copies |locality| into the base part and remembers the search type;
// |m_rect| is default-constructed.
City(Locality const & locality, SearchModel::SearchType type) : Locality(locality), m_type(type)
{
}
// Rectangle associated with the city.
m2::RectD m_rect;
// Search type of this locality.
SearchModel::SearchType m_type;
// The name is kept in debug builds only.
#if defined(DEBUG)
std::string m_defaultName;
#endif
};
std::string DebugPrint(Locality const & locality);
} // namespace search

View file

@ -1,124 +0,0 @@
#include "locality.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/assert.hpp"
#include "std/algorithm.hpp"
#include "std/limits.hpp"
namespace search
{
// Creates a locality of type ftypes::NONE. The feature id and the rank
// are set to the maximum values of their types, the radius is zero.
// |m_center| is not mentioned in the initializer list.
Locality::Locality()
: m_type(ftypes::NONE)
, m_featureId(numeric_limits<decltype(m_featureId)>::max())
, m_rank(numeric_limits<decltype(m_rank)>::max())
, m_radius(0)
{
}
// Creates a locality from the given type, feature id, center and rank.
// The radius is always initialized with zero; names and matched tokens
// are left empty.
Locality::Locality(ftypes::Type type, uint32_t featureId, m2::PointD const & center, uint8_t rank)
: m_type(type), m_featureId(featureId), m_center(center), m_rank(rank), m_radius(0)
{
}
// A locality is valid when its type is not ftypes::NONE. A typed
// locality is expected to have at least one matched token, which is
// checked by an assertion.
bool Locality::IsValid() const
{
  bool const hasType = m_type != ftypes::NONE;
  if (hasType)
  {
    ASSERT(!m_matchedTokens.empty(), ());
  }
  return hasType;
}
// Checks whether this locality is an acceptable match for the query
// consisting of |tokens| and |prefix|.
//
// Countries, states and cities are accepted when their full name is
// matched. In addition, a country with English name "usa" or "uk" and
// any state are accepted when the single matched token is short enough
// to be a synonym. All other types are not supported: an assertion
// fires and false is returned.
bool Locality::IsSuitable(TTokensArray const & tokens, TToken const & prefix) const
{
  bool const isMatched = IsFullNameMatched();

  // Do filtering of possible localities.
  using namespace ftypes;
  switch (m_type)
  {
  case COUNTRY:
    // USA has synonyms "US" and "USA", UK has a 2-symbol synonym.
    return (isMatched || (m_enName == "usa" && GetSynonymTokenLength(tokens, prefix) <= 3) ||
            (m_enName == "uk" && GetSynonymTokenLength(tokens, prefix) == 2));
  case STATE:
    // Only USA and Canada states are processed for now. USA states
    // have 2-symbol synonyms.
    return (isMatched || GetSynonymTokenLength(tokens, prefix) == 2);
  case CITY:
    // Full name match is needed for cities.
    return isMatched;
  case NONE:
  case TOWN:
  case VILLAGE:
  case LOCALITY_COUNT:
    ASSERT(false, ("Unsupported type:", m_type));
    return false;
  }

  // |m_type| holds a value that is not one of the handled enumerators.
  // Treat it as unsupported instead of flowing off the end of a
  // non-void function.
  return false;
}
// Exchanges the contents of this locality with |rhs| field by field.
void Locality::Swap(Locality & rhs)
{
  // Names and matched tokens.
  swap(m_name, rhs.m_name);
  swap(m_enName, rhs.m_enName);
  swap(m_matchedTokens, rhs.m_matchedTokens);

  // Remaining fields.
  swap(m_type, rhs.m_type);
  swap(m_featureId, rhs.m_featureId);
  swap(m_center, rhs.m_center);
  swap(m_rank, rhs.m_rank);
  swap(m_radius, rhs.m_radius);
}
// Orders localities by type first, then by the number of matched
// tokens and finally by rank.
bool Locality::operator<(Locality const & rhs) const
{
  if (m_type == rhs.m_type)
  {
    auto const lhsMatched = m_matchedTokens.size();
    auto const rhsMatched = rhs.m_matchedTokens.size();
    if (lhsMatched == rhsMatched)
      return m_rank < rhs.m_rank;
    return lhsMatched < rhsMatched;
  }
  return m_type < rhs.m_type;
}
// Returns true when the number of tokens in the normalized native
// name does not exceed the number of matched query tokens.
bool Locality::IsFullNameMatched() const
{
  size_t numNameTokens = 0;
  auto countToken = [&numNameTokens](strings::UniString const &) { ++numNameTokens; };

  SplitUniString(NormalizeAndSimplifyString(m_name), countToken, search::Delimiters());

  return m_matchedTokens.size() >= numNameTokens;
}
// Returns the length of the only matched token, which may be either
// one of |tokens| or the |prefix| that follows them. When the number
// of matched tokens is not exactly one, size_t(-1) is returned.
size_t Locality::GetSynonymTokenLength(TTokensArray const & tokens, TToken const & prefix) const
{
  // Only a single token is checked as a synonym.
  if (m_matchedTokens.size() != 1)
    return static_cast<size_t>(-1);

  size_t const index = m_matchedTokens.front();
  if (index >= tokens.size())
  {
    // The matched token is the prefix.
    ASSERT_EQUAL(index, tokens.size(), ());
    ASSERT(!prefix.empty(), ());
    return prefix.size();
  }

  return tokens[index].size();
}
// Returns a short textual dump of |l|: both names, the rank as a
// number and the count of matched tokens.
string DebugPrint(Locality const & l)
{
  stringstream out;
  out << "{ Locality: Name = " << l.m_name;
  out << "; Name English = " << l.m_enName;
  out << "; Rank = " << static_cast<int>(l.m_rank);
  out << "; Matched: " << l.m_matchedTokens.size();
  out << " }";
  return out.str();
}
} // namespace search

View file

@ -1,51 +0,0 @@
#pragma once
#include "indexer/ftypes_matcher.hpp"
#include "geometry/point2d.hpp"
#include "base/buffer_vector.hpp"
#include "base/string_utils.hpp"
#include "std/string.hpp"
#include "std/vector.hpp"
namespace search
{
// Describes a locality matched by some tokens of a search query.
struct Locality
{
// Type of a single query token and of the list of query tokens.
using TToken = strings::UniString;
using TTokensArray = buffer_vector<TToken, 32>;
// Native and English names of locality.
string m_name;
string m_enName;
// Indexes of matched tokens for locality.
vector<size_t> m_matchedTokens;
ftypes::Type m_type;
uint32_t m_featureId;
m2::PointD m_center;
uint8_t m_rank;
double m_radius;
// Creates a locality of type ftypes::NONE.
Locality();
// Creates a locality from the given values, the radius is set to zero.
Locality(ftypes::Type type, uint32_t featureId, m2::PointD const & center, uint8_t rank);
// Returns false iff the type is ftypes::NONE.
bool IsValid() const;
// Checks whether the locality is acceptable for the query consisting
// of |tokens| and |prefix|.
bool IsSuitable(TTokensArray const & tokens, TToken const & prefix) const;
// Exchanges all fields with |rhs|.
void Swap(Locality & rhs);
// Compares by type, then by the number of matched tokens, then by rank.
bool operator<(Locality const & rhs) const;
private:
// Returns true when the native name has no more tokens than were matched.
bool IsFullNameMatched() const;
// Returns the length of the only matched token or size_t(-1) when the
// number of matched tokens is not one.
size_t GetSynonymTokenLength(TTokensArray const & tokens, TToken const & prefix) const;
};
string DebugPrint(Locality const & l);
} // namespace search

View file

@ -26,7 +26,7 @@ LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScor
{
}
LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality)
LocalityScorer::ExLocality::ExLocality(Locality const & locality)
: m_locality(locality)
, m_numTokens(locality.m_tokenRange.Size())
, m_rank(0)
@ -42,7 +42,7 @@ LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & dele
void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
CBV const & filter, size_t limit,
std::vector<Geocoder::Locality> & localities)
std::vector<Locality> & localities)
{
CHECK_EQUAL(ctx.m_numTokens, m_params.GetNumTokens(), ());
@ -83,8 +83,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte
LeaveTopLocalities(limit, localities);
}
void LocalityScorer::LeaveTopLocalities(size_t limit,
std::vector<Geocoder::Locality> & localities) const
void LocalityScorer::LeaveTopLocalities(size_t limit, std::vector<Locality> & localities) const
{
std::vector<ExLocality> ls;
ls.reserve(localities.size());

View file

@ -1,6 +1,6 @@
#pragma once
#include "search/geocoder.hpp"
#include "search/geocoder_locality.hpp"
#include "search/ranking_utils.hpp"
#include <cstdint>
@ -32,18 +32,17 @@ public:
// Leaves at most |limit| elements of |localities|, ordered by their
// features.
void GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
CBV const & filter, size_t limit,
std::vector<Geocoder::Locality> & localities);
CBV const & filter, size_t limit, std::vector<Locality> & localities);
private:
struct ExLocality
{
ExLocality();
explicit ExLocality(Geocoder::Locality const & locality);
explicit ExLocality(Locality const & locality);
inline uint32_t GetId() const { return m_locality.m_featureId; }
Geocoder::Locality m_locality;
Locality m_locality;
size_t m_numTokens;
uint8_t m_rank;
NameScore m_nameScore;
@ -53,7 +52,7 @@ private:
// Leaves at most |limit| elements of |localities|, ordered by some
// combination of ranks and number of matched tokens.
void LeaveTopLocalities(size_t limit, std::vector<Geocoder::Locality> & localities) const;
void LeaveTopLocalities(size_t limit, std::vector<Locality> & localities) const;
void RemoveDuplicates(std::vector<ExLocality> & ls) const;
void LeaveTopByRankAndProb(size_t limit, std::vector<ExLocality> & ls) const;

View file

@ -1,12 +1,12 @@
#include "search/ranking_info.hpp"
#include "std/sstream.hpp"
#include <sstream>
namespace search
{
string DebugPrint(PreRankingInfo const & info)
std::string DebugPrint(PreRankingInfo const & info)
{
ostringstream os;
std::ostringstream os;
os << "PreRankingInfo [";
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
for (size_t i = 0; i < static_cast<size_t>(SearchModel::SEARCH_TYPE_COUNT); ++i)
@ -17,10 +17,9 @@ string DebugPrint(PreRankingInfo const & info)
auto const type = static_cast<SearchModel::SearchType>(i);
os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ",";
}
os << "m_rank:" << info.m_rank << ",";
os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
os << "m_searchType:" << info.m_searchType;
os << "]";
return os.str();
}
} // namespace search

View file

@ -7,7 +7,8 @@
#include "base/assert.hpp"
#include "std/cstdint.hpp"
#include <cstdint>
#include <string>
namespace search
{
@ -45,6 +46,5 @@ struct PreRankingInfo
SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT;
};
string DebugPrint(PreRankingInfo const & info);
std::string DebugPrint(PreRankingInfo const & info);
} // namespace search

View file

@ -5,12 +5,10 @@
#include "search/geometry_utils.hpp"
#include "search/intermediate_result.hpp"
#include "search/latlon_match.hpp"
#include "search/locality.hpp"
#include "search/pre_ranking_info.hpp"
#include "search/query_params.hpp"
#include "search/ranking_info.hpp"
#include "search/ranking_utils.hpp"
#include "search/region.hpp"
#include "search/search_index_values.hpp"
#include "search/utils.hpp"

View file

@ -1,35 +0,0 @@
#include "search/region.hpp"
#include "base/assert.hpp"
namespace search
{
// Regions are ordered by the number of matched tokens only.
bool Region::operator<(Region const & rhs) const
{
  auto const lhsMatched = m_matchedTokens.size();
  auto const rhsMatched = rhs.m_matchedTokens.size();
  return lhsMatched < rhsMatched;
}
// A region is valid when it has at least one id. Such a region is
// expected to have matched tokens and an English name, both are
// checked by assertions.
bool Region::IsValid() const
{
  bool const hasIds = !m_ids.empty();
  if (hasIds)
  {
    ASSERT(!m_matchedTokens.empty(), ());
    ASSERT(!m_enName.empty(), ());
  }
  return hasIds;
}
// Exchanges all fields of this region with the fields of |rhs|.
void Region::Swap(Region & rhs)
{
  // The name goes first, then both index lists; the swaps are
  // independent of each other.
  m_enName.swap(rhs.m_enName);
  m_matchedTokens.swap(rhs.m_matchedTokens);
  m_ids.swap(rhs.m_ids);
}
// Returns a textual dump of |r|: its English name and the number of
// matched tokens.
string DebugPrint(Region const & r)
{
  string const matched = ::DebugPrint(r.m_matchedTokens.size());
  return "Region: Name English: " + r.m_enName + "; Matched: " + matched;
}
} // namespace search

View file

@ -1,22 +0,0 @@
#pragma once
#include "std/string.hpp"
#include "std/vector.hpp"
namespace search
{
// Describes a region matched by some tokens of a search query.
struct Region
{
// Ids associated with the region; a region without ids is not valid.
vector<size_t> m_ids;
// Matched query tokens - presumably their indexes, confirm against callers.
vector<size_t> m_matchedTokens;
// English name of the region.
string m_enName;
// Returns false iff |m_ids| is empty.
bool IsValid() const;
// Exchanges all fields with |rhs|.
void Swap(Region & rhs);
// Compares regions by the number of matched tokens.
bool operator<(Region const & rhs) const;
};
string DebugPrint(Region const & r);
} // namespace search

View file

@ -30,6 +30,7 @@ HEADERS += \
features_layer_path_finder.hpp \
geocoder.hpp \
geocoder_context.hpp \
geocoder_locality.hpp \
geometry_cache.hpp \
geometry_utils.hpp \
hotels_classifier.hpp \
@ -44,7 +45,6 @@ HEADERS += \
keyword_matcher.hpp \
latlon_match.hpp \
lazy_centers_table.hpp \
locality.hpp \
locality_finder.hpp \
locality_scorer.hpp \
mode.hpp \
@ -63,7 +63,6 @@ HEADERS += \
ranker.hpp \
ranking_info.hpp \
ranking_utils.hpp \
region.hpp \
result.hpp \
retrieval.hpp \
reverse_geocoder.hpp \
@ -97,6 +96,7 @@ SOURCES += \
features_layer_path_finder.cpp \
geocoder.cpp \
geocoder_context.cpp \
geocoder_locality.cpp \
geometry_cache.cpp \
geometry_utils.cpp \
hotels_classifier.cpp \
@ -110,7 +110,6 @@ SOURCES += \
keyword_matcher.cpp \
latlon_match.cpp \
lazy_centers_table.cpp \
locality.cpp \
locality_finder.cpp \
locality_scorer.cpp \
mode.cpp \
@ -128,7 +127,6 @@ SOURCES += \
ranker.cpp \
ranking_info.cpp \
ranking_utils.cpp \
region.cpp \
result.cpp \
retrieval.cpp \
reverse_geocoder.cpp \

View file

@ -96,7 +96,7 @@ public:
filter.SetFull();
m_scorer.GetTopLocalities(MwmSet::MwmId(), ctx, filter, limit, m_localities);
sort(m_localities.begin(), m_localities.end(), my::LessBy(&Geocoder::Locality::m_featureId));
sort(m_localities.begin(), m_localities.end(), my::LessBy(&Locality::m_featureId));
}
// LocalityScorer::Delegate overrides:
@ -111,7 +111,7 @@ public:
protected:
QueryParams m_params;
vector<Geocoder::Locality> m_localities;
vector<Locality> m_localities;
unordered_map<uint32_t, vector<string>> m_names;
LocalityScorer m_scorer;