diff --git a/search/CMakeLists.txt b/search/CMakeLists.txt index accb7e4d26..c41b230c83 100644 --- a/search/CMakeLists.txt +++ b/search/CMakeLists.txt @@ -120,6 +120,7 @@ set( streets_matcher.hpp string_intersection.hpp suggest.hpp + token_range.hpp token_slice.cpp token_slice.hpp types_skipper.cpp diff --git a/search/features_layer.cpp b/search/features_layer.cpp index e3c077db77..0f0e4fbe20 100644 --- a/search/features_layer.cpp +++ b/search/features_layer.cpp @@ -12,8 +12,7 @@ void FeaturesLayer::Clear() { m_sortedFeatures = nullptr; m_subQuery.clear(); - m_startToken = 0; - m_endToken = 0; + m_tokenRange.Clear(); m_type = SearchModel::SEARCH_TYPE_COUNT; m_hasDelayedFeatures = false; m_lastTokenIsPrefix = false; @@ -24,8 +23,9 @@ string DebugPrint(FeaturesLayer const & layer) ostringstream os; os << "FeaturesLayer [ size of m_sortedFeatures: " << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0) - << ", m_subQuery: " << DebugPrint(layer.m_subQuery) << ", m_startToken: " << layer.m_startToken - << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) + << ", m_subQuery: " << DebugPrint(layer.m_subQuery) + << ", m_tokenRange: " << DebugPrint(layer.m_tokenRange) + << ", m_type: " << DebugPrint(layer.m_type) << ", m_lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << " ]"; return os.str(); } diff --git a/search/features_layer.hpp b/search/features_layer.hpp index 9c361d3c45..ce6368351a 100644 --- a/search/features_layer.hpp +++ b/search/features_layer.hpp @@ -1,6 +1,7 @@ #pragma once #include "search/model.hpp" +#include "search/token_range.hpp" #include "base/string_utils.hpp" @@ -22,8 +23,7 @@ struct FeaturesLayer strings::UniString m_subQuery; - size_t m_startToken; - size_t m_endToken; + TokenRange m_tokenRange; SearchModel::SearchType m_type; // *NOTE* This field is meaningful only when m_type equals to diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 8c9e7ed4c7..5e6d41543b 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -79,26 +79,27 @@ UniString const kUniSpace(MakeUniString(" ")); struct ScopedMarkTokens { - ScopedMarkTokens(vector & usedTokens, size_t from, size_t to) - : m_usedTokens(usedTokens), m_from(from), m_to(to) + ScopedMarkTokens(vector & usedTokens, TokenRange const & range) + : m_usedTokens(usedTokens), m_range(range) { - ASSERT_LESS_OR_EQUAL(m_from, m_to, ()); - ASSERT_LESS_OR_EQUAL(m_to, m_usedTokens.size(), ()); + ASSERT(m_range.IsValid(), ()); + ASSERT_LESS_OR_EQUAL(m_range.m_end, m_usedTokens.size(), ()); #if defined(DEBUG) - for (size_t i = m_from; i != m_to; ++i) + for (size_t i : m_range) ASSERT(!m_usedTokens[i], (i)); #endif - fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, true /* used */); + fill(m_usedTokens.begin() + m_range.m_begin, m_usedTokens.begin() + m_range.m_end, + true /* used */); } ~ScopedMarkTokens() { - fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, false /* used */); + fill(m_usedTokens.begin() + m_range.m_begin, m_usedTokens.begin() + m_range.m_end, + false /* used */); } vector & m_usedTokens; - size_t const m_from; - size_t const m_to; + TokenRange const m_range; }; class LazyRankTable : public RankTable @@ -174,14 +175,14 @@ private: LazyRankTable m_ranks; }; -void JoinQueryTokens(QueryParams const & params, size_t curToken, size_t endToken, - UniString const & sep, UniString & res) +void JoinQueryTokens(QueryParams const & params, TokenRange const & range, UniString const & sep, + UniString & res) { - ASSERT_LESS_OR_EQUAL(curToken, endToken, ()); - for (size_t i = curToken; i < endToken; ++i) + ASSERT(range.IsValid(), (range)); + for (size_t i : range) { res.append(params.GetToken(i).m_original); - if (i + 1 != endToken) + if (i + 1 != range.m_end) res.append(sep); } } @@ -623,18 +624,16 @@ void Geocoder::InitBaseContext(BaseContext & ctx) ctx.m_hotelsFilter = m_hotelsFilter.MakeScopedFilter(*m_context, m_params.m_hotelsFilter); } -void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, +void Geocoder::InitLayer(SearchModel::SearchType type, TokenRange const & tokenRange, FeaturesLayer & layer) { layer.Clear(); layer.m_type = type; - layer.m_startToken = startToken; - layer.m_endToken = endToken; + layer.m_tokenRange = tokenRange; - JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, - layer.m_subQuery); + JoinQueryTokens(m_params, layer.m_tokenRange, kUniSpace /* sep */, layer.m_subQuery); layer.m_lastTokenIsPrefix = - layer.m_startToken < layer.m_endToken && m_params.IsPrefixToken(layer.m_endToken - 1); + !layer.m_tokenRange.Empty() && m_params.IsPrefixToken(layer.m_tokenRange.m_end - 1); } void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filter, @@ -685,7 +684,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx) } ++count; - m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region); + m_regions[type][l.m_tokenRange].push_back(region); } }; @@ -709,7 +708,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx) LOG(LINFO, ("City =", city.m_defaultName, "radius =", radius, "prob =", city.m_prob)); #endif - m_cities[{l.m_startToken, l.m_endToken}].push_back(city); + m_cities[city.m_tokenRange].push_back(city); } break; } @@ -758,7 +757,7 @@ void Geocoder::FillVillageLocalities(BaseContext const & ctx) LOG(LDEBUG, ("Village =", village.m_defaultName, "radius =", radius, "prob =", village.m_prob)); #endif - m_cities[{l.m_startToken, l.m_endToken}].push_back(village); + m_cities[village.m_tokenRange].push_back(village); if (numVillages >= kMaxNumVillages) break; } @@ -812,9 +811,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) { BailIfCancelled(); - size_t const startToken = p.first.first; - size_t const endToken = p.first.second; - if (ctx.HasUsedTokensInRange(startToken, endToken)) + auto const & tokenRange = p.first; + if (ctx.HasUsedTokensInRange(tokenRange)) continue; for (auto const & region : p.second) @@ -838,11 +836,11 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) if (!matches) continue; - ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); + ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange); if (ctx.AllTokensUsed()) { // Region matches to search query, we need to emit it as is. - EmitResult(ctx, region, startToken, endToken); + EmitResult(ctx, region, tokenRange); continue; } @@ -866,9 +864,8 @@ void Geocoder::MatchCities(BaseContext & ctx) // Localities are ordered my (m_startToken, m_endToken) pairs. for (auto const & p : m_cities) { - size_t const startToken = p.first.first; - size_t const endToken = p.first.second; - if (ctx.HasUsedTokensInRange(startToken, endToken)) + auto const & tokenRange = p.first; + if (ctx.HasUsedTokensInRange(tokenRange)) continue; for (auto const & city : p.second) @@ -881,11 +878,11 @@ void Geocoder::MatchCities(BaseContext & ctx) continue; } - ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); + ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange); if (ctx.AllTokensUsed()) { // City matches to search query, we need to emit it as is. - EmitResult(ctx, city, startToken, endToken); + EmitResult(ctx, city, tokenRange); continue; } @@ -947,7 +944,7 @@ void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn) if (ctx.m_usedTokens[startToken + n - 1]) break; - TokenSlice slice(m_params, startToken, startToken + n); + TokenSlice slice(m_params, TokenRange(startToken, startToken + n)); auto const isPrefix = startToken + n == ctx.m_numTokens; if (LooksLikePostcode(QuerySlice(slice), isPrefix)) endToken = startToken + n; @@ -955,20 +952,17 @@ void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn) if (startToken == endToken) continue; - auto postcodes = - RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, startToken, endToken)); - MY_SCOPE_GUARD(cleanup, [&]() - { - m_postcodes.Clear(); - }); + TokenRange const tokenRange(startToken, endToken); + + auto postcodes = RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, tokenRange)); + MY_SCOPE_GUARD(cleanup, [&]() { m_postcodes.Clear(); }); if (!postcodes.IsEmpty()) { - ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); + ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange); m_postcodes.Clear(); - m_postcodes.m_startToken = startToken; - m_postcodes.m_endToken = endToken; + m_postcodes.m_tokenRange = tokenRange; m_postcodes.m_features = move(postcodes); fn(); @@ -994,14 +988,14 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); auto & layer = m_layers.back(); - InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_startToken, prediction.m_endToken, layer); + InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_tokenRange, layer); vector sortedFeatures; sortedFeatures.reserve(prediction.m_features.PopCount()); prediction.m_features.ForEach(MakeBackInsertFunctor(sortedFeatures)); layer.m_sortedFeatures = &sortedFeatures; - ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_startToken, prediction.m_endToken); + ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_tokenRange); MatchPOIsAndBuildings(ctx, 0 /* curToken */); } @@ -1024,15 +1018,11 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) CBV filtered = m_postcodes.m_features; if (m_filter->NeedToFilter(m_postcodes.m_features)) filtered = m_filter->Filter(m_postcodes.m_features); - filtered.ForEach([&](uint32_t id) - { - SearchModel::SearchType searchType; - if (GetSearchTypeInGeocoding(ctx, id, searchType)) - { - EmitResult(ctx, m_context->GetId(), id, searchType, - m_postcodes.m_startToken, m_postcodes.m_endToken); - } - }); + filtered.ForEach([&](uint32_t id) { + SearchModel::SearchType searchType; + if (GetSearchTypeInGeocoding(ctx, id, searchType)) + EmitResult(ctx, m_context->GetId(), id, searchType, m_postcodes.m_tokenRange); + }); return; } @@ -1052,7 +1042,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) if (!m_postcodes.m_features.HasBit(id)) continue; EmitResult(ctx, m_context->GetId(), id, SearchModel::SEARCH_TYPE_STREET, - m_layers.back().m_startToken, m_layers.back().m_endToken); + m_layers.back().m_tokenRange); } } @@ -1062,8 +1052,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); auto & layer = m_layers.back(); - InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_startToken, m_postcodes.m_endToken, - layer); + InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_tokenRange, layer); vector features; m_postcodes.m_features.ForEach(MakeBackInsertFunctor(features)); @@ -1111,7 +1100,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) { auto & layer = m_layers.back(); - InitLayer(layer.m_type, curToken, curToken + n, layer); + InitLayer(layer.m_type, TokenRange(curToken, curToken + n), layer); } features = features.Intersect(ctx.m_features[curToken + n - 1]); @@ -1222,11 +1211,8 @@ bool Geocoder::IsLayerSequenceSane() const { auto const & buildings = m_layers[buildingIndex]; auto const & streets = m_layers[streetIndex]; - if (buildings.m_startToken != streets.m_endToken && - buildings.m_endToken != streets.m_startToken) - { + if (!buildings.m_tokenRange.AdjacentTo(streets.m_tokenRange)) return false; - } } return true; @@ -1257,12 +1243,12 @@ void Geocoder::FindPaths(BaseContext const & ctx) // TODO(@y, @m, @vng): use rest fields of IntersectionResult for // better scoring. EmitResult(ctx, m_context->GetId(), result.InnermostResult(), innermostLayer.m_type, - innermostLayer.m_startToken, innermostLayer.m_endToken); + innermostLayer.m_tokenRange); }); } void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId, - SearchModel::SearchType type, size_t startToken, size_t endToken) + SearchModel::SearchType type, TokenRange const & tokenRange) { FeatureID id(mwmId, ftId); @@ -1276,14 +1262,13 @@ void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, // pivot. PreRankingInfo info; info.m_searchType = type; - info.m_startToken = startToken; - info.m_endToken = endToken; + info.m_tokenRange = tokenRange; m_preRanker.Emplace(id, info); } -void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, size_t startToken, - size_t endToken) +void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, + TokenRange const & tokenRange) { SearchModel::SearchType type; switch (region.m_type) @@ -1292,13 +1277,12 @@ void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, size_t case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break; case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break; } - EmitResult(ctx, region.m_countryId, region.m_featureId, type, startToken, endToken); + EmitResult(ctx, region.m_countryId, region.m_featureId, type, tokenRange); } -void Geocoder::EmitResult(BaseContext const & ctx, City const & city, size_t startToken, - size_t endToken) +void Geocoder::EmitResult(BaseContext const & ctx, City const & city, TokenRange const & tokenRange) { - EmitResult(ctx, city.m_countryId, city.m_featureId, city.m_type, startToken, endToken); + EmitResult(ctx, city.m_countryId, city.m_featureId, city.m_type, tokenRange); } void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken) @@ -1334,7 +1318,7 @@ void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken) if (!GetSearchTypeInGeocoding(ctx, featureId, searchType)) return; if (searchType == SearchModel::SEARCH_TYPE_UNCLASSIFIED) - EmitResult(ctx, m_context->GetId(), featureId, searchType, startToken, curToken); + EmitResult(ctx, m_context->GetId(), featureId, searchType, TokenRange(startToken, curToken)); }; allFeatures.ForEach(emitUnclassified); } @@ -1385,7 +1369,7 @@ string DebugPrint(Geocoder::Locality const & locality) os << "Locality [ "; os << "m_countryId=" << DebugPrint(locality.m_countryId) << ", "; os << "m_featureId=" << locality.m_featureId << ", "; - os << "token range=[" << locality.m_startToken << ", " << locality.m_endToken << "), "; + os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", "; os << "m_prob=" << locality.m_prob; os << " ]"; return os.str(); diff --git a/search/geocoder.hpp b/search/geocoder.hpp index b3cf16632e..f1eb9bcb08 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -17,6 +17,7 @@ #include "search/query_params.hpp" #include "search/ranking_utils.hpp" #include "search/streets_matcher.hpp" +#include "search/token_range.hpp" #include "indexer/index.hpp" #include "indexer/mwm_set.hpp" @@ -96,20 +97,18 @@ public: { Locality() = default; - Locality(MwmSet::MwmId const & countryId, uint32_t featureId, size_t startToken, - size_t endToken, double prob) + Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange, + double prob) : m_countryId(countryId) , m_featureId(featureId) - , m_startToken(startToken) - , m_endToken(endToken) + , m_tokenRange(tokenRange) , m_prob(prob) { } MwmSet::MwmId m_countryId; uint32_t m_featureId = 0; - size_t m_startToken = 0; - size_t m_endToken = 0; + TokenRange m_tokenRange; // Measures our belief in the fact that tokens in the range // [m_startToken, m_endToken) indeed specify a locality. Currently @@ -173,20 +172,18 @@ private: { void Clear() { - m_startToken = 0; - m_endToken = 0; + m_tokenRange.Clear(); m_features.Reset(); } - size_t m_startToken = 0; - size_t m_endToken = 0; + TokenRange m_tokenRange; CBV m_features; }; void GoImpl(vector> & infos, bool inViewport); - template - using TLocalitiesCache = map, vector>; + template + using LocalitiesCache = map>; QueryParams::Token const & GetTokens(size_t i) const; @@ -194,7 +191,7 @@ private: // for each token and saves it to m_addressFeatures. void InitBaseContext(BaseContext & ctx); - void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, + void InitLayer(SearchModel::SearchType type, TokenRange const & tokenRange, FeaturesLayer & layer); void FillLocalityCandidates(BaseContext const & ctx, @@ -256,10 +253,9 @@ private: // Forms result and feeds it to |m_preRanker|. void EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId, - SearchModel::SearchType type, size_t startToken, size_t endToken); - void EmitResult(BaseContext const & ctx, Region const & region, size_t startToken, - size_t endToken); - void EmitResult(BaseContext const & ctx, City const & city, size_t startToken, size_t endToken); + SearchModel::SearchType type, TokenRange const & tokenRange); + void EmitResult(BaseContext const & ctx, Region const & region, TokenRange const & tokenRange); + void EmitResult(BaseContext const & ctx, City const & city, TokenRange const & tokenRange); // Tries to match unclassified objects from lower layers, like // parks, forests, lakes, rivers, etc. This method finds all @@ -305,8 +301,8 @@ private: // m_cities stores both big cities that are visible at World.mwm // and small villages and hamlets that are not. - TLocalitiesCache m_cities; - TLocalitiesCache m_regions[REGION_TYPE_COUNT]; + LocalitiesCache m_cities; + LocalitiesCache m_regions[REGION_TYPE_COUNT]; // Caches of features in rects. These caches are separated from // TLocalitiesCache because the latter are quite lightweight and not diff --git a/search/geocoder_context.cpp b/search/geocoder_context.cpp index 9f03510916..0ff49bed4e 100644 --- a/search/geocoder_context.cpp +++ b/search/geocoder_context.cpp @@ -1,5 +1,8 @@ #include "search/geocoder_context.hpp" +#include "search/token_range.hpp" + +#include "base/assert.hpp" #include "base/stl_add.hpp" #include "std/algorithm.hpp" @@ -18,9 +21,11 @@ bool BaseContext::AllTokensUsed() const return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); } -bool BaseContext::HasUsedTokensInRange(size_t from, size_t to) const +bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const { - return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor()); + ASSERT(range.IsValid(), (range)); + return any_of(m_usedTokens.begin() + range.m_begin, m_usedTokens.begin() + range.m_end, + IdFunctor()); } size_t BaseContext::NumUnusedTokenGroups() const diff --git a/search/geocoder_context.hpp b/search/geocoder_context.hpp index 216c323987..1a54adc8a6 100644 --- a/search/geocoder_context.hpp +++ b/search/geocoder_context.hpp @@ -9,6 +9,7 @@ namespace search { class FeaturesFilter; +struct TokenRange; struct BaseContext { @@ -19,9 +20,8 @@ struct BaseContext // Returns true iff all tokens are used. bool AllTokensUsed() const; - // Returns true if there exists at least one used token in [from, - // to). - bool HasUsedTokensInRange(size_t from, size_t to) const; + // Returns true if there exists at least one used token in |range|. + bool HasUsedTokensInRange(TokenRange const & range) const; // Counts number of groups of consecutive unused tokens. size_t NumUnusedTokenGroups() const; diff --git a/search/intersection_result.hpp b/search/intersection_result.hpp index 31144e77e5..6bea9335a5 100644 --- a/search/intersection_result.hpp +++ b/search/intersection_result.hpp @@ -33,5 +33,4 @@ struct IntersectionResult }; string DebugPrint(IntersectionResult const & result); - } // namespace search diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp index e2047a40a7..7f9996c343 100644 --- a/search/locality_scorer.cpp +++ b/search/locality_scorer.cpp @@ -28,7 +28,7 @@ LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScor LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality) : m_locality(locality) - , m_numTokens(locality.m_endToken - locality.m_startToken) + , m_numTokens(locality.m_tokenRange.Size()) , m_rank(0) , m_nameScore(NAME_SCORE_ZERO) { @@ -58,13 +58,14 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte for (size_t endToken = startToken + 1; endToken <= ctx.m_numTokens; ++endToken) { + TokenRange const tokenRange(startToken, endToken); // Skip locality candidates that match only numbers. - if (!m_params.IsNumberTokens(startToken, endToken)) + if (!m_params.IsNumberTokens(tokenRange)) { intersection.ForEach([&](uint32_t featureId) { double const prob = static_cast(intersection.PopCount()) / static_cast(unfilteredIntersection.PopCount()); - localities.emplace_back(countryId, featureId, startToken, endToken, prob); + localities.emplace_back(countryId, featureId, tokenRange, prob); }); } @@ -144,10 +145,7 @@ void LocalityScorer::SortByNameAndProb(std::vector & ls) const auto score = NAME_SCORE_ZERO; for (auto const & name : names) - { - score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_startToken, - l.m_locality.m_endToken))); - } + score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_tokenRange))); l.m_nameScore = score; std::sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs) { diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp index fb8c62cc00..6ec6021238 100644 --- a/search/pre_ranker.cpp +++ b/search/pre_ranker.cpp @@ -42,7 +42,7 @@ struct ComparePreResult1 auto const & rinfo = rhs.GetInfo(); if (linfo.GetNumTokens() != rinfo.GetNumTokens()) return linfo.GetNumTokens() > rinfo.GetNumTokens(); - return linfo.m_startToken < rinfo.m_startToken; + return linfo.m_tokenRange.m_begin < rinfo.m_tokenRange.m_begin; } }; diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp index 8de435ac55..49bde42cb6 100644 --- a/search/pre_ranking_info.cpp +++ b/search/pre_ranking_info.cpp @@ -9,8 +9,7 @@ string DebugPrint(PreRankingInfo const & info) ostringstream os; os << "PreRankingInfo ["; os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; - os << "m_startToken:" << info.m_startToken << ","; - os << "m_endToken:" << info.m_endToken << ","; + os << "m_tokenRange:" << DebugPrint(info.m_tokenRange) << ","; os << "m_rank:" << info.m_rank << ","; os << "m_searchType:" << info.m_searchType; os << "]"; diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp index 43aada03ab..e71525b7b8 100644 --- a/search/pre_ranking_info.hpp +++ b/search/pre_ranking_info.hpp @@ -1,6 +1,7 @@ #pragma once #include "search/model.hpp" +#include "search/token_range.hpp" #include "geometry/point2d.hpp" @@ -10,7 +11,7 @@ namespace search { struct PreRankingInfo { - inline size_t GetNumTokens() const { return m_endToken - m_startToken; } + inline size_t GetNumTokens() const { return m_tokenRange.Size(); } // An abstract distance from the feature to the pivot. Measurement // units do not matter here. @@ -19,10 +20,8 @@ struct PreRankingInfo m2::PointD m_center = m2::PointD::Zero(); bool m_centerLoaded = false; - // Tokens [m_startToken, m_endToken) match to the feature name or - // house number. - size_t m_startToken = 0; - size_t m_endToken = 0; + // Tokens match to the feature name or house number. + TokenRange m_tokenRange; // Rank of the feature. uint8_t m_rank = 0; diff --git a/search/query_params.cpp b/search/query_params.cpp index db5e31cac8..22e271ff75 100644 --- a/search/query_params.cpp +++ b/search/query_params.cpp @@ -1,5 +1,7 @@ #include "search/query_params.hpp" +#include "search/token_range.hpp" + #include "indexer/feature_impl.hpp" #include "std/algorithm.hpp" @@ -88,15 +90,15 @@ QueryParams::Token & QueryParams::GetToken(size_t i) return i < m_tokens.size() ? m_tokens[i] : m_prefixToken; } -bool QueryParams::IsNumberTokens(size_t start, size_t end) const +bool QueryParams::IsNumberTokens(TokenRange const & range) const { - ASSERT_LESS(start, end, ()); - ASSERT_LESS_OR_EQUAL(end, GetNumTokens(), ()); + ASSERT(range.IsValid(), (range)); + ASSERT_LESS_OR_EQUAL(range.m_end, GetNumTokens(), ()); - for (; start != end; ++start) + for (size_t i : range) { bool number = false; - GetToken(start).ForEach([&number](String const & s) { + GetToken(i).ForEach([&number](String const & s) { if (feature::IsNumber(s)) { number = true; diff --git a/search/query_params.hpp b/search/query_params.hpp index e1844bfd89..f6a09a1249 100644 --- a/search/query_params.hpp +++ b/search/query_params.hpp @@ -13,6 +13,8 @@ namespace search { +struct TokenRange; + class QueryParams { public: @@ -101,9 +103,8 @@ public: Token const & GetToken(size_t i) const; Token & GetToken(size_t i); - // Returns true if all tokens in [start, end) range have integral - // synonyms. - bool IsNumberTokens(size_t start, size_t end) const; + // Returns true if all tokens in |range| have integral synonyms. + bool IsNumberTokens(TokenRange const & range) const; void RemoveToken(size_t i); diff --git a/search/ranker.cpp b/search/ranker.cpp index 335ed40ec1..9e5e6b5e9d 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -179,8 +179,8 @@ class PreResult2Maker info.m_searchType = preInfo.m_searchType; info.m_nameScore = NAME_SCORE_ZERO; - TokenSlice slice(m_params, preInfo.m_startToken, preInfo.m_endToken); - TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_startToken, preInfo.m_endToken); + TokenSlice slice(m_params, preInfo.m_tokenRange); + TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_tokenRange); for (auto const & lang : m_params.GetLangs()) { diff --git a/search/search.pro b/search/search.pro index 7c7cacc87f..813afb9428 100644 --- a/search/search.pro +++ b/search/search.pro @@ -75,6 +75,7 @@ HEADERS += \ streets_matcher.hpp \ string_intersection.hpp \ suggest.hpp \ + token_range.hpp \ token_slice.hpp \ types_skipper.hpp \ utils.hpp \ diff --git a/search/search_integration_tests/pre_ranker_test.cpp b/search/search_integration_tests/pre_ranker_test.cpp index b67ce9488c..b071715ae9 100644 --- a/search/search_integration_tests/pre_ranker_test.cpp +++ b/search/search_integration_tests/pre_ranker_test.cpp @@ -133,8 +133,7 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke) FeatureID id(mwmId, index); PreRankingInfo info; - info.m_startToken = 0; - info.m_endToken = 1; + info.m_tokenRange = TokenRange(0, 1); info.m_searchType = SearchModel::SEARCH_TYPE_POI; preRanker.Emplace(id, info); diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 92e99aa947..114205fbd5 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -4,6 +4,7 @@ #include "search/search_integration_tests/helpers.hpp" #include "search/search_tests_support/test_results_matching.hpp" #include "search/search_tests_support/test_search_request.hpp" +#include "search/token_range.hpp" #include "search/token_slice.hpp" #include "generator/feature_builder.hpp" @@ -537,8 +538,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestPostcodes) strings::UniString const tokens[] = {strings::MakeUniString("141702")}; params.InitNoPrefix(tokens, tokens + ARRAY_SIZE(tokens)); } - auto features = RetrievePostcodeFeatures(context, cancellable, - TokenSlice(params, 0, params.GetNumTokens())); + auto features = RetrievePostcodeFeatures( + context, cancellable, TokenSlice(params, TokenRange(0, params.GetNumTokens()))); TEST_EQUAL(1, features->PopCount(), ()); uint64_t index = 0; diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index d5b14c004d..e9dbc4af09 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -2,6 +2,7 @@ #include "search/query_params.hpp" #include "search/ranking_utils.hpp" +#include "search/token_range.hpp" #include "search/token_slice.hpp" #include "indexer/search_delimiters.hpp" @@ -18,7 +19,7 @@ using namespace strings; namespace { -NameScore GetScore(string const & name, string const & query, size_t startToken, size_t endToken) +NameScore GetScore(string const & name, string const & query, TokenRange const & tokenRange) { search::Delimiters delims; QueryParams params; @@ -36,17 +37,20 @@ NameScore GetScore(string const & name, string const & query, size_t startToken, params.InitNoPrefix(tokens.begin(), tokens.end()); } - return GetNameScore(name, TokenSlice(params, startToken, endToken)); + return GetNameScore(name, TokenSlice(params, tokenRange)); } UNIT_TEST(NameTest_Smoke) { - TEST_EQUAL(GetScore("New York", "Central Park, New York, US", 2, 4), NAME_SCORE_FULL_MATCH, ()); - TEST_EQUAL(GetScore("New York", "York", 0, 1), NAME_SCORE_SUBSTRING, ()); - TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", 2, 3), NAME_SCORE_FULL_MATCH_PREFIX, ()); - TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", 2, 3), NAME_SCORE_FULL_MATCH, ()); - TEST_EQUAL(GetScore("San Francisco", "Fran", 0, 1), NAME_SCORE_SUBSTRING_PREFIX, ()); - TEST_EQUAL(GetScore("San Francisco", "Fran ", 0, 1), NAME_SCORE_ZERO, ()); - TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", 0, 1), NAME_SCORE_FULL_MATCH_PREFIX, ()); + TEST_EQUAL(GetScore("New York", "Central Park, New York, US", TokenRange(2, 4)), + NAME_SCORE_FULL_MATCH, ()); + TEST_EQUAL(GetScore("New York", "York", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ()); + TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH_PREFIX, + ()); + TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ()); + TEST_EQUAL(GetScore("San Francisco", "Fran", TokenRange(0, 1)), NAME_SCORE_SUBSTRING_PREFIX, ()); + TEST_EQUAL(GetScore("San Francisco", "Fran ", TokenRange(0, 1)), NAME_SCORE_ZERO, ()); + TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH_PREFIX, + ()); } } // namespace diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp index 733f19c8b4..68083174fd 100644 --- a/search/streets_matcher.cpp +++ b/search/streets_matcher.cpp @@ -23,7 +23,7 @@ bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Predicti if (lhs.GetNumTokens() != rhs.GetNumTokens()) return lhs.GetNumTokens() > rhs.GetNumTokens(); - return lhs.m_startToken < rhs.m_startToken; + return lhs.m_tokenRange.m_begin < rhs.m_tokenRange.m_begin; } } // namespace @@ -109,8 +109,7 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & predictions.emplace_back(); auto & prediction = predictions.back(); - prediction.m_startToken = startToken; - prediction.m_endToken = curToken; + prediction.m_tokenRange = TokenRange(startToken, curToken); ASSERT_NOT_EQUAL(fs.PopCount(), 0, ()); ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ()); diff --git a/search/streets_matcher.hpp b/search/streets_matcher.hpp index d362c0dd9a..e81881592d 100644 --- a/search/streets_matcher.hpp +++ b/search/streets_matcher.hpp @@ -2,6 +2,7 @@ #include "search/cbv.hpp" #include "search/geocoder_context.hpp" +#include "search/token_range.hpp" #include "std/vector.hpp" @@ -15,15 +16,11 @@ class StreetsMatcher public: struct Prediction { - inline size_t GetNumTokens() const { return m_endToken - m_startToken; } + inline size_t GetNumTokens() const { return m_tokenRange.Size(); } CBV m_features; - - size_t m_startToken = 0; - size_t m_endToken = 0; - + TokenRange m_tokenRange; double m_prob = 0.0; - uint64_t m_hash = 0; }; diff --git a/search/token_range.hpp b/search/token_range.hpp new file mode 100644 index 0000000000..eebd708952 --- /dev/null +++ b/search/token_range.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include "base/assert.hpp" +#include "base/range_iterator.hpp" + +#include +#include +#include + +namespace search +{ +struct TokenRange final +{ + TokenRange() = default; + TokenRange(size_t begin, size_t end) : m_begin(begin), m_end(end) + { + ASSERT(IsValid(), (*this)); + } + + inline bool AdjacentTo(TokenRange const & rhs) const + { + ASSERT(IsValid(), (*this)); + ASSERT(rhs.IsValid(), (rhs)); + return m_begin == rhs.m_end || m_end == rhs.m_begin; + } + + inline size_t Size() const + { + ASSERT(IsValid(), (*this)); + return m_end - m_begin; + } + + inline bool Empty() const { return Size() == 0; } + + inline void Clear() + { + m_begin = 0; + m_end = 0; + } + + inline bool IsValid() const { return m_begin <= m_end; } + + inline bool operator<(TokenRange const & rhs) const + { + if (m_begin != rhs.m_begin) + return m_begin < rhs.m_begin; + return m_end < rhs.m_end; + } + + inline bool operator==(TokenRange const & rhs) const + { + return m_begin == rhs.m_begin && m_end == rhs.m_end; + } + + inline my::RangeIterator begin() const { return my::RangeIterator(m_begin); } + inline my::RangeIterator end() const { return my::RangeIterator(m_end); } + + inline my::RangeIterator cbegin() const { return my::RangeIterator(m_begin); } + inline my::RangeIterator cend() const { return my::RangeIterator(m_end); } + + size_t m_begin = 0; + size_t m_end = 0; +}; + +inline std::string DebugPrint(TokenRange const & tokenRange) +{ + std::ostringstream os; + os << "TokenRange [" << tokenRange.m_begin << ", " << tokenRange.m_end << ")"; + return os.str(); +} +} // namespace search diff --git a/search/token_slice.cpp b/search/token_slice.cpp index bff3dc3fd5..b1dc1cb0cf 100644 --- a/search/token_slice.cpp +++ b/search/token_slice.cpp @@ -23,10 +23,10 @@ string SliceToString(string const & name, TSlice const & slice) } // namespace // TokenSlice -------------------------------------------------------------------------------------- -TokenSlice::TokenSlice(QueryParams const & params, size_t startToken, size_t endToken) - : m_params(params), m_offset(startToken), m_size(endToken - startToken) +TokenSlice::TokenSlice(QueryParams const & params, TokenRange const & range) + : m_params(params), m_offset(range.m_begin), m_size(range.Size()) { - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); + ASSERT(range.IsValid(), (range)); } bool TokenSlice::IsPrefix(size_t i) const @@ -36,14 +36,11 @@ bool TokenSlice::IsPrefix(size_t i) const } // TokenSliceNoCategories -------------------------------------------------------------------------- -TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, size_t startToken, - size_t endToken) +TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, TokenRange const & range) : m_params(params) { - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); - - m_indexes.reserve(endToken - startToken); - for (size_t i = startToken; i < endToken; ++i) + m_indexes.reserve(range.Size()); + for (size_t i : range) { if (!m_params.IsCategorySynonym(i)) m_indexes.push_back(i); diff --git a/search/token_slice.hpp b/search/token_slice.hpp index 7301103420..789f3abf5e 100644 --- a/search/token_slice.hpp +++ b/search/token_slice.hpp @@ -1,6 +1,7 @@ #pragma once #include "search/query_params.hpp" +#include "search/token_range.hpp" #include "indexer/string_slice.hpp" @@ -15,7 +16,7 @@ namespace search class TokenSlice { public: - TokenSlice(QueryParams const & params, size_t startToken, size_t endToken); + TokenSlice(QueryParams const & params, TokenRange const & range); inline QueryParams::Token const & Get(size_t i) const { @@ -40,7 +41,7 @@ private: class TokenSliceNoCategories { public: - TokenSliceNoCategories(QueryParams const & params, size_t startToken, size_t endToken); + TokenSliceNoCategories(QueryParams const & params, TokenRange const & range); inline QueryParams::Token const & Get(size_t i) const {