From 36fe4a08508b790a16677465e2e0ed5db70f0414 Mon Sep 17 00:00:00 2001
From: Yuri Gorshenin
Date: Thu, 23 Jun 2016 11:59:28 +0300
Subject: [PATCH] [search] Implemented Oracle.

---
 coding/compressed_bit_vector.hpp |  63 +++---
 search/cbv_ptr.cpp               |  45 ++++-
 search/cbv_ptr.hpp               |  24 +--
 search/geocoder.cpp              | 318 ++++++++++---------------------
 search/geocoder.hpp              |  72 ++-----
 search/geocoder_context.cpp      |  36 ++++
 search/geocoder_context.hpp      |  38 ++++
 search/mwm_context.hpp           |   1 -
 search/query_params.hpp          |   4 +
 search/search.pro                |   4 +
 search/streets_matcher.cpp       | 173 +++++++++++++++++
 search/streets_matcher.hpp       |  37 ++++
 12 files changed, 504 insertions(+), 311 deletions(-)
 create mode 100644 search/geocoder_context.cpp
 create mode 100644 search/geocoder_context.hpp
 create mode 100644 search/streets_matcher.cpp
 create mode 100644 search/streets_matcher.hpp

diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp
index 7e51b363aa..5fcf630919 100644
--- a/coding/compressed_bit_vector.hpp
+++ b/coding/compressed_bit_vector.hpp
@@ -198,18 +198,18 @@ public:
         static_cast<CompressedBitVector::StorageStrategy>(header);
     switch (strat)
     {
-      case CompressedBitVector::StorageStrategy::Dense:
-      {
-        vector<uint64_t> bitGroups;
-        rw::ReadVectorOfPOD(src, bitGroups);
-        return DenseCBV::BuildFromBitGroups(move(bitGroups));
-      }
-      case CompressedBitVector::StorageStrategy::Sparse:
-      {
-        vector<uint64_t> setBits;
-        rw::ReadVectorOfPOD(src, setBits);
-        return make_unique<SparseCBV>(move(setBits));
-      }
+    case CompressedBitVector::StorageStrategy::Dense:
+    {
+      vector<uint64_t> bitGroups;
+      rw::ReadVectorOfPOD(src, bitGroups);
+      return DenseCBV::BuildFromBitGroups(move(bitGroups));
+    }
+    case CompressedBitVector::StorageStrategy::Sparse:
+    {
+      vector<uint64_t> setBits;
+      rw::ReadVectorOfPOD(src, setBits);
+      return make_unique<SparseCBV>(move(setBits));
+    }
     }
     return unique_ptr<CompressedBitVector>();
   }
@@ -227,19 +227,34 @@ public:
     CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy();
     switch (strat)
     {
-      case CompressedBitVector::StorageStrategy::Dense:
-      {
-        DenseCBV const & denseCBV = static_cast<DenseCBV const &>(cbv);
-        denseCBV.ForEach(f);
-        return;
-      }
-      case CompressedBitVector::StorageStrategy::Sparse:
-      {
-        SparseCBV const & sparseCBV = static_cast<SparseCBV const &>(cbv);
-        sparseCBV.ForEach(f);
-        return;
-      }
+    case CompressedBitVector::StorageStrategy::Dense:
+    {
+      DenseCBV const & denseCBV = static_cast<DenseCBV const &>(cbv);
+      denseCBV.ForEach(f);
+      return;
+    }
+    case CompressedBitVector::StorageStrategy::Sparse:
+    {
+      SparseCBV const & sparseCBV = static_cast<SparseCBV const &>(cbv);
+      sparseCBV.ForEach(f);
+      return;
+    }
     }
   }
 };
+
+class CompressedBitVectorHasher
+{
+public:
+  static uint64_t Hash(CompressedBitVector const & cbv)
+  {
+    uint64_t const kBase = 127;
+    uint64_t hash = 0;
+    CompressedBitVectorEnumerator::ForEach(cbv, [&](uint64_t i)
+                                           {
+                                             hash = hash * kBase + i;
+                                           });
+    return hash;
+  }
+};
 } // namespace coding
diff --git a/search/cbv_ptr.cpp b/search/cbv_ptr.cpp
index 7bdf63e64a..65eef82fd7 100644
--- a/search/cbv_ptr.cpp
+++ b/search/cbv_ptr.cpp
@@ -4,6 +4,10 @@ namespace search
 {
 CBVPtr::CBVPtr(coding::CompressedBitVector const * p, bool isOwner) { Set(p, isOwner); }
 
+CBVPtr::CBVPtr(CBVPtr && rhs) { *this = move(rhs); }
+
+CBVPtr::~CBVPtr() { Release(); }
+
 void CBVPtr::Release()
 {
   if (m_isOwner)
@@ -14,6 +18,12 @@ void CBVPtr::Release()
   m_isFull = false;
 }
 
+void CBVPtr::SetFull()
+{
+  Release();
+  m_isFull = true;
+}
+
 void CBVPtr::Set(coding::CompressedBitVector const * p, bool isOwner /* = false*/)
 {
   Release();
@@ -27,6 +37,22 @@ void CBVPtr::Set(unique_ptr<coding::CompressedBitVector> p)
   Set(p.release(), true /* isOwner */);
 }
 
+CBVPtr & CBVPtr::operator=(CBVPtr && rhs)
noexcept +{ + if (this == &rhs) + return *this; + + m_ptr = rhs.m_ptr; + m_isOwner = rhs.m_isOwner; + m_isFull = rhs.m_isFull; + + rhs.m_ptr = nullptr; + rhs.m_isOwner = false; + rhs.m_isFull = false; + + return *this; +} + void CBVPtr::Union(coding::CompressedBitVector const * p) { if (!p || m_isFull) @@ -36,10 +62,11 @@ void CBVPtr::Union(coding::CompressedBitVector const * p) { m_ptr = p; m_isFull = false; + m_isOwner = false; } else { - Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true); + Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true /* isOwner */); } } @@ -53,15 +80,27 @@ void CBVPtr::Intersect(coding::CompressedBitVector const * p) if (m_ptr) { - Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true); + Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true /* isOwner */); } else if (m_isFull) { m_ptr = p; m_isFull = false; + m_isOwner = false; } } -bool CBVPtr::IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); } +void CBVPtr::CopyTo(CBVPtr & rhs) +{ + rhs.Release(); + if (m_isFull) + { + rhs.SetFull(); + return; + } + + if (m_ptr) + rhs.Set(m_ptr->Clone()); +} } // namespace search diff --git a/search/cbv_ptr.hpp b/search/cbv_ptr.hpp index 3c9cd68fd8..a769ac6886 100644 --- a/search/cbv_ptr.hpp +++ b/search/cbv_ptr.hpp @@ -14,7 +14,7 @@ namespace search /// binary operators logic and takes ownership if needed. class CBVPtr { - DISALLOW_COPY_AND_MOVE(CBVPtr); + DISALLOW_COPY(CBVPtr); coding::CompressedBitVector const * m_ptr = nullptr; bool m_isOwner = false; @@ -25,27 +25,28 @@ class CBVPtr public: CBVPtr() = default; CBVPtr(coding::CompressedBitVector const * p, bool isOwner); - ~CBVPtr() { Release(); } - - inline void SetFull() - { - Release(); - m_isFull = true; - } + CBVPtr(CBVPtr && rhs); + ~CBVPtr(); + void SetFull(); void Set(coding::CompressedBitVector const * p, bool isOwner = false); void Set(unique_ptr p); inline coding::CompressedBitVector const * Get() const { return m_ptr; } - coding::CompressedBitVector const & operator*() const { return *m_ptr; } - coding::CompressedBitVector const * operator->() const { return m_ptr; } + inline coding::CompressedBitVector const & operator*() const { return *m_ptr; } + inline coding::CompressedBitVector const * operator->() const { return m_ptr; } + CBVPtr & operator=(CBVPtr && rhs) noexcept; - bool IsEmpty() const; + inline bool IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); } + inline bool IsFull() const { return m_isFull; } + inline bool IsOwner() const noexcept { return m_isOwner; } void Union(coding::CompressedBitVector const * p); void Intersect(coding::CompressedBitVector const * p); + void CopyTo(CBVPtr & rhs); + template void ForEach(TFn && fn) const { @@ -54,5 +55,4 @@ public: coding::CompressedBitVectorEnumerator::ForEach(*m_ptr, forward(fn)); } }; - } // namespace search diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 29a9af2409..0c5079f6fb 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -411,12 +411,10 @@ Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter, : m_index(index) , m_infoGetter(infoGetter) , m_cancellable(cancellable) - , m_numTokens(0) , m_model(SearchModel::Instance()) , m_pivotRectsCache(kPivotRectsCacheSize, m_cancellable, Processor::kMaxViewportRadiusM) , m_localityRectsCache(kLocalityRectsCacheSize, m_cancellable) , m_pivotFeatures(index) - , m_villages(nullptr) , m_filter(nullptr) , m_matcher(nullptr) , 
m_finder(m_cancellable) @@ -446,13 +444,10 @@ void Geocoder::SetParams(Params const & params) } m_retrievalParams = m_params; - m_numTokens = m_params.m_tokens.size(); - if (!m_params.m_prefixTokens.empty()) - ++m_numTokens; // Remove all category synonyms for streets, as they're extracted // individually via LoadStreets. - for (size_t i = 0; i < m_numTokens; ++i) + for (size_t i = 0; i < m_params.GetNumTokens(); ++i) { auto & synonyms = m_params.GetTokens(i); ASSERT(!synonyms.empty(), ()); @@ -488,7 +483,7 @@ void Geocoder::GoEverywhere(PreRanker & preRanker) MY_SCOPE_GUARD(stopProfiler, &ProfilerStop); #endif - if (m_numTokens == 0) + if (m_params.GetNumTokens() == 0) return; vector> infos; @@ -499,7 +494,7 @@ void Geocoder::GoEverywhere(PreRanker & preRanker) void Geocoder::GoInViewport(PreRanker & preRanker) { - if (m_numTokens == 0) + if (m_params.GetNumTokens() == 0) return; vector> infos; @@ -533,10 +528,12 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos // it's ok to save MwmId. m_worldId = handle.GetId(); m_context = make_unique(move(handle)); + if (HasSearchIndex(value)) { - PrepareAddressFeatures(); - FillLocalitiesTable(); + BaseContext ctx; + InitBaseContext(ctx); + FillLocalitiesTable(ctx); } m_context.reset(); } @@ -561,15 +558,13 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos { ASSERT(context, ()); m_context = move(context); + MY_SCOPE_GUARD(cleanup, [&]() { LOG(LDEBUG, (m_context->GetName(), "geocoding complete.")); m_matcher->OnQueryFinished(); m_matcher = nullptr; m_context.reset(); - m_addressFeatures.clear(); - m_streets = nullptr; - m_villages = nullptr; }); auto it = m_matchersCache.find(m_context->GetId()); @@ -582,7 +577,8 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos m_matcher = it->second.get(); m_matcher->SetContext(m_context.get()); - PrepareAddressFeatures(); + BaseContext ctx; + InitBaseContext(ctx); coding::CompressedBitVector const * viewportCBV = nullptr; if (inViewport) @@ -590,33 +586,25 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos if (viewportCBV) { - for (size_t i = 0; i < m_numTokens; ++i) - { - m_addressFeatures[i] = - coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV); - } + for (auto & features : ctx.m_features) + features = coding::CompressedBitVector::Intersect(*features, *viewportCBV); } - // |m_streets| will be initialized in LimitedSearch() and its - // callees, if needed. - m_streets = nullptr; - - m_villages = LoadVillages(*m_context); + ctx.m_villages = LoadVillages(*m_context); auto citiesFromWorld = m_cities; - FillVillageLocalities(); + FillVillageLocalities(ctx); MY_SCOPE_GUARD(remove_villages, [&]() { m_cities = citiesFromWorld; }); - m_usedTokens.assign(m_numTokens, false); m_lastMatchedRegion = nullptr; - MatchRegions(REGION_TYPE_COUNTRY); + MatchRegions(ctx, REGION_TYPE_COUNTRY); if (index < numIntersectingMaps || m_preRanker->IsEmpty()) - MatchAroundPivot(); + MatchAroundPivot(ctx); }; // Iterates through all alive mwms and performs geocoding. 
@@ -636,17 +624,15 @@ void Geocoder::ClearCaches() m_localityRectsCache.Clear(); m_pivotFeatures.Clear(); - m_addressFeatures.clear(); m_matchersCache.clear(); m_streetsCache.clear(); - m_villages.reset(); m_postcodes.Clear(); } void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) { ASSERT_LESS(curToken, endToken, ()); - ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ()); + ASSERT_LESS_OR_EQUAL(endToken, m_params.GetNumTokens(), ()); m_retrievalParams.m_tokens.clear(); m_retrievalParams.m_prefixTokens.clear(); @@ -663,15 +649,17 @@ void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) } } -void Geocoder::PrepareAddressFeatures() +void Geocoder::InitBaseContext(BaseContext & ctx) { - m_addressFeatures.resize(m_numTokens); - for (size_t i = 0; i < m_numTokens; ++i) + ctx.m_usedTokens.assign(m_params.GetNumTokens(), false); + ctx.m_numTokens = m_params.GetNumTokens(); + ctx.m_features.resize(ctx.m_numTokens); + for (size_t i = 0; i < ctx.m_features.size(); ++i) { PrepareRetrievalParams(i, i + 1); - m_addressFeatures[i] = RetrieveAddressFeatures(m_context->GetId(), m_context->m_value, - m_cancellable, m_retrievalParams); - ASSERT(m_addressFeatures[i], ()); + ctx.m_features[i] = RetrieveAddressFeatures(m_context->GetId(), m_context->m_value, + m_cancellable, m_retrievalParams); + ASSERT(ctx.m_features[i], ()); } } @@ -688,13 +676,14 @@ void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); } -void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter, +void Geocoder::FillLocalityCandidates(BaseContext const & ctx, + coding::CompressedBitVector const * filter, size_t const maxNumLocalities, vector & preLocalities) { preLocalities.clear(); - for (size_t startToken = 0; startToken < m_numTokens; ++startToken) + for (size_t startToken = 0; startToken < ctx.m_numTokens; ++startToken) { CBVPtr intersection; CBVPtr unfilteredIntersection; @@ -703,13 +692,13 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter if (filter) { intersection.Intersect(filter); - unfilteredIntersection.Intersect(m_addressFeatures[startToken].get()); + unfilteredIntersection.Intersect(ctx.m_features[startToken].get()); } - intersection.Intersect(m_addressFeatures[startToken].get()); + intersection.Intersect(ctx.m_features[startToken].get()); if (intersection.IsEmpty()) continue; - for (size_t endToken = startToken + 1; endToken <= m_numTokens; ++endToken) + for (size_t endToken = startToken + 1; endToken <= ctx.m_numTokens; ++endToken) { // Skip locality candidates that match only numbers. 
if (!m_params.IsNumberTokens(startToken, endToken)) @@ -730,11 +719,11 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter }); } - if (endToken < m_numTokens) + if (endToken < ctx.m_numTokens) { - intersection.Intersect(m_addressFeatures[endToken].get()); + intersection.Intersect(ctx.m_features[endToken].get()); if (filter) - unfilteredIntersection.Intersect(m_addressFeatures[endToken].get()); + unfilteredIntersection.Intersect(ctx.m_features[endToken].get()); if (intersection.IsEmpty()) break; } @@ -746,10 +735,10 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter scorer.GetTopLocalities(maxNumLocalities, preLocalities); } -void Geocoder::FillLocalitiesTable() +void Geocoder::FillLocalitiesTable(BaseContext const & ctx) { vector preLocalities; - FillLocalityCandidates(nullptr, kMaxNumLocalities, preLocalities); + FillLocalityCandidates(ctx, nullptr /* filter */, kMaxNumLocalities, preLocalities); size_t numCities = 0; size_t numStates = 0; @@ -818,10 +807,10 @@ void Geocoder::FillLocalitiesTable() } } -void Geocoder::FillVillageLocalities() +void Geocoder::FillVillageLocalities(BaseContext const & ctx) { vector preLocalities; - FillLocalityCandidates(m_villages.get(), kMaxNumVillages, preLocalities); + FillLocalityCandidates(ctx, ctx.m_villages.get() /* filter */, kMaxNumVillages, preLocalities); size_t numVillages = 0; @@ -874,19 +863,19 @@ void Geocoder::ForEachCountry(vector> const & infos, TFn && } } -void Geocoder::MatchRegions(RegionType type) +void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) { switch (type) { case REGION_TYPE_STATE: // Tries to skip state matching and go to cities matching. // Then, performs states matching. - MatchCities(); + MatchCities(ctx); break; case REGION_TYPE_COUNTRY: // Tries to skip country matching and go to states matching. // Then, performs countries matching. - MatchRegions(REGION_TYPE_STATE); + MatchRegions(ctx, REGION_TYPE_STATE); break; case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return; } @@ -903,7 +892,7 @@ void Geocoder::MatchRegions(RegionType type) size_t const startToken = p.first.first; size_t const endToken = p.first.second; - if (HasUsedTokensInRange(startToken, endToken)) + if (ctx.HasUsedTokensInRange(startToken, endToken)) continue; for (auto const & region : p.second) @@ -927,8 +916,8 @@ void Geocoder::MatchRegions(RegionType type) if (!matches) continue; - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - if (AllTokensUsed()) + ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); + if (ctx.AllTokensUsed()) { // Region matches to search query, we need to emit it as is. EmitResult(region, startToken, endToken); @@ -942,22 +931,22 @@ void Geocoder::MatchRegions(RegionType type) }); switch (type) { - case REGION_TYPE_STATE: MatchCities(); break; - case REGION_TYPE_COUNTRY: MatchRegions(REGION_TYPE_STATE); break; + case REGION_TYPE_STATE: MatchCities(ctx); break; + case REGION_TYPE_COUNTRY: MatchRegions(ctx, REGION_TYPE_STATE); break; case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break; } } } } -void Geocoder::MatchCities() +void Geocoder::MatchCities(BaseContext & ctx) { // Localities are ordered my (m_startToken, m_endToken) pairs. 
for (auto const & p : m_cities) { size_t const startToken = p.first.first; size_t const endToken = p.first.second; - if (HasUsedTokensInRange(startToken, endToken)) + if (ctx.HasUsedTokensInRange(startToken, endToken)) continue; for (auto const & city : p.second) @@ -970,8 +959,8 @@ void Geocoder::MatchCities() continue; } - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - if (AllTokensUsed()) + ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); + if (ctx.AllTokensUsed()) { // City matches to search query, we need to emit it as is. EmitResult(city, startToken, endToken); @@ -989,12 +978,12 @@ void Geocoder::MatchCities() continue; LocalityFilter filter(*cityFeatures); - LimitedSearch(filter); + LimitedSearch(ctx, filter); } } } -void Geocoder::MatchAroundPivot() +void Geocoder::MatchAroundPivot(BaseContext & ctx) { auto const * features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); @@ -1002,10 +991,10 @@ void Geocoder::MatchAroundPivot() return; ViewportFilter filter(*features, m_preRanker->Limit() /* threshold */); - LimitedSearch(filter); + LimitedSearch(ctx, filter); } -void Geocoder::LimitedSearch(FeaturesFilter const & filter) +void Geocoder::LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter) { m_filter = &filter; MY_SCOPE_GUARD(resetFilter, [&]() @@ -1013,36 +1002,36 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter) m_filter = nullptr; }); - if (!m_streets) - m_streets = LoadStreets(*m_context); + if (!ctx.m_streets) + ctx.m_streets = LoadStreets(*m_context); - MatchUnclassified(0 /* curToken */); + MatchUnclassified(ctx, 0 /* curToken */); - auto search = [this]() + auto search = [this, &ctx]() { - GreedilyMatchStreets(); - MatchPOIsAndBuildings(0 /* curToken */); + GreedilyMatchStreets(ctx); + MatchPOIsAndBuildings(ctx, 0 /* curToken */); }; - WithPostcodes(search); + WithPostcodes(ctx, search); search(); } template -void Geocoder::WithPostcodes(TFn && fn) +void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn) { size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode(); - for (size_t startToken = 0; startToken != m_numTokens; ++startToken) + for (size_t startToken = 0; startToken != ctx.m_numTokens; ++startToken) { size_t endToken = startToken; - for (size_t n = 1; startToken + n <= m_numTokens && n <= maxPostcodeTokens; ++n) + for (size_t n = 1; startToken + n <= ctx.m_numTokens && n <= maxPostcodeTokens; ++n) { - if (m_usedTokens[startToken + n - 1]) + if (ctx.m_usedTokens[startToken + n - 1]) break; TokenSlice slice(m_params, startToken, startToken + n); - auto const isPrefix = startToken + n == m_numTokens; + auto const isPrefix = startToken + n == ctx.m_numTokens; if (LooksLikePostcode(QuerySlice(slice), isPrefix)) endToken = startToken + n; } @@ -1058,7 +1047,7 @@ void Geocoder::WithPostcodes(TFn && fn) if (!coding::CompressedBitVector::IsEmpty(postcodes)) { - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken); m_postcodes.Clear(); m_postcodes.m_startToken = startToken; @@ -1070,122 +1059,41 @@ void Geocoder::WithPostcodes(TFn && fn) } } -void Geocoder::GreedilyMatchStreets() +void Geocoder::GreedilyMatchStreets(BaseContext & ctx) { - for (size_t startToken = 0; startToken < m_numTokens; ++startToken) - { - if (m_usedTokens[startToken]) - continue; + vector predictions; + StreetsMatcher::Go(ctx, *m_filter, m_params, predictions); - // Here we try to match as many tokens as possible while - // intersection is a non-empty 
bit vector of streets. Single - // tokens that are synonyms to streets are ignored. Moreover, - // each time a token that looks like a beginning of a house number - // is met, we try to use current intersection of tokens as a - // street layer and try to match BUILDINGs or POIs. - CBVPtr allFeatures(m_streets, false /* isOwner */); - - size_t curToken = startToken; - - // This variable is used for prevention of duplicate calls to - // CreateStreetsLayerAndMatchLowerLayers() with the same - // arguments. - size_t lastToken = startToken; - - // When true, no bit vectors were intersected with allFeatures at - // all. - bool emptyIntersection = true; - - // When true, allFeatures is in the incomplete state and can't be - // used for creation of street layers. - bool incomplete = false; - - auto createStreetsLayerAndMatchLowerLayers = [&]() - { - if (!allFeatures.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken) - { - CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, *allFeatures); - lastToken = curToken; - } - }; - - StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag) - { - auto buffer = coding::CompressedBitVector::Intersect( - *allFeatures, *m_addressFeatures[tag]); - if (tag < curToken) - { - // This is the case for delayed - // street synonym. Therefore, - // allFeatures is temporarily in the - // incomplete state. - allFeatures.Set(move(buffer)); - emptyIntersection = false; - - incomplete = true; - return; - } - ASSERT_EQUAL(tag, curToken, ()); - - // |allFeatures| will become empty - // after the intersection. Therefore - // we need to create streets layer - // right now. - if (coding::CompressedBitVector::IsEmpty(buffer)) - createStreetsLayerAndMatchLowerLayers(); - - allFeatures.Set(move(buffer)); - emptyIntersection = false; - incomplete = false; - }); - - for (; curToken < m_numTokens && !m_usedTokens[curToken] && !allFeatures.IsEmpty(); ++curToken) - { - auto const & token = m_params.GetTokens(curToken).front(); - bool const isPrefix = curToken >= m_params.m_tokens.size(); - - if (house_numbers::LooksLikeHouseNumber(token, isPrefix)) - createStreetsLayerAndMatchLowerLayers(); - - filter.Put(token, isPrefix, curToken); - } - createStreetsLayerAndMatchLowerLayers(); - } + for (auto const & prediction : predictions) + CreateStreetsLayerAndMatchLowerLayers(ctx, prediction); } -void Geocoder::CreateStreetsLayerAndMatchLowerLayers( - size_t startToken, size_t endToken, coding::CompressedBitVector const & features) +void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, + StreetsMatcher::Prediction const & prediction) { ASSERT(m_layers.empty(), ()); - if (coding::CompressedBitVector::IsEmpty(&features)) - return; - - CBVPtr filtered(&features, false /* isOwner */); - if (m_filter->NeedToFilter(features)) - filtered.Set(m_filter->Filter(features).release(), true /* isOwner */); - m_layers.emplace_back(); MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); auto & layer = m_layers.back(); - InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer); + InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_startToken, prediction.m_endToken, layer); vector sortedFeatures; - sortedFeatures.reserve(features.PopCount()); - filtered.ForEach(MakeBackInsertFunctor(sortedFeatures)); + sortedFeatures.reserve(prediction.m_features->PopCount()); + prediction.m_features.ForEach(MakeBackInsertFunctor(sortedFeatures)); layer.m_sortedFeatures = &sortedFeatures; - ScopedMarkTokens mark(m_usedTokens, 
startToken, endToken); - MatchPOIsAndBuildings(0 /* curToken */); + ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_startToken, prediction.m_endToken); + MatchPOIsAndBuildings(ctx, 0 /* curToken */); } -void Geocoder::MatchPOIsAndBuildings(size_t curToken) +void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) { BailIfCancelled(); - curToken = SkipUsedTokens(curToken); - if (curToken == m_numTokens) + curToken = ctx.SkipUsedTokens(curToken); + if (curToken == ctx.m_numTokens) { // All tokens were consumed, find paths through layers, emit // features. @@ -1203,7 +1111,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) filtered.Set(m_postcodes.m_features.get(), false /* isOwner */); filtered.ForEach([&](uint32_t id) { - EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(id), + EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(ctx, id), m_postcodes.m_startToken, m_postcodes.m_endToken); }); return; @@ -1257,7 +1165,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) // any. auto clusterize = [&](uint32_t featureId) { - auto const searchType = GetSearchTypeInGeocoding(featureId); + auto const searchType = GetSearchTypeInGeocoding(ctx, featureId); // All SEARCH_TYPE_CITY features were filtered in // MatchCities(). All SEARCH_TYPE_STREET features were @@ -1273,7 +1181,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) features.SetFull(); // Try to consume [curToken, m_numTokens) tokens range. - for (size_t n = 1; curToken + n <= m_numTokens && !m_usedTokens[curToken + n - 1]; ++n) + for (size_t n = 1; curToken + n <= ctx.m_numTokens && !ctx.m_usedTokens[curToken + n - 1]; ++n) { // At this point |features| is the intersection of // m_addressFeatures[curToken], m_addressFeatures[curToken + 1], @@ -1286,7 +1194,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) InitLayer(layer.m_type, curToken, curToken + n, layer); } - features.Intersect(m_addressFeatures[curToken + n - 1].get()); + features.Intersect(ctx.m_features[curToken + n - 1].get()); ASSERT(features.Get(), ()); CBVPtr filtered; @@ -1357,7 +1265,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) layer.m_type = static_cast(i); if (IsLayerSequenceSane()) - MatchPOIsAndBuildings(curToken + n); + MatchPOIsAndBuildings(ctx, curToken + n); } } } @@ -1505,7 +1413,7 @@ void Geocoder::FillMissingFieldsInResults() } } -void Geocoder::MatchUnclassified(size_t curToken) +void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken) { ASSERT(m_layers.empty(), ()); @@ -1516,28 +1424,28 @@ void Geocoder::MatchUnclassified(size_t curToken) // adjacent tokens will be matched to "Hyde Park", whereas it's not // ok to match something to "Park London Hyde", because tokens // "Park" and "Hyde" are not adjacent. 
- if (NumUnusedTokensGroups() != 1) + if (ctx.NumUnusedTokenGroups() != 1) return; CBVPtr allFeatures; allFeatures.SetFull(); auto startToken = curToken; - for (curToken = SkipUsedTokens(curToken); curToken < m_numTokens && !m_usedTokens[curToken]; - ++curToken) + for (curToken = ctx.SkipUsedTokens(curToken); + curToken < ctx.m_numTokens && !ctx.m_usedTokens[curToken]; ++curToken) { - allFeatures.Intersect(m_addressFeatures[curToken].get()); + allFeatures.Intersect(ctx.m_features[curToken].get()); } if (m_filter->NeedToFilter(*allFeatures)) - allFeatures.Set(m_filter->Filter(*allFeatures).release(), true /* isOwner */); + allFeatures.Set(m_filter->Filter(*allFeatures)); if (allFeatures.IsEmpty()) return; auto emitUnclassified = [&](uint32_t featureId) { - auto type = GetSearchTypeInGeocoding(featureId); + auto type = GetSearchTypeInGeocoding(ctx, featureId); if (type == SearchModel::SEARCH_TYPE_UNCLASSIFIED) EmitResult(m_context->GetId(), featureId, type, startToken, curToken); }; @@ -1614,11 +1522,15 @@ coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures(MwmContex } } -SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId) +SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(BaseContext const & ctx, + uint32_t featureId) { - if (m_streets->GetBit(featureId)) + ASSERT(ctx.m_streets, ()); + ASSERT(ctx.m_villages, ()); + + if (ctx.m_streets->GetBit(featureId)) return SearchModel::SEARCH_TYPE_STREET; - if (m_villages->GetBit(featureId)) + if (ctx.m_villages->GetBit(featureId)) return SearchModel::SEARCH_TYPE_VILLAGE; FeatureType feature; @@ -1626,34 +1538,6 @@ SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId) return m_model.GetSearchType(feature); } -bool Geocoder::AllTokensUsed() const -{ - return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); -} - -bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const -{ - return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor()); -} - -size_t Geocoder::NumUnusedTokensGroups() const -{ - size_t numGroups = 0; - for (size_t i = 0; i < m_usedTokens.size(); ++i) - { - if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1])) - ++numGroups; - } - return numGroups; -} - -size_t Geocoder::SkipUsedTokens(size_t curToken) const -{ - while (curToken != m_usedTokens.size() && m_usedTokens[curToken]) - ++curToken; - return curToken; -} - string DebugPrint(Geocoder::Locality const & locality) { ostringstream os; diff --git a/search/geocoder.hpp b/search/geocoder.hpp index 79e549bba6..876f5b762b 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -3,6 +3,7 @@ #include "search/cancel_exception.hpp" #include "search/features_layer.hpp" #include "search/features_layer_path_finder.hpp" +#include "search/geocoder_context.hpp" #include "search/geometry_cache.hpp" #include "search/mode.hpp" #include "search/model.hpp" @@ -11,6 +12,7 @@ #include "search/pre_ranking_info.hpp" #include "search/query_params.hpp" #include "search/ranking_utils.hpp" +#include "search/streets_matcher.hpp" #include "indexer/index.hpp" #include "indexer/mwm_set.hpp" @@ -195,60 +197,57 @@ private: // Creates a cache of posting lists corresponding to features in m_context // for each token and saves it to m_addressFeatures. 
- void PrepareAddressFeatures(); + void InitBaseContext(BaseContext & ctx); void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, FeaturesLayer & layer); - void FillLocalityCandidates(coding::CompressedBitVector const * filter, + void FillLocalityCandidates(BaseContext const & ctx, coding::CompressedBitVector const * filter, size_t const maxNumLocalities, vector & preLocalities); - void FillLocalitiesTable(); + void FillLocalitiesTable(BaseContext const & ctx); - void FillVillageLocalities(); + void FillVillageLocalities(BaseContext const & ctx); template void ForEachCountry(vector> const & infos, TFn && fn); // Throws CancelException if cancelled. - inline void BailIfCancelled() - { - ::search::BailIfCancelled(m_cancellable); - } + inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); } // Tries to find all countries and states in a search query and then // performs matching of cities in found maps. - void MatchRegions(RegionType type); + void MatchRegions(BaseContext & ctx, RegionType type); // Tries to find all cities in a search query and then performs // matching of streets in found cities. - void MatchCities(); + void MatchCities(BaseContext & ctx); // Tries to do geocoding without localities, ie. find POIs, // BUILDINGs and STREETs without knowledge about country, state, // city or village. If during the geocoding too many features are // retrieved, viewport is used to throw away excess features. - void MatchAroundPivot(); + void MatchAroundPivot(BaseContext & ctx); // Tries to do geocoding in a limited scope, assuming that knowledge // about high-level features, like cities or countries, is // incorporated into |filter|. - void LimitedSearch(FeaturesFilter const & filter); + void LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter); template - void WithPostcodes(TFn && fn); + void WithPostcodes(BaseContext & ctx, TFn && fn); // Tries to match some adjacent tokens in the query as streets and // then performs geocoding in street vicinities. - void GreedilyMatchStreets(); + void GreedilyMatchStreets(BaseContext & ctx); - void CreateStreetsLayerAndMatchLowerLayers(size_t startToken, size_t endToken, - coding::CompressedBitVector const & features); + void CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, + StreetsMatcher::Prediction const & prediction); // Tries to find all paths in a search tree, where each edge is // marked with some substring of the query tokens. These paths are // called "layer sequence" and current path is stored in |m_layers|. - void MatchPOIsAndBuildings(size_t curToken); + void MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken); // Returns true if current path in the search tree (see comment for // MatchPOIsAndBuildings()) looks sane. This method is used as a fast @@ -271,7 +270,7 @@ private: // Tries to match unclassified objects from lower layers, like // parks, forests, lakes, rivers, etc. This method finds all // UNCLASSIFIED objects that match to all currently unused tokens. - void MatchUnclassified(size_t curToken); + void MatchUnclassified(BaseContext & ctx, size_t curToken); unique_ptr LoadCategories( MwmContext & context, vector const & categories); @@ -290,21 +289,7 @@ private: // This is a faster wrapper around SearchModel::GetSearchType(), as // it uses pre-loaded lists of streets and villages. - SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId); - - // Returns true iff all tokens are used. 
- bool AllTokensUsed() const; - - // Returns true if there exists at least one used token in [from, - // to). - bool HasUsedTokensInRange(size_t from, size_t to) const; - - // Counts number of groups of consecutive unused tokens. - size_t NumUnusedTokensGroups() const; - - // Advances |curToken| to the nearest unused token, or to the end of - // |m_usedTokens| if there are no unused tokens. - size_t SkipUsedTokens(size_t curToken) const; + SearchModel::SearchType GetSearchTypeInGeocoding(BaseContext const & ctx, uint32_t featureId); Index & m_index; @@ -315,9 +300,6 @@ private: // Geocoder params. Params m_params; - // Total number of search query tokens. - size_t m_numTokens; - // This field is used to map features to a limited number of search // classes. SearchModel const & m_model; @@ -344,30 +326,12 @@ private: // Cache of nested rects used to estimate distance from a feature to the pivot. NestedRectsCache m_pivotFeatures; - // Cache of posting lists for each token in the query. TODO (@y, - // @m, @vng): consider to update this cache lazily, as user inputs - // tokens one-by-one. - vector> m_addressFeatures; - // Cache of street ids in mwms. map> m_streetsCache; - // Street features in the mwm that is currently being processed. - // The initialization of m_streets is postponed in order to gain - // some speed. Therefore m_streets may be used only in - // LimitedSearch() and in all its callees. - coding::CompressedBitVector const * m_streets; - - // Village features in the mwm that is currently being processed. - unique_ptr m_villages; - // Postcodes features in the mwm that is currently being processed. Postcodes m_postcodes; - // This vector is used to indicate what tokens were matched by - // locality and can't be re-used during the geocoding process. - vector m_usedTokens; - // This filter is used to throw away excess features. FeaturesFilter const * m_filter; diff --git a/search/geocoder_context.cpp b/search/geocoder_context.cpp new file mode 100644 index 0000000000..9f03510916 --- /dev/null +++ b/search/geocoder_context.cpp @@ -0,0 +1,36 @@ +#include "search/geocoder_context.hpp" + +#include "base/stl_add.hpp" + +#include "std/algorithm.hpp" + +namespace search +{ +size_t BaseContext::SkipUsedTokens(size_t curToken) const +{ + while (curToken != m_usedTokens.size() && m_usedTokens[curToken]) + ++curToken; + return curToken; +} + +bool BaseContext::AllTokensUsed() const +{ + return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); +} + +bool BaseContext::HasUsedTokensInRange(size_t from, size_t to) const +{ + return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor()); +} + +size_t BaseContext::NumUnusedTokenGroups() const +{ + size_t numGroups = 0; + for (size_t i = 0; i < m_usedTokens.size(); ++i) + { + if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1])) + ++numGroups; + } + return numGroups; +} +} // namespace search diff --git a/search/geocoder_context.hpp b/search/geocoder_context.hpp new file mode 100644 index 0000000000..ab43007047 --- /dev/null +++ b/search/geocoder_context.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include "coding/compressed_bit_vector.hpp" + +#include "std/unique_ptr.hpp" +#include "std/vector.hpp" + +namespace search +{ +class FeaturesFilter; + +struct BaseContext +{ + // Advances |curToken| to the nearest unused token, or to the end of + // |m_usedTokens| if there are no unused tokens. + size_t SkipUsedTokens(size_t curToken) const; + + // Returns true iff all tokens are used. 
+  bool AllTokensUsed() const;
+
+  // Returns true if there exists at least one used token in [from,
+  // to).
+  bool HasUsedTokensInRange(size_t from, size_t to) const;
+
+  // Counts number of groups of consecutive unused tokens.
+  size_t NumUnusedTokenGroups() const;
+
+  vector<unique_ptr<coding::CompressedBitVector>> m_features;
+  unique_ptr<coding::CompressedBitVector> m_villages;
+  coding::CompressedBitVector const * m_streets = nullptr;
+
+  // This vector is used to indicate what tokens were matched by
+  // locality and can't be re-used during the geocoding process.
+  vector<bool> m_usedTokens;
+
+  size_t m_numTokens = 0;
+};
+} // namespace search
diff --git a/search/mwm_context.hpp b/search/mwm_context.hpp
index 200f5cd053..ca9b50b524 100644
--- a/search/mwm_context.hpp
+++ b/search/mwm_context.hpp
@@ -90,5 +90,4 @@ private:
 
   DISALLOW_COPY_AND_MOVE(MwmContext);
 };
-
 } // namespace search
diff --git a/search/query_params.hpp b/search/query_params.hpp
index f3bc6f90eb..2f00bc6c34 100644
--- a/search/query_params.hpp
+++ b/search/query_params.hpp
@@ -36,6 +36,10 @@ struct QueryParams
   TSynonymsVector const & GetTokens(size_t i) const;
   TSynonymsVector & GetTokens(size_t i);
 
+  inline size_t GetNumTokens() const
+  {
+    return m_prefixTokens.empty() ? m_tokens.size() : m_tokens.size() + 1;
+  }
+
   /// @return true if all tokens in [start, end) range has number synonym.
   bool IsNumberTokens(size_t start, size_t end) const;
 
diff --git a/search/search.pro b/search/search.pro
index 6be1bd7a85..5ddaf35a28 100644
--- a/search/search.pro
+++ b/search/search.pro
@@ -22,6 +22,7 @@ HEADERS += \
     features_layer_matcher.hpp \
     features_layer_path_finder.hpp \
     geocoder.hpp \
+    geocoder_context.hpp \
     geometry_cache.hpp \
     geometry_utils.hpp \
     house_detector.hpp \
@@ -60,6 +61,7 @@ HEADERS += \
     search_trie.hpp \
     stats_cache.hpp \
     street_vicinity_loader.hpp \
+    streets_matcher.hpp \
    string_intersection.hpp \
    suggest.hpp \
    token_slice.hpp \
@@ -75,6 +77,7 @@ SOURCES += \
    features_layer_matcher.cpp \
    features_layer_path_finder.cpp \
    geocoder.cpp \
+    geocoder_context.cpp \
    geometry_cache.cpp \
    geometry_utils.cpp \
    house_detector.cpp \
@@ -108,5 +111,6 @@ SOURCES += \
    retrieval.cpp \
    reverse_geocoder.cpp \
    street_vicinity_loader.cpp \
+    streets_matcher.cpp \
    token_slice.cpp \
    types_skipper.cpp \
diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp
new file mode 100644
index 0000000000..7d8125437c
--- /dev/null
+++ b/search/streets_matcher.cpp
@@ -0,0 +1,173 @@
+#include "search/streets_matcher.hpp"
+#include "search/features_filter.hpp"
+#include "search/house_numbers_matcher.hpp"
+#include "search/query_params.hpp"
+
+#include "indexer/search_string_utils.hpp"
+
+#include "base/logging.hpp"
+#include "base/stl_helpers.hpp"
+
+namespace search
+{
+namespace
+{
+bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Prediction const & rhs)
+{
+  if (lhs.m_hash != rhs.m_hash)
+    return lhs.m_hash < rhs.m_hash;
+
+  if (lhs.m_prob != rhs.m_prob)
+    return lhs.m_prob > rhs.m_prob;
+
+  if (lhs.GetNumTokens() != rhs.GetNumTokens())
+    return lhs.GetNumTokens() > rhs.GetNumTokens();
+
+  return lhs.m_startToken < rhs.m_startToken;
+}
+} // namespace
+
+// static
+void StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter,
+                        QueryParams const & params, vector<Prediction> & predictions)
+{
+  size_t const kMaxNumPredictions = 3;
+  double const kTailProbability = 0.05;
+
+  predictions.clear();
+  FindStreets(ctx, filter, params, predictions);
+
+  if (predictions.empty())
+    return;
+
+  sort(predictions.begin(), predictions.end(), &LessByHash);
+  predictions.erase(
+      unique(predictions.begin(), predictions.end(), my::EqualsBy(&Prediction::m_hash)),
+      predictions.end());
+
+  sort(predictions.rbegin(), predictions.rend(), my::LessBy(&Prediction::m_prob));
+  while (predictions.size() > kMaxNumPredictions && predictions.back().m_prob < kTailProbability)
+    predictions.pop_back();
+}
+
+// static
+void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & filter,
+                                 QueryParams const & params, vector<Prediction> & predictions)
+{
+  for (size_t startToken = 0; startToken < ctx.m_numTokens; ++startToken)
+  {
+    if (ctx.m_usedTokens[startToken])
+      continue;
+
+    // Here we try to match as many tokens as possible while
+    // intersection is a non-empty bit vector of streets. Single
+    // tokens that are synonyms to streets are ignored. Moreover,
+    // each time a token that looks like a beginning of a house number
+    // is met, we try to use current intersection of tokens as a
+    // street layer and try to match BUILDINGs or POIs.
+    CBVPtr streets(ctx.m_streets, false /* isOwner */);
+
+    CBVPtr all;
+    all.SetFull();
+
+    size_t curToken = startToken;
+
+    // This variable is used for prevention of duplicate calls to
+    // CreateStreetsLayerAndMatchLowerLayers() with the same
+    // arguments.
+    size_t lastToken = startToken;
+
+    // When true, no bit vectors were intersected with |streets| at all.
+    bool emptyIntersection = true;
+
+    // When true, |streets| is in the incomplete state and can't be
+    // used for creation of street layers.
+    bool incomplete = false;
+
+    auto emit = [&]()
+    {
+      if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
+      {
+        CBVPtr fs(streets.Get(), false /* isOwner */);
+        CBVPtr fa(all.Get(), false /* isOwner */);
+
+        ASSERT(!fs.IsFull(), ());
+        ASSERT(!fa.IsFull(), ());
+
+        if (filter.NeedToFilter(*fs))
+          fs.Set(filter.Filter(*fs));
+
+        if (fs.IsEmpty())
+          return;
+
+        if (filter.NeedToFilter(*fa))
+        {
+          fa.Set(filter.Filter(*fa));
+          fa.Union(fs.Get());
+        }
+
+        predictions.emplace_back();
+        auto & prediction = predictions.back();
+
+        prediction.m_startToken = startToken;
+        prediction.m_endToken = curToken;
+
+        ASSERT_LESS_OR_EQUAL(fs->PopCount(), fa->PopCount(), ());
+        prediction.m_prob =
+            static_cast<double>(fs->PopCount()) / static_cast<double>(fa->PopCount());
+
+        if (fs.IsOwner())
+          prediction.m_features = move(fs);
+        else
+          fs.CopyTo(prediction.m_features);
+
+        prediction.m_hash = coding::CompressedBitVectorHasher::Hash(*prediction.m_features);
+      }
+    };
+
+    StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
+                              {
+                                auto buffer = coding::CompressedBitVector::Intersect(
+                                    *streets, *ctx.m_features[tag]);
+                                if (tag < curToken)
+                                {
+                                  // This is the case for delayed
+                                  // street synonym. Therefore,
+                                  // |streets| is temporarily in the
+                                  // incomplete state.
+                                  streets.Set(move(buffer));
+                                  all.Intersect(ctx.m_features[tag].get());
+                                  emptyIntersection = false;
+
+                                  incomplete = true;
+                                  return;
+                                }
+                                ASSERT_EQUAL(tag, curToken, ());
+
+                                // |streets| will become empty after
+                                // the intersection. Therefore we need
+                                // to create streets layer right now.
+                                if (coding::CompressedBitVector::IsEmpty(buffer))
+                                  emit();
+
+                                streets.Set(move(buffer));
+                                all.Intersect(ctx.m_features[tag].get());
+                                emptyIntersection = false;
+                                incomplete = false;
+                              });
+
+    for (; curToken < ctx.m_numTokens && !ctx.m_usedTokens[curToken] && !streets.IsEmpty();
+         ++curToken)
+    {
+      auto const & token = params.GetTokens(curToken).front();
+      bool const isPrefix = curToken >= params.m_tokens.size();
+
+      if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
+        emit();
+
+      filter.Put(token, isPrefix, curToken);
+    }
+    emit();
+  }
+}
+} // namespace search
diff --git a/search/streets_matcher.hpp b/search/streets_matcher.hpp
new file mode 100644
index 0000000000..43062db676
--- /dev/null
+++ b/search/streets_matcher.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "search/cbv_ptr.hpp"
+#include "search/geocoder_context.hpp"
+
+#include "std/vector.hpp"
+
+namespace search
+{
+class FeaturesFilter;
+struct QueryParams;
+
+class StreetsMatcher
+{
+public:
+  struct Prediction
+  {
+    inline size_t GetNumTokens() const { return m_endToken - m_startToken; }
+
+    CBVPtr m_features;
+
+    size_t m_startToken = 0;
+    size_t m_endToken = 0;
+
+    double m_prob = 0.0;
+
+    uint64_t m_hash = 0;
+  };
+
+  static void Go(BaseContext const & ctx, FeaturesFilter const & filter, QueryParams const & params,
+                 vector<Prediction> & predictions);
+
+private:
+  static void FindStreets(BaseContext const & ctx, FeaturesFilter const & filter,
+                          QueryParams const & params, vector<Prediction> & prediction);
+};
+} // namespace search
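
Illustrative usage (editor's sketch, not part of the patch): how a caller is expected to drive the new StreetsMatcher, mirroring Geocoder::GreedilyMatchStreets() above. The helper name MatchStreetsSketch is hypothetical, and it assumes |ctx| was already filled the way Geocoder::InitBaseContext() and LimitedSearch() fill it; the types and the Go() signature come from the headers added in this patch.

    #include "search/features_filter.hpp"
    #include "search/geocoder_context.hpp"
    #include "search/query_params.hpp"
    #include "search/streets_matcher.hpp"

    #include "base/logging.hpp"

    #include "std/vector.hpp"

    namespace search
    {
    // Hypothetical helper: |ctx| must already contain per-token feature lists
    // (ctx.m_features), the streets bit vector (ctx.m_streets) and the used-token
    // mask (ctx.m_usedTokens), as the geocoder sets them up before street matching.
    void MatchStreetsSketch(BaseContext const & ctx, FeaturesFilter const & filter,
                            QueryParams const & params)
    {
      vector<StreetsMatcher::Prediction> predictions;
      StreetsMatcher::Go(ctx, filter, params, predictions);

      // Each prediction covers tokens [m_startToken, m_endToken), carries the matched
      // street features in m_features and a score in m_prob; the geocoder turns each
      // one into a SEARCH_TYPE_STREET layer in CreateStreetsLayerAndMatchLowerLayers().
      for (auto const & prediction : predictions)
      {
        LOG(LDEBUG, ("Street prediction:", prediction.m_startToken, prediction.m_endToken,
                     prediction.m_prob));
      }
    }
    } // namespace search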