diff --git a/search/bookmarks/processor.hpp b/search/bookmarks/processor.hpp index 7bcc763402..a026d31183 100644 --- a/search/bookmarks/processor.hpp +++ b/search/bookmarks/processor.hpp @@ -51,9 +51,7 @@ private: void Retrieve(QueryParams::Token const & token, Fn && fn) const { SearchTrieRequest request; - token.ForEachSynonym([&request](strings::UniString const & s) { - request.m_names.emplace_back(BuildLevenshteinDFA(s)); - }); + FillRequestFromToken(token, request); request.m_langs.insert(StringUtf8Multilang::kDefaultCode); MatchFeaturesInTrie( diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 5ac0bb6ca3..f9ca26296f 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -334,7 +334,7 @@ void MatchPostcodesInTrie(TokenSlice const & slice, trie::Iterator co // 2. For prefix match query like "streetname 40" where |streetname| is located in 40xxx // postcode zone will give all street vicinity as the result which is wrong. std::vector dfas; - slice.Get(i).ForEachSynonym([&dfas](UniString const & s) { dfas.emplace_back(s); }); + slice.Get(i).ForOriginalAndSynonyms([&dfas](UniString const & s) { dfas.emplace_back(s); }); MatchInTrie(dfas, TrieRootPrefix(*postcodesRoot, edge), intersector); intersector.NextStep(); diff --git a/search/geocoder.cpp b/search/geocoder.cpp index bcd6b2821f..41ddea4fde 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -366,9 +366,7 @@ void Geocoder::SetParams(Params const & params) { m_tokenRequests.emplace_back(); auto & request = m_tokenRequests.back(); - m_params.GetToken(i).ForEachSynonym([&request](UniString const & s) { - request.m_names.emplace_back(BuildLevenshteinDFA(s)); - }); + FillRequestFromToken(m_params.GetToken(i), request); for (auto const & index : m_params.GetTypeIndices(i)) request.m_categories.emplace_back(FeatureTypeToString(index)); request.SetLangs(m_params.GetLangs()); @@ -376,9 +374,7 @@ void Geocoder::SetParams(Params const & params) else { auto & request = m_prefixTokenRequest; - m_params.GetToken(i).ForEachSynonym([&request](UniString const & s) { - request.m_names.emplace_back(BuildLevenshteinDFA(s)); - }); + FillRequestFromToken(m_params.GetToken(i), request); for (auto const & index : m_params.GetTypeIndices(i)) request.m_categories.emplace_back(FeatureTypeToString(index)); request.SetLangs(m_params.GetLangs()); diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp index 50debcb535..bda1e02010 100644 --- a/search/locality_scorer.cpp +++ b/search/locality_scorer.cpp @@ -22,6 +22,8 @@ using namespace std; using namespace strings; +using PrefixDFA = PrefixDFAModifier; + namespace search { namespace @@ -30,7 +32,7 @@ class IdfMapDelegate : public IdfMap::Delegate { public: IdfMapDelegate(vector> const & tokensToDf, - vector, uint64_t>> const & prefixToDf) + vector> const & prefixToDf) : m_tokensToDf(tokensToDf), m_prefixToDf(prefixToDf) { } @@ -63,7 +65,7 @@ public: private: vector> const & m_tokensToDf; - vector, uint64_t>> const & m_prefixToDf; + vector> const & m_prefixToDf; }; } // namespace @@ -94,7 +96,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte vector intersections(ctx.m_numTokens); vector> tokensToDf; - vector, uint64_t>> prefixToDf; + vector> prefixToDf; bool const havePrefix = ctx.m_numTokens > 0 && m_params.LastTokenIsPrefix(); size_t const nonPrefixTokens = havePrefix ? ctx.m_numTokens - 1 : ctx.m_numTokens; for (size_t i = 0; i < nonPrefixTokens; ++i) @@ -103,8 +105,10 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte auto const df = intersections.back().m_features.PopCount(); if (df != 0) { - m_params.GetToken(i).ForEachSynonym([&tokensToDf, &df](UniString const & s) { - tokensToDf.emplace_back(BuildLevenshteinDFA(s), df); + auto const & token = m_params.GetToken(i); + tokensToDf.emplace_back(BuildLevenshteinDFA(token.GetOriginal()), df); + token.ForEachSynonym([&tokensToDf, &df](UniString const & s) { + tokensToDf.emplace_back(strings::LevenshteinDFA(s, 0 /* maxErrors */), df); }); } } @@ -116,9 +120,10 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte auto const prefixDf = intersections.back().m_features.PopCount(); if (prefixDf != 0) { - m_params.GetToken(count).ForEachSynonym([&prefixToDf, &prefixDf](UniString const & s) { - prefixToDf.emplace_back(PrefixDFAModifier(BuildLevenshteinDFA(s)), - prefixDf); + auto const & token = m_params.GetToken(count); + prefixToDf.emplace_back(PrefixDFA(BuildLevenshteinDFA(token.GetOriginal())), prefixDf); + token.ForEachSynonym([&prefixToDf, &prefixDf](UniString const & s) { + prefixToDf.emplace_back(PrefixDFA(strings::LevenshteinDFA(s, 0 /* maxErrors */)), prefixDf); }); } } diff --git a/search/query_params.cpp b/search/query_params.cpp index e174453ae6..1dd88f7d7e 100644 --- a/search/query_params.cpp +++ b/search/query_params.cpp @@ -102,17 +102,7 @@ bool QueryParams::IsNumberTokens(TokenRange const & range) const for (size_t i : range) { - bool number = false; - GetToken(i).ForEachSynonym([&number](String const & s) { - if (feature::IsNumber(s)) - { - number = true; - return false; // breaks ForEach - } - return true; // continues ForEach - }); - - if (!number) + if (!GetToken(i).AnyOfOriginalOrSynonyms([](String const & s) { return feature::IsNumber(s); })) return false; } diff --git a/search/query_params.hpp b/search/query_params.hpp index 6d91f1c128..847656050b 100644 --- a/search/query_params.hpp +++ b/search/query_params.hpp @@ -36,36 +36,34 @@ public: void AddSynonym(std::string const & s); void AddSynonym(String const & s); - // Calls |fn| on the original token and on synonyms. template std::enable_if_t, void>::value> ForEachSynonym( Fn && fn) const { - fn(m_original); std::for_each(m_synonyms.begin(), m_synonyms.end(), std::forward(fn)); } - // Calls |fn| on the original token and on synonyms until |fn| return false. template - std::enable_if_t, bool>::value> ForEachSynonym( - Fn && fn) const + std::enable_if_t, void>::value> + ForOriginalAndSynonyms(Fn && fn) const { - if (!fn(m_original)) - return; - for (auto const & synonym : m_synonyms) - { - if (!fn(synonym)) - return; - } + fn(m_original); + ForEachSynonym(std::forward(fn)); } template std::enable_if_t, bool>::value, bool> AnyOfSynonyms( Fn && fn) const + { + return std::any_of(m_synonyms.begin(), m_synonyms.end(), std::forward(fn)); + } + + template + std::enable_if_t, bool>::value, bool> + AnyOfOriginalOrSynonyms(Fn && fn) const { if (fn(m_original)) return true; - return std::any_of(m_synonyms.begin(), m_synonyms.end(), std::forward(fn)); } diff --git a/search/ranker.cpp b/search/ranker.cpp index d565c535a8..ccba47588f 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -159,11 +159,7 @@ pair MatchTokenRange(FeatureType & ft, Geocoder::Params cons for (auto const token : range) { - ErrorsMade tokenErrors; - params.GetToken(token).ForEachSynonym([&](strings::UniString const & s) { - tokenErrors = ErrorsMade::Max(tokenErrors, ErrorsMade{GetMaxErrorsForToken(s)}); - }); - errorsMade += tokenErrors; + errorsMade += ErrorsMade{GetMaxErrorsForToken(params.GetToken(token).GetOriginal())}; matchedLength += params.GetToken(token).GetOriginal().size(); } diff --git a/search/ranking_utils.cpp b/search/ranking_utils.cpp index 182dcccc3d..e4fb738bb8 100644 --- a/search/ranking_utils.cpp +++ b/search/ranking_utils.cpp @@ -71,32 +71,30 @@ namespace impl { ErrorsMade GetErrorsMade(QueryParams::Token const & token, strings::UniString const & text) { - ErrorsMade errorsMade; + if (token.AnyOfSynonyms([&text](strings::UniString const & s) { return text == s; })) + return ErrorsMade(0); + auto const dfa = BuildLevenshteinDFA(text); + auto it = dfa.Begin(); + strings::DFAMove(it, token.GetOriginal().begin(), token.GetOriginal().end()); + if (it.Accepts()) + return ErrorsMade(it.ErrorsMade()); - token.ForEachSynonym([&](strings::UniString const & s) { - auto it = dfa.Begin(); - strings::DFAMove(it, s.begin(), s.end()); - if (it.Accepts()) - errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.ErrorsMade())); - }); - - return errorsMade; + return {}; } ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniString const & text) { - ErrorsMade errorsMade; + if (token.AnyOfSynonyms([&text](strings::UniString const & s) { return StartsWith(text, s); })) + return ErrorsMade(0); + auto const dfa = PrefixDFAModifier(BuildLevenshteinDFA(text)); + auto it = dfa.Begin(); + strings::DFAMove(it, token.GetOriginal().begin(), token.GetOriginal().end()); + if (!it.Rejects()) + return ErrorsMade(it.PrefixErrorsMade()); - token.ForEachSynonym([&](strings::UniString const & s) { - auto it = dfa.Begin(); - strings::DFAMove(it, s.begin(), s.end()); - if (!it.Rejects()) - errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.PrefixErrorsMade())); - }); - - return errorsMade; + return {}; } } // namespace impl diff --git a/search/search_tests/locality_scorer_test.cpp b/search/search_tests/locality_scorer_test.cpp index f12548bc44..63a1054d65 100644 --- a/search/search_tests/locality_scorer_test.cpp +++ b/search/search_tests/locality_scorer_test.cpp @@ -78,7 +78,7 @@ public: bool const isPrefixToken = m_params.IsPrefixToken(i); vector ids; - token.ForEachSynonym([&](UniString const & synonym) { + token.ForOriginalAndSynonyms([&](UniString const & synonym) { if (isPrefixToken) { m_searchIndex.ForEachInSubtree(synonym, diff --git a/search/utils.hpp b/search/utils.hpp index 4d05d41874..0c1ea6d8e6 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -125,4 +125,14 @@ void ForEachOfTypesInRect(DataSource const & dataSource, std::vector c // Returns true iff |query| contains |categoryEn| synonym. bool IsCategorialRequestFuzzy(std::string const & query, std::string const & categoryName); + +template +void FillRequestFromToken(QueryParams::Token const & token, SearchTrieRequest & request) +{ + request.m_names.emplace_back(BuildLevenshteinDFA(token.GetOriginal())); + // Allow misprints for original token only. + token.ForEachSynonym([&request](strings::UniString const & s) { + request.m_names.emplace_back(strings::LevenshteinDFA(s, 0 /* maxErrors */)); + }); +} } // namespace search