From 49d6447d11f58be7e3074083eb57c49f476c2bc5 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Fri, 1 Apr 2016 18:24:34 +0300 Subject: [PATCH 1/2] [search] Fixed building parts matching. --- .../house_numbers_matcher_test.cpp | 55 ++++-- search/v2/features_layer.cpp | 4 +- search/v2/features_layer.hpp | 2 + search/v2/features_layer_matcher.hpp | 14 +- search/v2/features_layer_path_finder.cpp | 7 +- search/v2/geocoder.cpp | 2 + search/v2/house_numbers_matcher.cpp | 167 +++++++++++++----- search/v2/house_numbers_matcher.hpp | 23 ++- 8 files changed, 203 insertions(+), 71 deletions(-) diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp index eef84bdb66..f6cb3fb7d8 100644 --- a/search/search_tests/house_numbers_matcher_test.cpp +++ b/search/search_tests/house_numbers_matcher_test.cpp @@ -13,16 +13,22 @@ namespace { void NormalizeHouseNumber(string const & s, vector & ts) { - vector tokens; - search::v2::NormalizeHouseNumber(strings::MakeUniString(s), tokens); - for (auto const & token : tokens) - ts.push_back(strings::ToUtf8(token)); + Parse p; + ParseHouseNumber(MakeUniString(s), p); + for (auto const & part : p.m_parts) + ts.push_back(ToUtf8(part)); } bool HouseNumbersMatch(string const & houseNumber, string const & query) { - return search::v2::HouseNumbersMatch(strings::MakeUniString(houseNumber), - strings::MakeUniString(query)); + return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), MakeUniString(query)); +} + +bool HouseNumbersMatch(string const & houseNumber, string const & query, bool queryIsPrefix) +{ + vector queryParse; + ParseQuery(MakeUniString(query), queryIsPrefix, queryParse); + return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), queryParse); } bool CheckTokenizer(string const & utf8s, vector const & expected) @@ -34,7 +40,12 @@ bool CheckTokenizer(string const & utf8s, vector const & expected) vector actual; for (auto const & token : tokens) actual.push_back(ToUtf8(token.m_token)); - return actual == expected; + if (actual != expected) + { + LOG(LINFO, ("actual:", actual, "expected:", expected)); + return false; + } + return true; } bool CheckNormalizer(string const & utf8s, string const & expected) @@ -49,7 +60,13 @@ bool CheckNormalizer(string const & utf8s, string const & expected) if (i + 1 != tokens.size()) actual.push_back(' '); } - return actual == expected; + + if (actual != expected) + { + LOG(LINFO, ("actual:", actual, "expected:", expected)); + return false; + } + return true; } } // namespace @@ -89,18 +106,26 @@ UNIT_TEST(HouseNumbersMatcher_Smoke) TEST(HouseNumbersMatch("10 корпус 2 строение 2", "10к2с2"), ()); TEST(HouseNumbersMatch("10к2а", "10 2а"), ()); TEST(HouseNumbersMatch("10 к2с", "10 2с"), ()); + TEST(HouseNumbersMatch("22к", "22 к"), ()); + TEST(HouseNumbersMatch("22к корпус 2а строение 7", "22к к 2а стр 7"), ()); + TEST(HouseNumbersMatch("22к к 2а с 7", "22к корпус 2а"), ()); + TEST(HouseNumbersMatch("124к корпус к", "124к к"), ()); TEST(!HouseNumbersMatch("39", "39 с 79"), ()); TEST(!HouseNumbersMatch("127а корпус 2", "127"), ()); TEST(!HouseNumbersMatch("6 корпус 2", "7"), ()); TEST(!HouseNumbersMatch("10/42 корпус 2", "42"), ()); TEST(!HouseNumbersMatch("--...--.-", "--.....-"), ()); -} + TEST(!HouseNumbersMatch("22к", "22 корпус"), ()); + TEST(!HouseNumbersMatch("22к", "22я"), ()); + TEST(!HouseNumbersMatch("22к", "22л"), ()); -UNIT_TEST(HouseNumbersMatcher_TwoStages) -{ - strings::UniString number = strings::MakeUniString("10 к2 с2"); - vector tokens; - NormalizeHouseNumber(number, tokens); - TEST(HouseNumbersMatch(number, tokens), (number, tokens)); + TEST(HouseNumbersMatch("39 корпус 79", "39", true /* queryIsPrefix */), ()); + TEST(HouseNumbersMatch("39 корпус 79", "39 кор", true /* queryIsPrefix */), ()); + TEST(!HouseNumbersMatch("39", "39 корп", true /* queryIsPrefix */), ()); + TEST(HouseNumbersMatch("39 корпус 7", "39", true /* queryIsPrefix */), ()); + TEST(HouseNumbersMatch("39К корпус 7", "39 к", true /* queryIsPrefix */), ()); + TEST(HouseNumbersMatch("39К корпус 7", "39к", true /* queryIsPrefix */), ()); + TEST(HouseNumbersMatch("39 К корпус 7", "39 к", false /* queryIsPrefix */), ()); + TEST(!HouseNumbersMatch("39 К корпус 7", "39", false /* queryIsPrefix */), ()); } diff --git a/search/v2/features_layer.cpp b/search/v2/features_layer.cpp index c18d42998b..80ceabe2cc 100644 --- a/search/v2/features_layer.cpp +++ b/search/v2/features_layer.cpp @@ -18,6 +18,7 @@ void FeaturesLayer::Clear() m_endToken = 0; m_type = SearchModel::SEARCH_TYPE_COUNT; m_hasDelayedFeatures = false; + m_lastTokenIsPrefix = false; } string DebugPrint(FeaturesLayer const & layer) @@ -26,7 +27,8 @@ string DebugPrint(FeaturesLayer const & layer) os << "FeaturesLayer [ size of m_sortedFeatures: " << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0) << ", m_subQuery: " << DebugPrint(layer.m_subQuery) << ", m_startToken: " << layer.m_startToken - << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) << " ]"; + << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) + << ", m_lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << " ]"; return os.str(); } } // namespace v2 diff --git a/search/v2/features_layer.hpp b/search/v2/features_layer.hpp index cb031f8a89..78a48ae927 100644 --- a/search/v2/features_layer.hpp +++ b/search/v2/features_layer.hpp @@ -36,6 +36,8 @@ struct FeaturesLayer // perform additional work to retrieve features matching by house // number. bool m_hasDelayedFeatures; + + bool m_lastTokenIsPrefix; }; string DebugPrint(FeaturesLayer const & layer); diff --git a/search/v2/features_layer_matcher.hpp b/search/v2/features_layer_matcher.hpp index 5d7c745863..210f337eff 100644 --- a/search/v2/features_layer_matcher.hpp +++ b/search/v2/features_layer_matcher.hpp @@ -153,9 +153,9 @@ private: // |buildings| doesn't contain buildings matching by house number, // so following code reads buildings in POIs vicinities and checks // house numbers. - vector queryTokens; - NormalizeHouseNumber(parent.m_subQuery, queryTokens); - if (queryTokens.empty()) + vector queryParses; + ParseQuery(parent.m_subQuery, parent.m_lastTokenIsPrefix, queryParses); + if (queryParses.empty()) return; for (size_t i = 0; i < pois.size(); ++i) @@ -164,7 +164,7 @@ private: MercatorBounds::RectByCenterXYAndSizeInMeters(poiCenters[i], kBuildingRadiusMeters), [&](FeatureType & ft) { - if (HouseNumbersMatch(strings::MakeUniString(ft.GetHouseNumber()), queryTokens)) + if (HouseNumbersMatch(strings::MakeUniString(ft.GetHouseNumber()), queryParses)) { double const distanceM = MercatorBounds::DistanceOnEarth(feature::GetCenter(ft), poiCenters[i]); if (distanceM < kBuildingRadiusMeters) @@ -235,8 +235,8 @@ private: return; } - vector queryTokens; - NormalizeHouseNumber(child.m_subQuery, queryTokens); + vector queryParses; + ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParses); uint32_t numFilterInvocations = 0; auto houseNumberFilter = [&](uint32_t id, FeatureType & feature, bool & loaded) -> bool @@ -266,7 +266,7 @@ private: strings::UniString const houseNumber(strings::MakeUniString(feature.GetHouseNumber())); if (!feature::IsHouseNumber(houseNumber)) return false; - return HouseNumbersMatch(houseNumber, queryTokens); + return HouseNumbersMatch(houseNumber, queryParses); }; unordered_map cache; diff --git a/search/v2/features_layer_path_finder.cpp b/search/v2/features_layer_path_finder.cpp index b84c8fbd30..8229e9ca82 100644 --- a/search/v2/features_layer_path_finder.cpp +++ b/search/v2/features_layer_path_finder.cpp @@ -2,6 +2,7 @@ #include "search/cancel_exception.hpp" #include "search/v2/features_layer_matcher.hpp" +#include "search/v2/house_numbers_matcher.hpp" #include "indexer/features_vector.hpp" @@ -47,9 +48,9 @@ uint64_t CalcBottomUpPassCost(vector const & layers) bool LooksLikeHouseNumber(strings::UniString const & query) { - vector tokens; - NormalizeHouseNumber(query, tokens); - return !tokens.empty() && feature::IsHouseNumber(tokens.front()); + Parse parse; + ParseHouseNumber(query, parse); + return !parse.IsEmpty() && feature::IsHouseNumber(parse.m_parts.front()); } bool GetPath(uint32_t id, vector const & layers, TParentGraph const & parent, diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 42ebc4c9e8..11761018b4 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -1058,6 +1058,7 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers( layer.m_endToken = endToken; JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, layer.m_subQuery); + layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); vector sortedFeatures; sortedFeatures.reserve(features->PopCount()); @@ -1121,6 +1122,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) layer.m_endToken = curToken + n; JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, layer.m_subQuery); + layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); } features.Intersect(m_addressFeatures[curToken + n - 1].get()); diff --git a/search/v2/house_numbers_matcher.cpp b/search/v2/house_numbers_matcher.cpp index 2fd37271f1..fce06372a6 100644 --- a/search/v2/house_numbers_matcher.cpp +++ b/search/v2/house_numbers_matcher.cpp @@ -42,6 +42,21 @@ bool IsNumberOrShortWord(HouseNumberTokenizer::Token const & t) return IsNumber(t) || IsShortWord(t); } +bool IsBuildingSynonymPrefix(UniString const & p) +{ + static UniString kSynonyms[] = { + MakeUniString("building"), MakeUniString("bld"), MakeUniString("unit"), + MakeUniString("block"), MakeUniString("blk"), MakeUniString("корпус"), + MakeUniString("литер"), MakeUniString("строение"), MakeUniString("блок")}; + + for (UniString const & s : kSynonyms) + { + if (StartsWith(s, p)) + return true; + } + return false; +} + size_t GetNumTokensForBuildingPart(vector const & ts, size_t i, vector & memory); @@ -51,29 +66,23 @@ size_t GetNumTokensForBuildingPartImpl(vector const ASSERT_LESS(i, ts.size(), ()); // TODO (@y, @m, @vng): move these constans out. - static UniString kSynonyms[] = {MakeUniString("building"), MakeUniString("unit"), - MakeUniString("block"), MakeUniString("корпус"), - MakeUniString("литер"), MakeUniString("строение"), - MakeUniString("блок")}; auto const & token = ts[i]; if (token.m_klass != HouseNumberTokenizer::CharClass::Other) return 0; - bool prefix = false; - for (UniString const & synonym : kSynonyms) - { - if (StartsWith(synonym, token.m_token)) - { - prefix = true; - break; - } - } - if (!prefix) + if (!IsBuildingSynonymPrefix(token.m_token)) return 0; // No sense in single "корпус" or "литер". - if (i + 1 >= ts.size() || !IsNumberOrShortWord(ts[i + 1])) + if (i + 1 >= ts.size()) + return 0; + + if (!IsNumberOrShortWord(ts[i + 1])) + return 0; + + // No sense in "корпус корпус" or "литер литер". + if (ts[i + 1].m_token == token.m_token) return 0; // Consume next token, either number or short word. @@ -156,6 +165,42 @@ void MergeTokens(vector const & ts, vector q.size(); + } + + return j == q.size(); } } // namespace @@ -181,14 +226,35 @@ void HouseNumberTokenizer::Tokenize(UniString const & s, vector & ts) } } -void NormalizeHouseNumber(strings::UniString const & s, vector & ts) +void ParseHouseNumber(strings::UniString const & s, Parse & p) { vector tokens; HouseNumberTokenizer::Tokenize(MakeLowerCase(s), tokens); - MergeTokens(tokens, ts); + MergeTokens(tokens, p.m_parts); +} - if (!ts.empty()) - sort(ts.begin() + 1, ts.end()); +void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps) +{ + vector tokens; + HouseNumberTokenizer::Tokenize(MakeLowerCase(query), tokens); + + { + ps.emplace_back(); + Parse & p = ps.back(); + MergeTokens(tokens, p.m_parts); + } + + // *NOTE* |tokens| is modified in the following block. + if (queryIsPrefix && !tokens.empty() && + tokens.back().m_klass == HouseNumberTokenizer::CharClass::Other && + IsBuildingSynonymPrefix(tokens.back().m_token)) + { + tokens.pop_back(); + ps.emplace_back(); + Parse & p = ps.back(); + MergeTokens(tokens, p.m_parts); + p.m_hasTrailingBuildingPrefixSynonym = true; + } } bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query) @@ -196,40 +262,52 @@ bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniStrin if (houseNumber == query) return true; - vector queryTokens; - NormalizeHouseNumber(query, queryTokens); + Parse queryParse; + ParseHouseNumber(query, queryParse); - return HouseNumbersMatch(houseNumber, queryTokens); + return HouseNumbersMatch(houseNumber, queryParse); } -bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryTokens) +bool HouseNumbersMatch(strings::UniString const & houseNumber, Parse const & queryParse) { - if (houseNumber.empty() || queryTokens.empty()) + if (houseNumber.empty() || queryParse.IsEmpty()) return false; - if (queryTokens[0][0] != houseNumber[0]) + if (queryParse.m_parts[0][0] != houseNumber[0]) return false; - vector houseNumberTokens; - NormalizeHouseNumber(houseNumber, houseNumberTokens); + Parse houseNumberParse; + ParseHouseNumber(houseNumber, houseNumberParse); - if (houseNumberTokens.empty()) + return ParsesMatch(houseNumberParse, queryParse); +} + +bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses) +{ + if (houseNumber.empty() || queryParses.empty()) return false; - // Check first tokens (hope, house numbers). - if (houseNumberTokens.front() != queryTokens.front()) - return false; - - size_t i = 1, j = 1; - while (i != houseNumberTokens.size() && j != queryTokens.size()) + // Fast pre-check, helps to early exit without complex house number + // parsing. + bool good = false; + for (auto const & queryParse : queryParses) { - while (i != houseNumberTokens.size() && houseNumberTokens[i] < queryTokens[j]) - ++i; - if (i == houseNumberTokens.size() || houseNumberTokens[i] != queryTokens[j]) - return false; - ++i; - ++j; + if (!queryParse.IsEmpty() && houseNumber[0] == queryParse.m_parts.front()[0]) + { + good = true; + break; + } } - return j == queryTokens.size(); + if (!good) + return false; + + Parse houseNumberParse; + ParseHouseNumber(houseNumber, houseNumberParse); + for (auto const & queryParse : queryParses) + { + if (ParsesMatch(houseNumberParse, queryParse)) + return true; + } + return false; } string DebugPrint(HouseNumberTokenizer::CharClass charClass) @@ -249,5 +327,12 @@ string DebugPrint(HouseNumberTokenizer::Token const & token) os << "Token [" << DebugPrint(token.m_token) << ", " << DebugPrint(token.m_klass) << "]"; return os.str(); } + +string DebugPrint(Parse const & parse) +{ + ostringstream os; + os << "Parse [" << DebugPrint(parse.m_parts) << "]"; + return os.str(); +} } // namespace v2 } // namespace search diff --git a/search/v2/house_numbers_matcher.hpp b/search/v2/house_numbers_matcher.hpp index 4f18a79312..aa22c178f7 100644 --- a/search/v2/house_numbers_matcher.hpp +++ b/search/v2/house_numbers_matcher.hpp @@ -37,18 +37,33 @@ public: static void Tokenize(strings::UniString const & s, vector & ts); }; +struct Parse +{ + inline bool IsEmpty() const { return m_parts.empty(); } + + vector m_parts; + bool m_hasTrailingBuildingPrefixSynonym = false; +}; + // Splits house number by tokens, removes blanks and separators. -void NormalizeHouseNumber(strings::UniString const & s, vector & ts); +void ParseHouseNumber(strings::UniString const & s, Parse & p); + +void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps); // Returns true when |query| matches to |houseNumber|. bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query); -// Returns true when |queryTokens| match to |houseNumber|. -bool HouseNumbersMatch(strings::UniString const & houseNumber, - vector const & queryTokens); +// Returns true when parsed query matches to |houseNumber|. +bool HouseNumbersMatch(strings::UniString const & houseNumber, Parse const & queryParse); + +// Returns true when at least one parse of the query matches to +// |houseNumber|. +bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses); string DebugPrint(HouseNumberTokenizer::CharClass charClass); string DebugPrint(HouseNumberTokenizer::Token const & token); + +string DebugPrint(Parse const & parse); } // namespace v2 } // namespace search From 3b86b88e6d064a3a4bce95f59ac3e2f15bca100e Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Wed, 6 Apr 2016 12:30:50 +0300 Subject: [PATCH 2/2] Review fixes. --- .../house_numbers_matcher_test.cpp | 73 ++++++++++--------- search/v2/features_layer_path_finder.cpp | 24 ++++-- search/v2/house_numbers_matcher.cpp | 38 +++------- search/v2/house_numbers_matcher.hpp | 11 +-- 4 files changed, 68 insertions(+), 78 deletions(-) diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp index f6cb3fb7d8..6a4573f0bf 100644 --- a/search/search_tests/house_numbers_matcher_test.cpp +++ b/search/search_tests/house_numbers_matcher_test.cpp @@ -11,24 +11,24 @@ using namespace search::v2; namespace { -void NormalizeHouseNumber(string const & s, vector & ts) +void ParseHouseNumber(string const & s, vector> & ts) { - Parse p; - ParseHouseNumber(MakeUniString(s), p); - for (auto const & part : p.m_parts) - ts.push_back(ToUtf8(part)); + vector parses; + ParseQuery(MakeUniString(s), false /* queryIsPrefix */, parses); + for (auto const & parse : parses) + { + ts.emplace_back(); + auto & tsb = ts.back(); + for (auto const & part : parse.m_parts) + tsb.push_back(ToUtf8(part)); + } } -bool HouseNumbersMatch(string const & houseNumber, string const & query) +bool HouseNumbersMatch(string const & houseNumber, string const & query, bool queryIsPrefix = false) { - return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), MakeUniString(query)); -} - -bool HouseNumbersMatch(string const & houseNumber, string const & query, bool queryIsPrefix) -{ - vector queryParse; - ParseQuery(MakeUniString(query), queryIsPrefix, queryParse); - return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), queryParse); + vector queryParses; + ParseQuery(MakeUniString(query), queryIsPrefix, queryParses); + return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), queryParses); } bool CheckTokenizer(string const & utf8s, vector const & expected) @@ -48,25 +48,26 @@ bool CheckTokenizer(string const & utf8s, vector const & expected) return true; } -bool CheckNormalizer(string const & utf8s, string const & expected) +bool CheckParser(string const & utf8s, string const & expected) { - vector tokens; - NormalizeHouseNumber(utf8s, tokens); + vector> parses; + ParseHouseNumber(utf8s, parses); - string actual; - for (size_t i = 0; i < tokens.size(); ++i) + for (auto const & parse : parses) { - actual.append(tokens[i]); - if (i + 1 != tokens.size()) - actual.push_back(' '); + string actual; + for (size_t i = 0; i < parse.size(); ++i) + { + actual.append(parse[i]); + if (i + 1 != parse.size()) + actual.push_back(' '); + } + if (actual == expected) + return true; } - if (actual != expected) - { - LOG(LINFO, ("actual:", actual, "expected:", expected)); - return false; - } - return true; + LOG(LINFO, ("actual:", parses, "expected:", expected)); + return false; } } // namespace @@ -79,14 +80,14 @@ UNIT_TEST(HouseNumberTokenizer_Smoke) UNIT_TEST(HouseNumberNormalizer_Smoke) { - TEST(CheckNormalizer("123Б", "123б"), ()); - TEST(CheckNormalizer("123/4 Литер А", "123 4 а"), ()); - TEST(CheckNormalizer("123а корп. 2б", "123а 2б"), ()); - TEST(CheckNormalizer("123к4", "123 4"), ()); - TEST(CheckNormalizer("123к Корпус 2", "123к 2"), ()); - TEST(CheckNormalizer("9 литер А корпус 2", "9 2 а"), ()); - TEST(CheckNormalizer("39с79", "39 79"), ()); - TEST(CheckNormalizer("9 литер аб1", "9 аб1"), ()); + TEST(CheckParser("123Б", "123б"), ()); + TEST(CheckParser("123/4 Литер А", "123 4 а"), ()); + TEST(CheckParser("123а корп. 2б", "123а 2б"), ()); + TEST(CheckParser("123к4", "123 4"), ()); + TEST(CheckParser("123к Корпус 2", "123к 2"), ()); + TEST(CheckParser("9 литер А корпус 2", "9 2 а"), ()); + TEST(CheckParser("39с79", "39 79"), ()); + TEST(CheckParser("9 литер аб1", "9 аб1"), ()); } UNIT_TEST(HouseNumbersMatcher_Smoke) diff --git a/search/v2/features_layer_path_finder.cpp b/search/v2/features_layer_path_finder.cpp index 8229e9ca82..c05ba6c8d7 100644 --- a/search/v2/features_layer_path_finder.cpp +++ b/search/v2/features_layer_path_finder.cpp @@ -46,11 +46,18 @@ uint64_t CalcBottomUpPassCost(vector const & layers) return CalcPassCost(layers.begin(), layers.end()); } -bool LooksLikeHouseNumber(strings::UniString const & query) +bool LooksLikeHouseNumber(strings::UniString const & query, bool queryIsPrefix) { - Parse parse; - ParseHouseNumber(query, parse); - return !parse.IsEmpty() && feature::IsHouseNumber(parse.m_parts.front()); + vector parses; + ParseQuery(query, queryIsPrefix, parses); + for (auto const & parse : parses) + { + if (parse.IsEmpty()) + continue; + if (feature::IsHouseNumber(parse.m_parts.front())) + return true; + } + return false; } bool GetPath(uint32_t id, vector const & layers, TParentGraph const & parent, @@ -124,8 +131,8 @@ void FeaturesLayerPathFinder::FindReachableVerticesTopDown( parent.m_hasDelayedFeatures = false; FeaturesLayer child(*layers[i - 1]); - child.m_hasDelayedFeatures = - child.m_type == SearchModel::SEARCH_TYPE_BUILDING && LooksLikeHouseNumber(child.m_subQuery); + child.m_hasDelayedFeatures = child.m_type == SearchModel::SEARCH_TYPE_BUILDING && + LooksLikeHouseNumber(child.m_subQuery, child.m_lastTokenIsPrefix); buffer.clear(); matcher.Match(child, parent, addEdge); @@ -171,8 +178,9 @@ void FeaturesLayerPathFinder::FindReachableVerticesBottomUp( child.m_hasDelayedFeatures = false; FeaturesLayer parent(*layers[i + 1]); - parent.m_hasDelayedFeatures = parent.m_type == SearchModel::SEARCH_TYPE_BUILDING && - LooksLikeHouseNumber(parent.m_subQuery); + parent.m_hasDelayedFeatures = + parent.m_type == SearchModel::SEARCH_TYPE_BUILDING && + LooksLikeHouseNumber(parent.m_subQuery, parent.m_lastTokenIsPrefix); buffer.clear(); matcher.Match(child, parent, addEdge); diff --git a/search/v2/house_numbers_matcher.cpp b/search/v2/house_numbers_matcher.cpp index fce06372a6..3acd0d6341 100644 --- a/search/v2/house_numbers_matcher.cpp +++ b/search/v2/house_numbers_matcher.cpp @@ -65,8 +65,6 @@ size_t GetNumTokensForBuildingPartImpl(vector const { ASSERT_LESS(i, ts.size(), ()); - // TODO (@y, @m, @vng): move these constans out. - auto const & token = ts[i]; if (token.m_klass != HouseNumberTokenizer::CharClass::Other) return 0; @@ -226,13 +224,6 @@ void HouseNumberTokenizer::Tokenize(UniString const & s, vector & ts) } } -void ParseHouseNumber(strings::UniString const & s, Parse & p) -{ - vector tokens; - HouseNumberTokenizer::Tokenize(MakeLowerCase(s), tokens); - MergeTokens(tokens, p.m_parts); -} - void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps) { vector tokens; @@ -257,28 +248,16 @@ void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector queryParses; + ParseQuery(query, queryIsPrefix, queryParses); - return HouseNumbersMatch(houseNumber, queryParse); -} - -bool HouseNumbersMatch(strings::UniString const & houseNumber, Parse const & queryParse) -{ - if (houseNumber.empty() || queryParse.IsEmpty()) - return false; - if (queryParse.m_parts[0][0] != houseNumber[0]) - return false; - - Parse houseNumberParse; - ParseHouseNumber(houseNumber, houseNumberParse); - - return ParsesMatch(houseNumberParse, queryParse); + return HouseNumbersMatch(houseNumber, queryParses); } bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses) @@ -301,7 +280,12 @@ bool HouseNumbersMatch(strings::UniString const & houseNumber, vector con return false; Parse houseNumberParse; - ParseHouseNumber(houseNumber, houseNumberParse); + { + vector tokens; + HouseNumberTokenizer::Tokenize(MakeLowerCase(houseNumber), tokens); + MergeTokens(tokens, houseNumberParse.m_parts); + } + for (auto const & queryParse : queryParses) { if (ParsesMatch(houseNumberParse, queryParse)) diff --git a/search/v2/house_numbers_matcher.hpp b/search/v2/house_numbers_matcher.hpp index aa22c178f7..53bf7d6bb7 100644 --- a/search/v2/house_numbers_matcher.hpp +++ b/search/v2/house_numbers_matcher.hpp @@ -45,16 +45,13 @@ struct Parse bool m_hasTrailingBuildingPrefixSynonym = false; }; -// Splits house number by tokens, removes blanks and separators. -void ParseHouseNumber(strings::UniString const & s, Parse & p); - +// Parses query for later faster processing, when multiple buildings +// are matched against the query. void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps); // Returns true when |query| matches to |houseNumber|. -bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query); - -// Returns true when parsed query matches to |houseNumber|. -bool HouseNumbersMatch(strings::UniString const & houseNumber, Parse const & queryParse); +bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query, + bool queryIsPrefix); // Returns true when at least one parse of the query matches to // |houseNumber|.