diff --git a/indexer/indexer.pro b/indexer/indexer.pro index 542debe5af..57d288e8fd 100644 --- a/indexer/indexer.pro +++ b/indexer/indexer.pro @@ -112,6 +112,7 @@ HEADERS += \ scales.hpp \ search_delimiters.hpp \ # it's in indexer because of CategoriesHolder dependency. search_string_utils.hpp \ # it's in indexer because of CategoriesHolder dependency. + string_slice.hpp \ succinct_trie_builder.hpp \ succinct_trie_reader.hpp \ tesselator_decl.hpp \ diff --git a/indexer/string_slice.hpp b/indexer/string_slice.hpp new file mode 100644 index 0000000000..4bf3eb6a13 --- /dev/null +++ b/indexer/string_slice.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include "base/string_utils.hpp" + +#include "std/string.hpp" +#include "std/vector.hpp" + +namespace search +{ +class StringSliceBase +{ +public: + using TString = strings::UniString; + + virtual ~StringSliceBase() = default; + + virtual TString const & Get(size_t i) const = 0; + virtual size_t Size() const = 0; +}; + +class NoPrefixStringSlice : public StringSliceBase +{ +public: + NoPrefixStringSlice(vector const & strings) + : m_strings(strings) + { + } + + virtual TString const & Get(size_t i) const override { return m_strings[i]; } + virtual size_t Size() const override { return m_strings.size(); } + +private: + vector const & m_strings; +}; +} // namespace search diff --git a/search/processor.cpp b/search/processor.cpp index 087f97af2e..0cc1fb9d26 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -402,7 +402,7 @@ int Processor::GetCategoryLocales(int8_t(&arr)[3]) const } template -void Processor::ForEachCategoryTypes(v2::QuerySlice const & slice, ToDo toDo) const +void Processor::ForEachCategoryTypes(StringSliceBase const & slice, ToDo toDo) const { int8_t arrLocales[3]; int const localesCount = GetCategoryLocales(arrLocales); diff --git a/search/processor.hpp b/search/processor.hpp index b757875d69..e3febba23c 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -12,6 +12,7 @@ #include "indexer/ftypes_matcher.hpp" #include "indexer/index.hpp" #include "indexer/rank_table.hpp" +#include "indexer/string_slice.hpp" #include "geometry/rect2d.hpp" @@ -141,7 +142,7 @@ protected: int GetCategoryLocales(int8_t(&arr)[3]) const; template - void ForEachCategoryTypes(v2::QuerySlice const & slice, ToDo toDo) const; + void ForEachCategoryTypes(StringSliceBase const & slice, ToDo toDo) const; template void ProcessEmojiIfNeeded(strings::UniString const & token, size_t ind, ToDo & toDo) const; diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 2c5046c96d..fbe901f9a7 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -1011,7 +1011,7 @@ void Geocoder::WithPostcodes(TFn && fn) break; TokenSlice slice(m_params, startToken, startToken + n); - if (LooksLikePostcode(slice)) + if (LooksLikePostcode(QuerySliceOnTokens(slice), true /*handleAsPrefix*/)) endToken = startToken + n; } if (startToken == endToken) diff --git a/search/v2/postcodes_matcher.cpp b/search/v2/postcodes_matcher.cpp index e5112ad148..c7e54c948c 100644 --- a/search/v2/postcodes_matcher.cpp +++ b/search/v2/postcodes_matcher.cpp @@ -19,8 +19,6 @@ using namespace strings; namespace search { -namespace v2 -{ namespace { // Top patterns for postcodes. See @@ -107,7 +105,7 @@ public: // patterns. // // Complexity: O(total length of tokens in |slice|). - bool HasString(TokenSlice const & slice) const + bool HasString(StringSliceBase const & slice, bool handleAsPrefix) const { if (slice.Size() == 0) return m_root.m_isLeaf; @@ -115,7 +113,7 @@ public: Node const * cur = &m_root; for (size_t i = 0; i < slice.Size() && cur; ++i) { - auto const & s = slice.Get(i).front(); + auto const & s = slice.Get(i); cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar), make_transform_iterator(s.end(), &SimplifyChar)); if (cur && i + 1 < slice.Size()) @@ -125,11 +123,7 @@ public: if (!cur) return false; - if (slice.IsPrefix(slice.Size() - 1)) - return true; - - // Last token may be not a prefix, but just a part of a multi-token postcode. - if (slice.IsLast(slice.Size() - 1) && cur->Move(' ') != nullptr) + if (handleAsPrefix) return true; return cur->m_isLeaf; @@ -168,32 +162,19 @@ PostcodesMatcher const & GetPostcodesMatcher() } } // namespace -bool LooksLikePostcode(TokenSlice const & slice) { return GetPostcodesMatcher().HasString(slice); } +bool LooksLikePostcode(StringSliceBase const & slice, bool handleAsPrefix) +{ + return GetPostcodesMatcher().HasString(slice, handleAsPrefix); +} -bool LooksLikePostcode(string const & s, bool checkPrefix) +bool LooksLikePostcode(string const & s, bool handleAsPrefix) { vector tokens; bool const lastTokenIsPrefix = TokenizeStringAndCheckIfLastTokenIsPrefix(s, tokens, search::Delimiters()); - size_t const numTokens = tokens.size(); - - SearchQueryParams params; - if (checkPrefix && lastTokenIsPrefix) - { - params.m_prefixTokens.push_back(tokens.back()); - tokens.pop_back(); - } - - for (auto const & token : tokens) - { - params.m_tokens.emplace_back(); - params.m_tokens.back().push_back(token); - } - - return LooksLikePostcode(TokenSlice(params, 0, numTokens)); + return LooksLikePostcode(NoPrefixStringSlice(tokens), handleAsPrefix && lastTokenIsPrefix); } size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); } -} // namespace v2 } // namespace search diff --git a/search/v2/postcodes_matcher.hpp b/search/v2/postcodes_matcher.hpp index f30b712b9b..96557a239c 100644 --- a/search/v2/postcodes_matcher.hpp +++ b/search/v2/postcodes_matcher.hpp @@ -1,20 +1,17 @@ #pragma once +#include "indexer/string_slice.hpp" + #include "std/cstdint.hpp" #include "std/string.hpp" namespace search { -namespace v2 -{ -class TokenSlice; - -bool LooksLikePostcode(TokenSlice const & slice); +bool LooksLikePostcode(StringSliceBase const & slice, bool handleAsPrefix); /// Splits s into tokens and call LooksLikePostcode(TokenSlice) on the result. /// If checkPrefix is true returns true if some postcode starts with s. /// If checkPrefix is false returns true if s equals to some postcode. -bool LooksLikePostcode(string const & s, bool checkPrefix); +bool LooksLikePostcode(string const & s, bool handleAsPrefix); size_t GetMaxNumTokensInPostcode(); -} // namespace v2 } // namespace search diff --git a/search/v2/token_slice.hpp b/search/v2/token_slice.hpp index 0239debd1a..d6cc2bc02d 100644 --- a/search/v2/token_slice.hpp +++ b/search/v2/token_slice.hpp @@ -2,6 +2,8 @@ #include "search/query_params.hpp" +#include "indexer/string_slice.hpp" + #include "base/assert.hpp" #include "std/cstdint.hpp" @@ -67,21 +69,7 @@ private: vector m_indexes; }; -class QuerySlice -{ -public: - using TString = QueryParams::TString; - - virtual ~QuerySlice() = default; - - virtual TString const & Get(size_t i) const = 0; - virtual size_t Size() const = 0; - virtual bool IsPrefix(size_t i) const = 0; - - bool Empty() const { return Size() == 0; } -}; - -class QuerySliceOnTokens : public QuerySlice +class QuerySliceOnTokens : public StringSliceBase { public: QuerySliceOnTokens(TokenSlice const & slice) : m_slice(slice) {} @@ -89,14 +77,13 @@ public: // QuerySlice overrides: QueryParams::TString const & Get(size_t i) const override { return m_slice.Get(i).front(); } size_t Size() const override { return m_slice.Size(); } - bool IsPrefix(size_t i) const override { return m_slice.IsPrefix(i); } private: TokenSlice const m_slice; }; template -class QuerySliceOnRawStrings : public QuerySlice +class QuerySliceOnRawStrings : public StringSliceBase { public: QuerySliceOnRawStrings(TCont const & tokens, TString const & prefix) @@ -113,12 +100,6 @@ public: size_t Size() const override { return m_tokens.size() + (m_prefix.empty() ? 0 : 1); } - bool IsPrefix(size_t i) const override - { - ASSERT_LESS(i, Size(), ()); - return i == m_tokens.size(); - } - private: TCont const & m_tokens; TString const & m_prefix;