diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index 7e848d3cbc..d5829e657f 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -214,7 +214,7 @@ struct ValueBuilder void MakeValue(FeatureType const & ft, uint32_t index, FeatureWithRankAndCenter & v) const { - v.m_id = index; + v.m_featureId = index; // get BEST geometry rect of feature v.m_pt = feature::GetCenter(ft); @@ -229,7 +229,7 @@ struct ValueBuilder void MakeValue(FeatureType const & /* f */, uint32_t index, FeatureIndexValue & value) const { - value.m_id = index; + value.m_featureId = index; } }; diff --git a/search/base/inverted_list.hpp b/search/base/inverted_list.hpp index 877d95937e..81581fd224 100644 --- a/search/base/inverted_list.hpp +++ b/search/base/inverted_list.hpp @@ -10,6 +10,8 @@ namespace search { namespace base { +// This class is supposed to be used in inverted index to store list +// of document ids. template class InvertedList { diff --git a/search/base/mem_search_index.hpp b/search/base/mem_search_index.hpp index 48411491ca..dfbc53891c 100644 --- a/search/base/mem_search_index.hpp +++ b/search/base/mem_search_index.hpp @@ -6,9 +6,14 @@ #include "base/assert.hpp" #include "base/mem_trie.hpp" +#include "base/stl_add.hpp" +#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" +#include #include +#include +#include #include namespace search @@ -30,15 +35,14 @@ public: using Base = trie::Iterator; using InnerIterator = typename Trie::Iterator; - explicit Iterator(InnerIterator const & inIt) + explicit Iterator(InnerIterator const & innerIt) { - Base::m_values = inIt.GetValues(); - inIt.ForEachMove([&](Char c, InnerIterator it) - { - Base::m_edges.emplace_back(); - Base::m_edges.back().m_label.push_back(c); - m_moves.push_back(it); - }); + Base::m_values = innerIt.GetValues(); + innerIt.ForEachMove([&](Char c, InnerIterator it) { + Base::m_edges.emplace_back(); + Base::m_edges.back().m_label.push_back(c); + m_moves.push_back(it); + }); } ~Iterator() override = default; @@ -68,18 +72,29 @@ public: Iterator GetRootIterator() const { return Iterator(m_trie.GetRootIterator()); } + std::vector GetAllIds() const + { + std::vector ids; + m_trie.ForEachInTrie([&](Token const & /* token */, Id const & id) { ids.push_back(id); }); + my::SortUnique(ids); + return ids; + } + private: + static Token AddLang(int8_t lang, Token const & token) + { + Token r(1 + token.size()); + r[0] = static_cast(lang); + std::copy(token.begin(), token.end(), r.begin() + 1); + return r; + } + template void ForEachToken(Id const & id, Doc const & doc, Fn && fn) { doc.ForEachToken([&](int8_t lang, Token const & token) { - if (lang < 0) - return; - - Token t; - t.push_back(static_cast(lang)); - t.insert(t.end(), token.begin(), token.end()); - fn(t); + if (lang >= 0) + fn(AddLang(lang, token)); }); } diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 401cc399c1..630218ff3f 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -116,10 +116,8 @@ public: if (m_prevSet && !m_prevSet->count(v)) return; - if (!m_filter(v.m_id)) - return; - - m_set->insert(v); + if (m_filter(v)) + m_set->insert(v); } void NextStep() @@ -176,7 +174,7 @@ public: void operator()(Value const & v) { - if (m_filter(v.m_id)) + if (m_filter(v)) m_values.push_back(v); } diff --git a/search/retrieval.cpp b/search/retrieval.cpp index 2485a271d5..17f82981a4 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -49,7 +49,7 @@ public: { if ((++m_counter & 0xFF) == 0) BailIfCancelled(m_cancellable); - m_features.push_back(value.m_id); + m_features.push_back(value.m_featureId); } inline void operator()(uint32_t feature) { m_features.push_back(feature); } @@ -207,9 +207,9 @@ unique_ptr RetrieveAddressFeaturesImpl( MatchFeaturesInTrie( request, root, - [&holder](uint64_t featureIndex) { - return !holder.ModifiedOrDeleted(base::asserted_cast(featureIndex)); - }, + [&holder](Value const & value) { + return !holder.ModifiedOrDeleted(base::asserted_cast(value.m_featureId)); + } /* filter */, collector); holder.ForEachModifiedOrCreated([&](FeatureType & ft, uint64_t index) { @@ -231,9 +231,9 @@ unique_ptr RetrievePostcodeFeaturesImpl( MatchPostcodesInTrie( slice, root, - [&holder](uint64_t featureIndex) { - return !holder.ModifiedOrDeleted(base::asserted_cast(featureIndex)); - }, + [&holder](Value const & value) { + return !holder.ModifiedOrDeleted(base::asserted_cast(value.m_featureId)); + } /* filter */, collector); holder.ForEachModifiedOrCreated([&](FeatureType & ft, uint64_t index) { diff --git a/search/search.pro b/search/search.pro index e21c2395d8..103ab8ad79 100644 --- a/search/search.pro +++ b/search/search.pro @@ -11,6 +11,8 @@ include($$ROOT_DIR/common.pri) HEADERS += \ algos.hpp \ approximate_string_match.hpp \ + base/inverted_list.hpp \ + base/mem_search_index.hpp \ cancel_exception.hpp \ categories_cache.hpp \ categories_set.hpp \ diff --git a/search/search_index_values.hpp b/search/search_index_values.hpp index ff8d2f75fa..3f537d305d 100644 --- a/search/search_index_values.hpp +++ b/search/search_index_values.hpp @@ -25,66 +25,66 @@ // A wrapper around feature index. struct FeatureIndexValue { - FeatureIndexValue() : m_id(0) {} + FeatureIndexValue() = default; - FeatureIndexValue(uint64_t id) : m_id(id) {} + explicit FeatureIndexValue(uint64_t featureId) : m_featureId(featureId) {} - bool operator<(FeatureIndexValue const & o) const { return m_id < o.m_id; } + bool operator<(FeatureIndexValue const & o) const { return m_featureId < o.m_featureId; } - bool operator==(FeatureIndexValue const & o) const { return m_id == o.m_id; } + bool operator==(FeatureIndexValue const & o) const { return m_featureId == o.m_featureId; } - void Swap(FeatureIndexValue & o) { swap(m_id, o.m_id); } + void Swap(FeatureIndexValue & o) { swap(m_featureId, o.m_featureId); } - uint64_t m_id; + uint64_t m_featureId = 0; }; namespace std { template <> -class hash +struct hash { public: size_t operator()(FeatureIndexValue const & value) const { - return std::hash{}(value.m_id); + return std::hash{}(value.m_featureId); } }; } // namespace std struct FeatureWithRankAndCenter { - FeatureWithRankAndCenter() : m_pt(m2::PointD()), m_id(0), m_rank(0) {} + FeatureWithRankAndCenter() = default; - FeatureWithRankAndCenter(m2::PointD pt, uint32_t id, uint8_t rank) - : m_pt(pt), m_id(id), m_rank(rank) + FeatureWithRankAndCenter(m2::PointD const & pt, uint32_t featureId, uint8_t rank) + : m_pt(pt), m_featureId(featureId), m_rank(rank) { } - bool operator<(FeatureWithRankAndCenter const & o) const { return m_id < o.m_id; } + bool operator<(FeatureWithRankAndCenter const & o) const { return m_featureId < o.m_featureId; } - bool operator==(FeatureWithRankAndCenter const & o) const { return m_id == o.m_id; } + bool operator==(FeatureWithRankAndCenter const & o) const { return m_featureId == o.m_featureId; } void Swap(FeatureWithRankAndCenter & o) { swap(m_pt, o.m_pt); - swap(m_id, o.m_id); + swap(m_featureId, o.m_featureId); swap(m_rank, o.m_rank); } - m2::PointD m_pt; // Center point of the feature. - uint32_t m_id; // Feature identifier. - uint8_t m_rank; // Rank of the feature. + m2::PointD m_pt; // Center point of the feature. + uint32_t m_featureId = 0; // Feature identifier. + uint8_t m_rank = 0; // Rank of the feature. }; namespace std { template <> -class hash +struct hash { public: size_t operator()(FeatureWithRankAndCenter const & value) const { - return std::hash{}(value.m_id); + return std::hash{}(value.m_featureId); } }; } // namespace std @@ -104,7 +104,7 @@ public: void Serialize(Sink & sink, Value const & v) const { serial::SavePoint(sink, v.m_pt, m_codingParams); - WriteToSink(sink, v.m_id); + WriteToSink(sink, v.m_featureId); WriteToSink(sink, v.m_rank); } @@ -119,7 +119,7 @@ public: void DeserializeFromSource(Source & source, Value & v) const { v.m_pt = serial::LoadPoint(source, m_codingParams); - v.m_id = ReadPrimitiveFromSource(source); + v.m_featureId = ReadPrimitiveFromSource(source); v.m_rank = ReadPrimitiveFromSource(source); } @@ -143,7 +143,7 @@ public: template void Serialize(Sink & sink, Value const & v) const { - WriteToSink(sink, v.m_id); + WriteToSink(sink, v.m_featureId); } template @@ -156,7 +156,7 @@ public: template void DeserializeFromSource(Source & source, Value & v) const { - v.m_id = ReadPrimitiveFromSource(source); + v.m_featureId = ReadPrimitiveFromSource(source); } }; @@ -185,7 +185,7 @@ public: { std::vector ids(values.size()); for (size_t i = 0; i < ids.size(); ++i) - ids[i] = values[i].m_id; + ids[i] = values[i].m_featureId; m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(move(ids)); } diff --git a/search/search_tests/mem_search_index_tests.cpp b/search/search_tests/mem_search_index_tests.cpp index 37bb07461e..52a409630d 100644 --- a/search/search_tests/mem_search_index_tests.cpp +++ b/search/search_tests/mem_search_index_tests.cpp @@ -7,6 +7,8 @@ #include "coding/multilang_utf8_string.hpp" +#include "base/stl_add.hpp" +#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include "base/uni_string_dfa.hpp" @@ -21,25 +23,9 @@ using namespace search; using namespace std; using namespace strings; -struct Id +namespace { - explicit Id(uint64_t id) : m_id(id) {} - - bool operator==(Id const & rhs) const { return m_id == rhs.m_id; } - bool operator!=(Id const & rhs) const { return !(*this == rhs); } - bool operator<(Id const & rhs) const { return m_id < rhs.m_id; } - - uint64_t m_id; -}; - -string DebugPrint(Id const & id) { return DebugPrint(id.m_id); } - -template<> -class hash -{ -public: - size_t operator()(Id const & id) const { return std::hash{}(id.m_id); } -}; +using Id = uint64_t; class Doc { @@ -64,47 +50,42 @@ private: class MemSearchIndexTest { public: + using Index = MemSearchIndex; + void Add(Id const & id, Doc const & doc) { m_index.Add(id, doc); } void Erase(Id const & id, Doc const & doc) { m_index.Erase(id, doc); } vector StrictQuery(string const & query, string const & lang) const { - vector prev; - bool full = true; + auto prev = m_index.GetAllIds(); + TEST(IsSortedAndUnique(prev.cbegin(), prev.cend()), ()); vector tokens; NormalizeAndTokenizeString(query, tokens); for (auto const & token : tokens) { - vector curr; - SearchTrieRequest request; request.m_names.emplace_back(token); request.m_langs.Insert(StringUtf8Multilang::GetLangIndex(lang)); + + vector curr; MatchFeaturesInTrie(request, m_index.GetRootIterator(), - [](uint64_t /* id */) { return true; }, - [&curr](Id const & id) { curr.push_back(id); }); + [](Id const & /* id */) { return true; } /* filter */, + [&curr](Id const & id) { curr.push_back(id); } /* toDo */); my::SortUnique(curr); - if (full) - { - prev = curr; - full = false; - } - else - { - vector intersection; - set_intersection(prev.begin(), prev.end(), curr.begin(), curr.end(), - back_inserter(intersection)); - prev = intersection; - } + + vector intersection; + set_intersection(prev.begin(), prev.end(), curr.begin(), curr.end(), + back_inserter(intersection)); + prev = intersection; } return prev; } protected: - MemSearchIndex m_index; + Index m_index; }; UNIT_CLASS_TEST(MemSearchIndexTest, Smoke) @@ -130,3 +111,4 @@ UNIT_CLASS_TEST(MemSearchIndexTest, Smoke) Erase(kHamlet, hamlet); TEST_EQUAL(StrictQuery("question", "en"), vector{}, ()); } +} // namespace diff --git a/search/search_tests/search_tests.pro b/search/search_tests/search_tests.pro index 0962eed770..cafbdf1f82 100644 --- a/search/search_tests/search_tests.pro +++ b/search/search_tests/search_tests.pro @@ -37,6 +37,7 @@ SOURCES += \ locality_finder_test.cpp \ locality_scorer_test.cpp \ locality_selector_test.cpp \ + mem_search_index_tests.cpp \ point_rect_matcher_tests.cpp \ query_saver_tests.cpp \ ranking_tests.cpp \