diff --git a/coding/coding_tests/trie_test.cpp b/coding/coding_tests/trie_test.cpp index 8935d42563..9460f33d30 100644 --- a/coding/coding_tests/trie_test.cpp +++ b/coding/coding_tests/trie_test.cpp @@ -277,7 +277,7 @@ UNIT_TEST(TrieBuilder_Build) trie::ReadTrie>(memReader, serial::CodingParams()); vector res; KeyValuePairBackInserter f; - trie::ForEachRef(*root, f, vector()); + trie::ForEachRefWithValues(*root, f, vector()); sort(f.m_v.begin(), f.m_v.end()); TEST_EQUAL(v, f.m_v, ()); } diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index a5fc3ebee7..d6ef51dd03 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -312,6 +312,27 @@ unique_ptr CompressedBitVectorBuilder::FromBitGroups( return make_unique(setBits); } +// static +unique_ptr CompressedBitVectorBuilder::FromCBV(CompressedBitVector const & cbv) +{ + auto strat = cbv.GetStorageStrategy(); + switch (strat) + { + case CompressedBitVector::StorageStrategy::Dense: + { + DenseCBV const & dense = static_cast(cbv); + auto bitGroups = dense.m_bitGroups; + return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups)); + } + case CompressedBitVector::StorageStrategy::Sparse: + { + SparseCBV const & sparse = static_cast(cbv); + return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions); + } + } + return unique_ptr(); +} + string DebugPrint(CompressedBitVector::StorageStrategy strat) { switch (strat) diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 95d2d86ac4..82f67d1ad5 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -2,8 +2,11 @@ #include "coding/reader.hpp" #include "coding/writer.hpp" +#include "base/assert.hpp" + #include "std/algorithm.hpp" #include "std/unique_ptr.hpp" +#include "std/utility.hpp" #include "std/vector.hpp" namespace coding @@ -150,6 +153,9 @@ public: // by concatenating the elements of bitGroups. static unique_ptr FromBitGroups(vector && bitGroups); + // Copies a CBV. + static unique_ptr FromCBV(CompressedBitVector const & cbv); + // Reads a bit vector from reader which must contain a valid // bit vector representation (see CompressedBitVector::Serialize for the format). template diff --git a/coding/trie.hpp b/coding/trie.hpp index ef80a14e11..d9433e7f3d 100644 --- a/coding/trie.hpp +++ b/coding/trie.hpp @@ -24,6 +24,8 @@ class Iterator //dbg::ObjectTracker m_tracker; public: + using TValue = typename TValueList::TValue; + struct Edge { typedef buffer_vector EdgeStrT; @@ -70,9 +72,9 @@ struct FixedSizeValueReader template void ForEachRef(Iterator const & iter, TF && f, TString const & s) { - iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue value) + iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & /* value */) { - f(s, value); + f(s); }); for (size_t i = 0; i < iter.m_edge.size(); ++i) { @@ -83,4 +85,19 @@ void ForEachRef(Iterator const & iter, TF && f, TString const & s) } } +template +void ForEachRefWithValues(Iterator const & iter, TF && f, TString const & s) +{ + iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & value) + { + f(s, value); + }); + for (size_t i = 0; i < iter.m_edge.size(); ++i) + { + TString s1(s); + s1.insert(s1.end(), iter.m_edge[i].m_str.begin(), iter.m_edge[i].m_str.end()); + auto it = iter.GoToEdge(i); + ForEachRefWithValues(*it, f, s1); + } +} } // namespace trie diff --git a/coding/trie_builder.hpp b/coding/trie_builder.hpp index d42ebaf119..b9f22d3a14 100644 --- a/coding/trie_builder.hpp +++ b/coding/trie_builder.hpp @@ -17,13 +17,13 @@ // pre-order alphabetically reversed (parent, last child, first child). // Leaf node format: -// [value] ... [value] +// [valueList] // Internal node format: // [1: header]: [2: min(valueCount, 3)] [6: min(childCount, 63)] // [vu valueCount]: if valueCount in header == 3 // [vu childCount]: if childCount in header == 63 -// [value] ... [value] +// [valueList] // [childInfo] ... [childInfo] // Child info format: diff --git a/coding/trie_reader.hpp b/coding/trie_reader.hpp index f3978f460a..6d20ffd265 100644 --- a/coding/trie_reader.hpp +++ b/coding/trie_reader.hpp @@ -94,7 +94,7 @@ private: if (childCount == 63) childCount = ReadVarUint(src); - // [value] ... [value] + // [valueList] m_valueList.Deserialize(src, valueCount); // [childInfo] ... [childInfo] diff --git a/generator/dumper.cpp b/generator/dumper.cpp index 38a5dde4c8..d118ca766a 100644 --- a/generator/dumper.cpp +++ b/generator/dumper.cpp @@ -164,7 +164,7 @@ namespace feature SearchTokensCollector() : m_currentS(), m_currentCount(0) {} - void operator()(strings::UniString const & s, FeatureWithRankAndCenter const &) + void operator()(strings::UniString const & s) { if (m_currentS == s) { @@ -200,7 +200,7 @@ namespace feature feature::DataHeader header(container); serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams())); - auto const pTrieRoot = trie::ReadTrie>( + auto const pTrieRoot = trie::ReadTrie>( container.GetReader(SEARCH_INDEX_FILE_TAG), codingParams); SearchTokensCollector f; diff --git a/indexer/search_index_builder.cpp b/indexer/search_index_builder.cpp index 62220efc6d..891b4b5e45 100644 --- a/indexer/search_index_builder.cpp +++ b/indexer/search_index_builder.cpp @@ -177,11 +177,15 @@ struct ValueBuilder template <> struct ValueBuilder { + ValueBuilder(serial::CodingParams const & cp) : m_cp(cp) {} + void MakeValue(FeatureType const & /* f */, feature::TypesHolder const & /* types */, uint32_t index, FeatureIndexValue & value) const { - value.m_value = index; + value.m_featureId = index; } + + serial::CodingParams m_cp; }; template @@ -258,22 +262,23 @@ public: } }; +template void AddFeatureNameIndexPairs(FilesContainerR const & container, CategoriesHolder & categoriesHolder, - StringsFile & stringsFile) + StringsFile & stringsFile) { FeaturesVectorTest features(container); feature::DataHeader const & header = features.GetHeader(); serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams())); - ValueBuilder valueBuilder(codingParams); + ValueBuilder valueBuilder(codingParams); unique_ptr synonyms; if (header.GetType() == feature::DataHeader::world) synonyms.reset(new SynonymsHolder(GetPlatform().WritablePathForFile(SYNONYMS_FILE))); - features.GetVector().ForEach(FeatureInserter>( + features.GetVector().ForEach(FeatureInserter>( synonyms.get(), stringsFile, categoriesHolder, header.GetScaleRange(), valueBuilder)); } } // namespace @@ -326,12 +331,14 @@ bool BuildSearchIndexFromDatFile(string const & datFile, bool forceRebuild) void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter, string const & stringsFilePath) { + using TValue = FeatureIndexValue; + Platform & platform = GetPlatform(); LOG(LINFO, ("Start building search index for", container.GetFileName())); my::Timer timer; - StringsFile stringsFile(stringsFilePath); + StringsFile stringsFile(stringsFilePath); CategoriesHolder categoriesHolder(platform.GetReader(SEARCH_CATEGORIES_FILE_NAME)); @@ -341,9 +348,8 @@ void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter, LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds())); stringsFile.OpenForRead(); - trie::Build::IteratorT, - ValueList>(indexWriter, stringsFile.Begin(), - stringsFile.End()); + trie::Build::IteratorT, ValueList>( + indexWriter, stringsFile.Begin(), stringsFile.End()); LOG(LINFO, ("End building search index, elapsed seconds:", timer.ElapsedSeconds())); } diff --git a/indexer/search_trie.hpp b/indexer/search_trie.hpp index baece68132..0ce0d8904f 100644 --- a/indexer/search_trie.hpp +++ b/indexer/search_trie.hpp @@ -16,7 +16,7 @@ static const uint8_t kPointCodingBits = 20; namespace trie { -using DefaultIterator = trie::Iterator>; +using DefaultIterator = trie::Iterator>; inline serial::CodingParams GetCodingParams(serial::CodingParams const & orig) { diff --git a/indexer/string_file_values.hpp b/indexer/string_file_values.hpp index 45279c7012..105b6a855c 100644 --- a/indexer/string_file_values.hpp +++ b/indexer/string_file_values.hpp @@ -23,31 +23,42 @@ /// A wrapper around feature index. struct FeatureIndexValue { - FeatureIndexValue() : m_value(0) {} + FeatureIndexValue() : m_featureId(0) {} + FeatureIndexValue(uint64_t featureId) : m_featureId(featureId) {} + + // The serialization and deserialization is needed for StringsFile. + // Use ValueList for group serialization in CBVs. template - void Write(TWriter & writer) const + void Serialize(TWriter & writer) const { - WriteToSink(writer, m_value); + WriteToSink(writer, m_featureId); } template - void Read(TReader & reader) + void Deserialize(TReader & reader) { - m_value = ReadPrimitiveFromSource(reader); + ReaderSource src(reader); + DeserializeFromSource(src); } - inline void const * data() const { return &m_value; } + template + void DeserializeFromSource(TSource & src) + { + m_featureId = ReadPrimitiveFromSource(src); + } - inline size_t size() const { return sizeof(m_value); } + inline void const * data() const { return &m_featureId; } - bool operator<(FeatureIndexValue const & value) const { return m_value < value.m_value; } + inline size_t size() const { return sizeof(m_featureId); } - bool operator==(FeatureIndexValue const & value) const { return m_value == value.m_value; } + bool operator<(FeatureIndexValue const & o) const { return m_featureId < o.m_featureId; } - void swap(FeatureIndexValue & value) { ::swap(m_value, value.m_value); } + bool operator==(FeatureIndexValue const & o) const { return m_featureId == o.m_featureId; } - uint64_t m_value; + void Swap(FeatureIndexValue & o) { ::swap(m_featureId, o.m_featureId); } + + uint64_t m_featureId; }; struct FeatureWithRankAndCenter @@ -117,12 +128,20 @@ public: ValueList() : m_cbv(unique_ptr()) {} + ValueList(ValueList const & o) : m_codingParams(o.m_codingParams) + { + if (o.m_cbv) + m_cbv = coding::CompressedBitVectorBuilder::FromCBV(*o.m_cbv); + else + m_cbv = unique_ptr(); + } + void Init(vector const & values) { - vector offsets(values.size()); - for (size_t i = 0; i < offsets.size(); ++i) - offsets[i] = values[i].m_value; - m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(offsets); + vector ids(values.size()); + for (size_t i = 0; i < ids.size(); ++i) + ids[i] = values[i].m_featureId; + m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(ids); } // This method returns number of values in the current instance of @@ -161,7 +180,10 @@ public: template void ForEach(TF && f) const { - coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, forward(f)); + coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, [&](uint64_t const bitPosition) + { + f(TValue(bitPosition)); + }); } void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; } diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index c3bcb4c638..9fc8f3e7e4 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -113,7 +113,6 @@ void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar co ASSERT_EQUAL ( symbolsMatched, s.size(), () ); - LOG(LINFO, ("foreach`ing", it->m_valueList.Size())); it->m_valueList.ForEach(f); } @@ -152,11 +151,9 @@ void PrefixMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar } } -template +template class OffsetIntersecter { - using TValue = FeatureWithRankAndCenter; - struct HashFn { size_t operator()(TValue const & v) const { return v.m_featureId; } @@ -232,7 +229,7 @@ struct TrieRootPrefix } }; -template +template class TrieValuesHolder { public: @@ -246,7 +243,7 @@ public: m_index = index; } - void operator()(Query::TTrieValue const & v) + void operator()(TValue const & v) { if (m_filter(v.m_featureId)) m_holder[m_index].push_back(v); @@ -261,7 +258,7 @@ public: } private: - vector> m_holder; + vector> m_holder; size_t m_index; TFilter const & m_filter; }; @@ -380,10 +377,11 @@ template void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot, TFilter const & filter, ToDo && toDo) { + using TValue = trie::DefaultIterator::TValue; TrieValuesHolder categoriesHolder(filter); bool const categoriesMatched = MatchCategoriesInTrie(params, trieRoot, categoriesHolder); - impl::OffsetIntersecter intersecter(filter); + impl::OffsetIntersecter intersecter(filter); for (size_t i = 0; i < params.m_tokens.size(); ++i) { ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) diff --git a/search/retrieval.cpp b/search/retrieval.cpp index c715469060..c3521fcebd 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -67,9 +67,8 @@ unique_ptr RetrieveAddressFeatures(MwmSet::MwmHandl ASSERT(value, ()); serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams())); ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); - auto const trieRoot = - trie::ReadTrie, ValueList>( - SubReaderWrapper(searchReader.GetPtr()), codingParams); + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), codingParams); auto emptyFilter = [](uint32_t /* featureId */) { diff --git a/search/search_query.cpp b/search/search_query.cpp index 120cc4bf22..30b084467e 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -1611,9 +1611,8 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2 ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); - auto const trieRoot = - trie::ReadTrie, ValueList>( - SubReaderWrapper(searchReader.GetPtr()), cp); + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), cp); ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) { diff --git a/search/search_query.hpp b/search/search_query.hpp index db6ba4c74a..b91575d162 100644 --- a/search/search_query.hpp +++ b/search/search_query.hpp @@ -110,7 +110,7 @@ public: /// @name This stuff is public for implementation classes in search_query.cpp /// Do not use it in client code. //@{ - using TTrieValue = FeatureWithRankAndCenter; + using TTrieValue = FeatureIndexValue; void InitParams(bool localitySearch, SearchQueryParams & params);