diff --git a/coding/coding_tests/trie_test.cpp b/coding/coding_tests/trie_test.cpp index 9460f33d30..ec86f3c637 100644 --- a/coding/coding_tests/trie_test.cpp +++ b/coding/coding_tests/trie_test.cpp @@ -167,11 +167,9 @@ public: template void Deserialize(TSource & src) { + m_values.clear(); while (src.Size() > 0) - { - m_values.push_back(TValue()); - m_values.back() = ReadPrimitiveFromSource(src); - } + m_values.emplace_back(ReadPrimitiveFromSource(src)); } template diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index d6ef51dd03..da80b04d42 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -185,6 +185,7 @@ DenseCBV::DenseCBV(vector const & setBits) { if (setBits.empty()) { + m_popCount = 0; return; } uint64_t const maxBit = *max_element(setBits.begin(), setBits.end()); diff --git a/coding/trie.hpp b/coding/trie.hpp index d9433e7f3d..d8de8d1e68 100644 --- a/coding/trie.hpp +++ b/coding/trie.hpp @@ -4,8 +4,6 @@ #include "base/base.hpp" #include "base/buffer_vector.hpp" -#include "indexer/string_file_values.hpp" - #include "std/unique_ptr.hpp" namespace trie diff --git a/coding/trie_builder.hpp b/coding/trie_builder.hpp index b9f22d3a14..7231548bbe 100644 --- a/coding/trie_builder.hpp +++ b/coding/trie_builder.hpp @@ -44,14 +44,15 @@ template void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList, TChildIter const begChild, TChildIter const endChild, bool isRoot = false) { + uint32_t const valueCount = valueList.Size(); if (begChild == endChild && !isRoot) { // Leaf node. + WriteVarUint(sink, valueCount); valueList.Serialize(sink); return; } uint32_t const childCount = endChild - begChild; - uint32_t const valueCount = valueList.Size(); uint8_t const header = static_cast((min(valueCount, 3U) << 6) + min(childCount, 63U)); sink.Write(&header, 1); if (valueCount >= 3) @@ -140,8 +141,11 @@ struct NodeInfo TValueList m_valueList; bool m_mayAppend; - NodeInfo() : m_begPos(0), m_char(0) {} - NodeInfo(uint64_t pos, TrieChar trieChar) : m_begPos(pos), m_char(trieChar), m_mayAppend(true) {} + NodeInfo() : m_begPos(0), m_char(0), m_valueList(TValueList()), m_mayAppend(true) {} + NodeInfo(uint64_t pos, TrieChar trieChar) + : m_begPos(pos), m_char(trieChar), m_valueList(TValueList()), m_mayAppend(true) + { + } // It is finalized in the sense that no more appends are possible // so it is a fine moment to initialize the underlying ValueList. diff --git a/coding/trie_reader.hpp b/coding/trie_reader.hpp index 6d20ffd265..878323f841 100644 --- a/coding/trie_reader.hpp +++ b/coding/trie_reader.hpp @@ -22,8 +22,9 @@ public: LeafIterator0(TReader const & reader, serial::CodingParams const & codingParams) { ReaderSource src(reader); + uint32_t valueCount = ReadVarUint(src); m_valueList.SetCodingParams(codingParams); - m_valueList.Deserialize(src); + m_valueList.Deserialize(src, valueCount); // todo(@mpimenov) There used to be an assert here // that src is completely exhausted by this time. } @@ -69,8 +70,10 @@ public: uint32_t const size = m_edgeInfo[i+1].m_offset - offset; if (m_edgeInfo[i].m_isLeaf) + { return make_unique>(m_reader.SubReader(offset, size), m_codingParams); + } return make_unique>( m_reader.SubReader(offset, size), this->m_edge[i].m_str.back(), m_codingParams); diff --git a/generator/dumper.cpp b/generator/dumper.cpp index d118ca766a..0704513fd1 100644 --- a/generator/dumper.cpp +++ b/generator/dumper.cpp @@ -200,11 +200,11 @@ namespace feature feature::DataHeader header(container); serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams())); - auto const pTrieRoot = trie::ReadTrie>( + auto const trieRoot = trie::ReadTrie>( container.GetReader(SEARCH_INDEX_FILE_TAG), codingParams); SearchTokensCollector f; - trie::ForEachRef(*pTrieRoot, f, strings::UniString()); + trie::ForEachRef(*trieRoot, f, strings::UniString()); f.Finish(); while (!f.tokens.empty()) diff --git a/indexer/search_index_builder.cpp b/indexer/search_index_builder.cpp index 891b4b5e45..1afa4abe3f 100644 --- a/indexer/search_index_builder.cpp +++ b/indexer/search_index_builder.cpp @@ -297,8 +297,8 @@ bool BuildSearchIndexFromDatFile(string const & datFile, bool forceRebuild) my::GetNameFromFullPath(mwmName); my::GetNameWithoutExt(mwmName); string const indexFilePath = platform.WritablePathForFile(mwmName + ".sdx.tmp"); - string const stringsFilePath = platform.WritablePathForFile(mwmName + ".sdx.strings.tmp"); MY_SCOPE_GUARD(indexFileGuard, bind(&FileWriter::DeleteFileX, indexFilePath)); + string const stringsFilePath = platform.WritablePathForFile(mwmName + ".sdx.strings.tmp"); MY_SCOPE_GUARD(stringsFileGuard, bind(&FileWriter::DeleteFileX, stringsFilePath)); try diff --git a/indexer/string_file.hpp b/indexer/string_file.hpp index 105f7cf94b..72feca6004 100644 --- a/indexer/string_file.hpp +++ b/indexer/string_file.hpp @@ -199,7 +199,7 @@ private: QValue(TString const & s, size_t i) : m_string(s), m_index(i) {} - inline bool operator>(QValue const & rhs) const { return !(m_string < rhs.m_string); } + inline bool operator>(QValue const & rhs) const { return !(m_string < rhs.m_string) && !(m_string == rhs.m_string); } }; priority_queue, greater> m_queue; diff --git a/indexer/string_file_values.hpp b/indexer/string_file_values.hpp index 105b6a855c..8031a88fd3 100644 --- a/indexer/string_file_values.hpp +++ b/indexer/string_file_values.hpp @@ -63,7 +63,10 @@ struct FeatureIndexValue struct FeatureWithRankAndCenter { - FeatureWithRankAndCenter() = default; + FeatureWithRankAndCenter() + : m_pt(m2::PointD()), m_featureId(0), m_rank(0), m_codingParams(serial::CodingParams()) + { + } FeatureWithRankAndCenter(m2::PointD pt, uint32_t featureId, uint8_t rank, serial::CodingParams codingParams) @@ -108,7 +111,7 @@ struct FeatureWithRankAndCenter void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; } m2::PointD m_pt; // Center point of the feature. - uint32_t m_featureId; // Offset of the feature. + uint32_t m_featureId; // Feature identifier. uint8_t m_rank; // Rank of the feature. serial::CodingParams m_codingParams; }; @@ -126,7 +129,10 @@ class ValueList public: using TValue = FeatureIndexValue; - ValueList() : m_cbv(unique_ptr()) {} + ValueList() + : m_cbv(unique_ptr()), m_codingParams(serial::CodingParams()) + { + } ValueList(ValueList const & o) : m_codingParams(o.m_codingParams) { @@ -150,37 +156,54 @@ public: // compressed bit vector, this method returns 1 when there're at // least one feature's index in the list - so, compressed bit // vector will be built and serialized - and 0 otherwise. - size_t Size() const { return m_cbv->PopCount() == 0 ? 0 : 1; } + size_t Size() const + { + if (!m_cbv) + return 0; + return m_cbv->PopCount() == 0 ? 0 : 1; + } - bool IsEmpty() const { return m_cbv->PopCount(); } + bool IsEmpty() const + { + if (!m_cbv) + return true; + return m_cbv->PopCount() == 0; + } template void Serialize(TSink & sink) const { + if (IsEmpty()) + return; vector buf; MemWriter> writer(buf); m_cbv->Serialize(writer); sink.Write(buf.data(), buf.size()); } - // Note the default parameter. It is here for compatibility with + // Note the valueCount parameter. It is here for compatibility with // an old data format that was serializing FeatureWithRankAndCenter`s. // They were put in a vector, this vector's size was encoded somehow // and then the vector was written with a method similar to Serialize above. // The deserialization code read the valueCount separately and then // read each FeatureWithRankAndCenter one by one. - // A newer approach is to make Serialize/Deserialize responsible for - // every part of serialization and as such it does not need valueCount. + // A better approach is to make Serialize/Deserialize responsible for + // every part of serialization and as such it should not need valueCount. template - void Deserialize(TSource & src, uint32_t valueCount = 0) + void Deserialize(TSource & src, uint32_t valueCount) { - m_cbv = coding::CompressedBitVectorBuilder::DeserializeFromSource(src); + if (valueCount > 0) + m_cbv = coding::CompressedBitVectorBuilder::DeserializeFromSource(src); + else + m_cbv = unique_ptr(); } template void ForEach(TF && f) const { - coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, [&](uint64_t const bitPosition) + if (!m_cbv) + return; + coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, [&f](uint64_t const bitPosition) { f(TValue(bitPosition)); }); @@ -201,7 +224,8 @@ class ValueList public: using TValue = FeatureWithRankAndCenter; - ValueList() = default; + ValueList() : m_codingParams(serial::CodingParams()) {} + ValueList(serial::CodingParams const & codingParams) : m_codingParams(codingParams) {} void Init(vector const & values) { m_values = values; } @@ -230,17 +254,12 @@ public: template void Deserialize(TSource & src) { - uint32_t const size = static_cast(src.Size()); - while (src.Pos() < size) + m_values.clear(); + while (src.Size() > 0) { -#ifdef DEBUG - uint64_t const pos = src.Pos(); -#endif m_values.push_back(TValue()); m_values.back().DeserializeFromSource(src); - ASSERT_NOT_EQUAL(pos, src.Pos(), ()); } - ASSERT_EQUAL(size, src.Pos(), ()); } template