forked from organicmaps/organicmaps
Review fixes.
This commit is contained in:
parent
a818bb4b37
commit
ef97e8dbaf
13 changed files with 123 additions and 126 deletions
|
@ -229,7 +229,7 @@ UNIT_TEST(CompressedBitVector_SerializationDense)
|
|||
cbv->Serialize(writer);
|
||||
}
|
||||
MemReader reader(buf.data(), buf.size());
|
||||
auto cbv = coding::CompressedBitVectorBuilder::Deserialize(reader);
|
||||
auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
|
||||
TEST(cbv.get(), ());
|
||||
TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
|
||||
TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
|
||||
|
@ -254,7 +254,7 @@ UNIT_TEST(CompressedBitVector_SerializationSparse)
|
|||
cbv->Serialize(writer);
|
||||
}
|
||||
MemReader reader(buf.data(), buf.size());
|
||||
auto cbv = coding::CompressedBitVectorBuilder::Deserialize(reader);
|
||||
auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
|
||||
TEST(cbv.get(), ());
|
||||
TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
|
||||
TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
|
||||
|
|
|
@ -231,6 +231,14 @@ void DenseCBV::Serialize(Writer & writer) const
|
|||
rw::WriteVectorOfPOD(writer, m_bitGroups);
|
||||
}
|
||||
|
||||
// Returns a newly allocated DenseCBV whose pop count and bit groups are
// copied from this object.
unique_ptr<CompressedBitVector> DenseCBV::Clone() const
{
  // Take ownership immediately: copying m_bitGroups allocates and may throw,
  // and a raw pointer would leak in that case.
  unique_ptr<DenseCBV> cbv(new DenseCBV());
  cbv->m_popCount = m_popCount;
  cbv->m_bitGroups = m_bitGroups;
  return unique_ptr<CompressedBitVector>(move(cbv));
}
|
||||
|
||||
SparseCBV::SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
|
||||
{
|
||||
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
|
||||
|
@ -267,6 +275,13 @@ void SparseCBV::Serialize(Writer & writer) const
|
|||
rw::WriteVectorOfPOD(writer, m_positions);
|
||||
}
|
||||
|
||||
// Returns a newly allocated SparseCBV whose list of set-bit positions is
// copied from this object.
unique_ptr<CompressedBitVector> SparseCBV::Clone() const
{
  // Take ownership immediately: copying m_positions allocates and may throw,
  // and a raw pointer would leak in that case.
  unique_ptr<SparseCBV> cbv(new SparseCBV());
  cbv->m_positions = m_positions;
  return unique_ptr<CompressedBitVector>(move(cbv));
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(
|
||||
vector<uint64_t> const & setBits)
|
||||
|
@ -290,7 +305,7 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
|
|||
while (!bitGroups.empty() && bitGroups.back() == 0)
|
||||
bitGroups.pop_back();
|
||||
if (bitGroups.empty())
|
||||
return make_unique<SparseCBV>(bitGroups);
|
||||
return make_unique<SparseCBV>(move(bitGroups));
|
||||
|
||||
uint64_t const maxBit = kBlockSize * (bitGroups.size() - 1) + bits::CeilLog(bitGroups.back());
|
||||
uint64_t popCount = 0;
|
||||
|
@ -312,28 +327,6 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
|
|||
return make_unique<SparseCBV>(setBits);
|
||||
}
|
||||
|
||||
// static
|
||||
// Builds a copy of |cbv| by feeding its underlying storage (bit groups for a
// dense vector, bit positions for a sparse one) back into the builder.
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromCBV(CompressedBitVector const & cbv)
{
  using Strategy = CompressedBitVector::StorageStrategy;
  Strategy const strategy = cbv.GetStorageStrategy();

  if (strategy == Strategy::Dense)
  {
    // FromBitGroups takes over (and trims) the vector it is given, so pass it
    // a private copy rather than the source's own groups.
    auto groupsCopy = static_cast<DenseCBV const &>(cbv).m_bitGroups;
    return CompressedBitVectorBuilder::FromBitGroups(move(groupsCopy));
  }

  if (strategy == Strategy::Sparse)
  {
    auto const & source = static_cast<SparseCBV const &>(cbv);
    return CompressedBitVectorBuilder::FromBitPositions(source.m_positions);
  }

  CHECK(false, ("Unknown strategy when building a compressed bit vector."));
  return unique_ptr<CompressedBitVector>();
}
|
||||
|
||||
string DebugPrint(CompressedBitVector::StorageStrategy strat)
|
||||
{
|
||||
switch (strat)
|
||||
|
|
|
@ -63,6 +63,9 @@ public:
|
|||
// todo(@pimenov). Think about rewriting Serialize and Deserialize to use the
|
||||
// code in old_compressed_bit_vector.{c,h}pp.
|
||||
virtual void Serialize(Writer & writer) const = 0;
|
||||
|
||||
// Copies a bit vector and returns a pointer to the copy.
|
||||
virtual unique_ptr<CompressedBitVector> Clone() const = 0;
|
||||
};
|
||||
|
||||
string DebugPrint(CompressedBitVector::StorageStrategy strat);
|
||||
|
@ -105,6 +108,7 @@ public:
|
|||
bool GetBit(uint64_t pos) const override;
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
void Serialize(Writer & writer) const override;
|
||||
unique_ptr<CompressedBitVector> Clone() const override;
|
||||
|
||||
private:
|
||||
vector<uint64_t> m_bitGroups;
|
||||
|
@ -117,6 +121,8 @@ public:
|
|||
friend class CompressedBitVectorBuilder;
|
||||
using TIterator = vector<uint64_t>::const_iterator;
|
||||
|
||||
SparseCBV() = default;
|
||||
|
||||
explicit SparseCBV(vector<uint64_t> const & setBits);
|
||||
|
||||
explicit SparseCBV(vector<uint64_t> && setBits);
|
||||
|
@ -136,6 +142,7 @@ public:
|
|||
bool GetBit(uint64_t pos) const override;
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
void Serialize(Writer & writer) const override;
|
||||
unique_ptr<CompressedBitVector> Clone() const override;
|
||||
|
||||
inline TIterator Begin() const { return m_positions.cbegin(); }
|
||||
inline TIterator End() const { return m_positions.cend(); }
|
||||
|
@ -155,15 +162,13 @@ public:
|
|||
|
||||
// Chooses a strategy to store the bit vector with bits from a bitmap obtained
|
||||
// by concatenating the elements of bitGroups.
|
||||
static unique_ptr<CompressedBitVector> FromBitGroups(vector<uint64_t> & bitGroups);
|
||||
static unique_ptr<CompressedBitVector> FromBitGroups(vector<uint64_t> && bitGroups);
|
||||
|
||||
// Copies a CBV.
|
||||
static unique_ptr<CompressedBitVector> FromCBV(CompressedBitVector const & cbv);
|
||||
|
||||
// Reads a bit vector from reader which must contain a valid
|
||||
// bit vector representation (see CompressedBitVector::Serialize for the format).
|
||||
template <typename TReader>
|
||||
static unique_ptr<CompressedBitVector> Deserialize(TReader & reader)
|
||||
static unique_ptr<CompressedBitVector> DeserializeFromReader(TReader & reader)
|
||||
{
|
||||
ReaderSource<TReader> src(reader);
|
||||
return DeserializeFromSource(src);
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
#include "generator/dumper.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/feature_processor.hpp"
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
#include "indexer/search_trie.hpp"
|
||||
|
||||
#include "coding/multilang_utf8_string.hpp"
|
||||
|
@ -12,9 +12,9 @@
|
|||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/bind.hpp"
|
||||
#include "std/functional.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/map.hpp"
|
||||
#include "std/queue.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace
|
||||
|
@ -22,40 +22,30 @@ namespace
|
|||
template <typename TValue>
|
||||
struct SearchTokensCollector
|
||||
{
|
||||
priority_queue<pair<uint32_t, strings::UniString>> tokens;
|
||||
strings::UniString m_currentS;
|
||||
uint32_t m_currentCount;
|
||||
|
||||
SearchTokensCollector() : m_currentS(), m_currentCount(0) {}
|
||||
|
||||
void operator()(strings::UniString const & s, TValue const & /* value */)
|
||||
{
|
||||
if (m_currentS == s)
|
||||
{
|
||||
++m_currentCount;
|
||||
}
|
||||
else
|
||||
if (m_currentS != s)
|
||||
{
|
||||
if (m_currentCount > 0)
|
||||
{
|
||||
tokens.push(make_pair(m_currentCount, m_currentS));
|
||||
if (tokens.size() > 100)
|
||||
tokens.pop();
|
||||
}
|
||||
m_tokens.emplace_back(m_currentCount, m_currentS);
|
||||
m_currentS = s;
|
||||
m_currentCount = 0;
|
||||
}
|
||||
++m_currentCount;
|
||||
}
|
||||
|
||||
void Finish()
|
||||
{
|
||||
if (m_currentCount > 0)
|
||||
{
|
||||
tokens.push(make_pair(m_currentCount, m_currentS));
|
||||
if (tokens.size() > 100)
|
||||
tokens.pop();
|
||||
}
|
||||
m_tokens.emplace_back(m_currentCount, m_currentS);
|
||||
sort(m_tokens.begin(), m_tokens.end(), greater<pair<uint32_t, strings::UniString>>());
|
||||
}
|
||||
|
||||
vector<pair<uint32_t, strings::UniString>> m_tokens;
|
||||
strings::UniString m_currentS;
|
||||
uint32_t m_currentCount;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
@ -198,7 +188,7 @@ namespace feature
|
|||
}
|
||||
}
|
||||
|
||||
void DumpSearchTokens(string const & fPath)
|
||||
void DumpSearchTokens(string const & fPath, size_t maxTokensToShow)
|
||||
{
|
||||
using TValue = FeatureIndexValue;
|
||||
|
||||
|
@ -213,11 +203,11 @@ namespace feature
|
|||
trie::ForEachRef(*trieRoot, f, strings::UniString());
|
||||
f.Finish();
|
||||
|
||||
while (!f.tokens.empty())
|
||||
auto freqTokenPairs = f.m_tokens;
|
||||
for (size_t i = 0; i < min(maxTokensToShow, freqTokenPairs.size()); ++i)
|
||||
{
|
||||
strings::UniString const & s = f.tokens.top().second;
|
||||
cout << f.tokens.top().first << " '" << strings::ToUtf8(s) << "'" << endl;
|
||||
f.tokens.pop();
|
||||
auto const & s = f.m_tokens[i].second;
|
||||
cout << f.m_tokens[i].first << " " << strings::ToUtf8(s) << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,5 +6,9 @@ namespace feature
|
|||
{
|
||||
void DumpTypes(string const & fPath);
|
||||
void DumpPrefixes(string const & fPath);
|
||||
void DumpSearchTokens(string const & fPath);
|
||||
|
||||
// Writes top maxTokensToShow tokens sorted by their
|
||||
// frequency, i.e. by the number of features in
|
||||
// an mwm that contain the token in their name.
|
||||
void DumpSearchTokens(string const & fPath, size_t maxTokensToShow);
|
||||
}
|
||||
|
|
|
@ -245,7 +245,7 @@ int main(int argc, char ** argv)
|
|||
feature::DumpPrefixes(datFile);
|
||||
|
||||
if (FLAGS_dump_search_tokens)
|
||||
feature::DumpSearchTokens(datFile);
|
||||
feature::DumpSearchTokens(datFile, 100 /* maxTokensToShow */);
|
||||
|
||||
if (FLAGS_unpack_mwm)
|
||||
UnpackMwm(datFile);
|
||||
|
|
|
@ -47,7 +47,8 @@ struct KeyValuePair
|
|||
template <class TString>
|
||||
KeyValuePair(TString const & key, int value)
|
||||
: m_key(key.begin(), key.end()), m_value(value)
|
||||
{}
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t GetKeySize() const { return m_key.size(); }
|
||||
trie::TrieChar const * GetKeyData() const { return m_key.data(); }
|
||||
|
@ -57,12 +58,12 @@ struct KeyValuePair
|
|||
|
||||
inline size_t value_size() const { return sizeof(m_value); }
|
||||
|
||||
bool operator == (KeyValuePair const & p) const
|
||||
bool operator==(KeyValuePair const & p) const
|
||||
{
|
||||
return (m_key == p.m_key && m_value == p.m_value);
|
||||
}
|
||||
|
||||
bool operator < (KeyValuePair const & p) const
|
||||
bool operator<(KeyValuePair const & p) const
|
||||
{
|
||||
return ((m_key != p.m_key) ? m_key < p.m_key : m_value < p.m_value);
|
||||
}
|
||||
|
@ -94,8 +95,7 @@ struct KeyValuePairBackInserter
|
|||
};
|
||||
|
||||
// The SingleValueSerializer and ValueList classes are similar to
|
||||
// those in indexer/string_file_values.hpp but that file
|
||||
// is not included to avoid coding_tests's dependency from indexer.
|
||||
// those in indexer/string_file_values.hpp.
|
||||
template <typename TPrimitive>
|
||||
class SingleValueSerializer
|
||||
{
|
||||
|
@ -179,30 +179,29 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
|
|||
valueList.Init({'1', '2', '3'});
|
||||
trie::WriteNode(sink, SingleValueSerializer<char>(), 0, valueList, &children[0],
|
||||
&children[0] + ARRAY_SIZE(children));
|
||||
uint8_t const expected [] =
|
||||
{
|
||||
BOOST_BINARY(11000101), // Header: [0b11] [0b000101]
|
||||
3, // Number of values
|
||||
'1', '2', '3', // Values
|
||||
BOOST_BINARY(10000001), // Child 1: header: [+leaf] [-supershort] [2 symbols]
|
||||
MKUC(ZENC(MKSC('1'))), MKUC(ZENC(MKSC('A') - MKSC('1'))), // Child 1: edge
|
||||
1, // Child 1: size
|
||||
MKUC(64 | ZENC(MKSC('B') - MKSC('1'))), // Child 2: header: [-leaf] [+supershort]
|
||||
2, // Child 2: size
|
||||
BOOST_BINARY(00000001), // Child 3: header: [-leaf] [-supershort] [2 symbols]
|
||||
MKUC(ZENC(MKSC('z') - MKSC('B'))), 0, // Child 3: edge
|
||||
3, // Child 3: size
|
||||
BOOST_BINARY(10111111), // Child 4: header: [+leaf] [-supershort] [>= 63 symbols]
|
||||
69, // Child 4: edgeSize - 1
|
||||
MKUC(ZENC(MKSC('a') - MKSC('z'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
|
||||
4, // Child 4: size
|
||||
MKUC(BOOST_BINARY(11000000) | ZENC(0)), // Child 5: header: [+leaf] [+supershort]
|
||||
uint8_t const expected[] = {
|
||||
BOOST_BINARY(11000101), // Header: [0b11] [0b000101]
|
||||
3, // Number of values
|
||||
'1', '2', '3', // Values
|
||||
BOOST_BINARY(10000001), // Child 1: header: [+leaf] [-supershort] [2 symbols]
|
||||
MKUC(ZENC(MKSC('1'))), MKUC(ZENC(MKSC('A') - MKSC('1'))), // Child 1: edge
|
||||
1, // Child 1: size
|
||||
MKUC(64 | ZENC(MKSC('B') - MKSC('1'))), // Child 2: header: [-leaf] [+supershort]
|
||||
2, // Child 2: size
|
||||
BOOST_BINARY(00000001), // Child 3: header: [-leaf] [-supershort] [2 symbols]
|
||||
MKUC(ZENC(MKSC('z') - MKSC('B'))), 0, // Child 3: edge
|
||||
3, // Child 3: size
|
||||
BOOST_BINARY(10111111), // Child 4: header: [+leaf] [-supershort] [>= 63 symbols]
|
||||
69, // Child 4: edgeSize - 1
|
||||
MKUC(ZENC(MKSC('a') - MKSC('z'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2, 2, 2, 2, 2, 2, 2, 2, 2, // Child 4: edge
|
||||
4, // Child 4: size
|
||||
MKUC(BOOST_BINARY(11000000) | ZENC(0)), // Child 5: header: [+leaf] [+supershort]
|
||||
};
|
||||
|
||||
TEST_EQUAL(buf, vector<uint8_t>(&expected[0], &expected[0] + ARRAY_SIZE(expected)), ());
|
||||
|
@ -216,7 +215,7 @@ UNIT_TEST(TrieBuilder_Build)
|
|||
vector<string> possibleStrings(1, string());
|
||||
for (int len = 1; len <= maxLen; ++len)
|
||||
{
|
||||
for (int i = 0, p = static_cast<int>(pow((double) base, len)); i < p; ++i)
|
||||
for (int i = 0, p = static_cast<int>(pow((double)base, len)); i < p; ++i)
|
||||
{
|
||||
string s(len, 'A');
|
||||
int t = i;
|
||||
|
@ -232,28 +231,31 @@ UNIT_TEST(TrieBuilder_Build)
|
|||
for (int i0 = -1; i0 < count; ++i0)
|
||||
for (int i1 = i0; i1 < count; ++i1)
|
||||
for (int i2 = i1; i2 < count; ++i2)
|
||||
{
|
||||
vector<KeyValuePair> v;
|
||||
if (i0 >= 0) v.push_back(KeyValuePair(possibleStrings[i0], i0));
|
||||
if (i1 >= 0) v.push_back(KeyValuePair(possibleStrings[i1], i1 + 10));
|
||||
if (i2 >= 0) v.push_back(KeyValuePair(possibleStrings[i2], i2 + 100));
|
||||
vector<string> vs;
|
||||
for (size_t i = 0; i < v.size(); ++i)
|
||||
vs.push_back(string(v[i].m_key.begin(), v[i].m_key.end()));
|
||||
{
|
||||
vector<KeyValuePair> v;
|
||||
if (i0 >= 0)
|
||||
v.push_back(KeyValuePair(possibleStrings[i0], i0));
|
||||
if (i1 >= 0)
|
||||
v.push_back(KeyValuePair(possibleStrings[i1], i1 + 10));
|
||||
if (i2 >= 0)
|
||||
v.push_back(KeyValuePair(possibleStrings[i2], i2 + 100));
|
||||
vector<string> vs;
|
||||
for (size_t i = 0; i < v.size(); ++i)
|
||||
vs.push_back(string(v[i].m_key.begin(), v[i].m_key.end()));
|
||||
|
||||
vector<uint8_t> buf;
|
||||
PushBackByteSink<vector<uint8_t>> sink(buf);
|
||||
SingleValueSerializer<uint32_t> serializer;
|
||||
trie::Build<PushBackByteSink<vector<uint8_t>>, typename vector<KeyValuePair>::iterator,
|
||||
ValueList<uint32_t>>(sink, serializer, v.begin(), v.end());
|
||||
reverse(buf.begin(), buf.end());
|
||||
vector<uint8_t> buf;
|
||||
PushBackByteSink<vector<uint8_t>> sink(buf);
|
||||
SingleValueSerializer<uint32_t> serializer;
|
||||
trie::Build<PushBackByteSink<vector<uint8_t>>, typename vector<KeyValuePair>::iterator,
|
||||
ValueList<uint32_t>>(sink, serializer, v.begin(), v.end());
|
||||
reverse(buf.begin(), buf.end());
|
||||
|
||||
MemReader memReader = MemReader(&buf[0], buf.size());
|
||||
auto const root = trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serializer);
|
||||
vector<KeyValuePair> res;
|
||||
KeyValuePairBackInserter f;
|
||||
trie::ForEachRef(*root, f, vector<trie::TrieChar>());
|
||||
sort(f.m_v.begin(), f.m_v.end());
|
||||
TEST_EQUAL(v, f.m_v, ());
|
||||
}
|
||||
MemReader memReader = MemReader(&buf[0], buf.size());
|
||||
auto const root = trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serializer);
|
||||
vector<KeyValuePair> res;
|
||||
KeyValuePairBackInserter f;
|
||||
trie::ForEachRef(*root, f, vector<trie::TrieChar>());
|
||||
sort(f.m_v.begin(), f.m_v.end());
|
||||
TEST_EQUAL(v, f.m_v, ());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include "indexer/trie_builder.hpp"
|
||||
#include "indexer/types_skipper.hpp"
|
||||
|
||||
#include "search/search_common.hpp" // for MAX_TOKENS constant
|
||||
#include "search/search_common.hpp"
|
||||
|
||||
#include "defines.hpp"
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ struct FeatureIndexValue
|
|||
|
||||
bool operator==(FeatureIndexValue const & o) const { return m_featureId == o.m_featureId; }
|
||||
|
||||
void Swap(FeatureIndexValue & o) { ::swap(m_featureId, o.m_featureId); }
|
||||
void Swap(FeatureIndexValue & o) { swap(m_featureId, o.m_featureId); }
|
||||
|
||||
uint64_t m_featureId;
|
||||
};
|
||||
|
@ -150,7 +150,7 @@ public:
|
|||
ValueList(ValueList<FeatureIndexValue> const & o)
|
||||
{
|
||||
if (o.m_cbv)
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromCBV(*o.m_cbv);
|
||||
m_cbv = o.m_cbv->Clone();
|
||||
}
|
||||
|
||||
void Init(vector<FeatureIndexValue> const & values)
|
||||
|
@ -158,7 +158,7 @@ public:
|
|||
vector<uint64_t> ids(values.size());
|
||||
for (size_t i = 0; i < ids.size(); ++i)
|
||||
ids[i] = values[i].m_featureId;
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(ids);
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(move(ids));
|
||||
}
|
||||
|
||||
// This method returns number of values in the current instance of
|
||||
|
@ -172,7 +172,7 @@ public:
|
|||
return (m_cbv && m_cbv->PopCount() != 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
bool IsEmpty() const { return !m_cbv || m_cbv->PopCount() == 0; }
|
||||
bool IsEmpty() const { return Size() == 0; }
|
||||
|
||||
template <typename TSink>
|
||||
void Serialize(TSink & sink, SingleValueSerializer<TValue> const & /* serializer */) const
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
// -- Serialized Huffman encoding.
|
||||
// -- Topology of the trie built on Huffman-encoded input strings [2 bits per node, level-order representation].
|
||||
// -- List of pairs (node id, offset). One pair per final node (i.e. a node where a string ends).
|
||||
// The lists of node ids and offsets are both non-decreasing and are delta-encoded with varuints.
|
||||
// The lists of node ids and offsets are both non-decreasing and are delta-encoded with
|
||||
// varuints.
|
||||
// -- Values of final nodes in level-order. The values for final node |id| start at offset |offset|
|
||||
// if there is a pair (id, offset) in the list above.
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
namespace trie
|
||||
{
|
||||
|
||||
typedef uint32_t TrieChar;
|
||||
|
||||
// 95 is a good value for the default baseChar, since both small and capital latin letters
|
||||
|
@ -19,7 +18,7 @@ static uint32_t const DEFAULT_CHAR = 0;
|
|||
template <typename TValueList>
|
||||
class Iterator
|
||||
{
|
||||
//dbg::ObjectTracker m_tracker;
|
||||
// dbg::ObjectTracker m_tracker;
|
||||
|
||||
public:
|
||||
using TValue = typename TValueList::TValue;
|
||||
|
@ -46,7 +45,7 @@ struct EmptyValueReader
|
|||
EmptyValueReader() = default;
|
||||
|
||||
template <typename SourceT>
|
||||
void operator() (SourceT &, ValueType & value) const
|
||||
void operator()(SourceT &, ValueType & value) const
|
||||
{
|
||||
value = 0;
|
||||
}
|
||||
|
@ -61,7 +60,7 @@ struct FixedSizeValueReader
|
|||
};
|
||||
|
||||
template <typename SourceT>
|
||||
void operator() (SourceT & src, ValueType & value) const
|
||||
void operator()(SourceT & src, ValueType & value) const
|
||||
{
|
||||
src.Read(&value.m_data[0], N);
|
||||
}
|
||||
|
|
|
@ -49,15 +49,18 @@ void WriteNode(TSink & sink, TSerializer const & serializer, TrieChar baseChar,
|
|||
uint32_t const valueCount = valueList.Size();
|
||||
if (begChild == endChild && !isRoot)
|
||||
{
|
||||
// Leaf node.
|
||||
// Leaf node.
|
||||
#ifdef DEBUG
|
||||
auto posBefore = sink.Pos();
|
||||
#endif
|
||||
|
||||
valueList.Serialize(sink, serializer);
|
||||
|
||||
#ifdef DEBUG
|
||||
if (valueCount == 0)
|
||||
ASSERT_EQUAL(sink.Pos(), posBefore, ("Empty valueList must produce an empty serialization."));
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
uint32_t const childCount = endChild - begChild;
|
||||
|
@ -215,7 +218,8 @@ void AppendValue(TNodeInfo & node, TValue const & value)
|
|||
// sorted order and we can avoid sorting them before doing
|
||||
// further operations such as ValueList construction.
|
||||
using namespace std::rel_ops;
|
||||
ASSERT(node.m_temporaryValueList.empty() || node.m_temporaryValueList.back() <= value, ());
|
||||
ASSERT(node.m_temporaryValueList.empty() || node.m_temporaryValueList.back() <= value,
|
||||
(node.m_temporaryValueList.size()));
|
||||
if (!node.m_temporaryValueList.empty() && node.m_temporaryValueList.back() == value)
|
||||
return;
|
||||
if (node.m_mayAppend)
|
||||
|
|
|
@ -31,10 +31,9 @@ public:
|
|||
return make_unique<LeafIterator0<TValueList, TSerializer>>(*this);
|
||||
}
|
||||
|
||||
unique_ptr<Iterator<TValueList>> GoToEdge(size_t i) const override
|
||||
unique_ptr<Iterator<TValueList>> GoToEdge(size_t /* i */) const override
|
||||
{
|
||||
ASSERT(false, (i));
|
||||
UNUSED_VALUE(i);
|
||||
ASSERT(false, ());
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
@ -63,7 +62,7 @@ public:
|
|||
{
|
||||
ASSERT_LESS(i, this->m_edge.size(), ());
|
||||
uint32_t const offset = m_edgeInfo[i].m_offset;
|
||||
uint32_t const size = m_edgeInfo[i+1].m_offset - offset;
|
||||
uint32_t const size = m_edgeInfo[i + 1].m_offset - offset;
|
||||
|
||||
if (m_edgeInfo[i].m_isLeaf)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue