[search] Do not store ranks in the search index.

This commit is contained in:
Yury Melnichek 2012-01-25 21:11:47 +03:00 committed by Alex Zolotarev
parent d8ed2e335b
commit 75c45c2f4c
5 changed files with 13 additions and 43 deletions

View file

@ -39,9 +39,9 @@ struct FeatureNameInserter
AddToken(lang, s, m_rank);
}
void AddToken(signed char lang, strings::UniString const & s, uint32_t rank) const
void AddToken(signed char lang, strings::UniString const & s, uint32_t /*rank*/) const
{
m_names.AddString(StringsFile::StringT(s, lang, m_pos, static_cast<uint8_t>(min(rank, 255U))));
m_names.AddString(StringsFile::StringT(s, lang, m_pos));
}
bool operator()(signed char lang, string const & name) const
@ -80,17 +80,6 @@ struct FeatureInserter
}
};
struct MaxValueCalc
{
typedef uint8_t ValueType;
ValueType operator() (void const * p, uint32_t size) const
{
ASSERT_EQUAL(size, 5, ());
return *static_cast<uint8_t const *>(p);
}
};
} // unnamed namespace
void indexer::BuildSearchIndex(FeaturesVector const & featuresVector, Writer & writer,
@ -103,8 +92,7 @@ void indexer::BuildSearchIndex(FeaturesVector const & featuresVector, Writer & w
names.EndAdding();
names.OpenForRead();
trie::Build(writer, names.Begin(), names.End(),
trie::builder::MaxValueEdgeBuilder<MaxValueCalc>());
trie::Build(writer, names.Begin(), names.End(), trie::builder::EmptyEdgeBuilder());
// at this point all readers should be dead
}

View file

@ -20,27 +20,16 @@ struct ValueReader
{
struct ValueType
{
uint8_t m_rank; // Search rank of the feature.
uint32_t m_featureId; // Offset of the feature.
};
template <typename SourceT> void operator() (SourceT & src, ValueType & value) const
{
value.m_rank = ReadPrimitiveFromSource<uint8_t>(src);
value.m_featureId = ReadPrimitiveFromSource<uint32_t>(src);
}
};
// Edge value: maximum search rank of the subtree is stored.
struct EdgeValueReader
{
typedef uint8_t ValueType;
template <typename SourceT> void operator() (SourceT & src, ValueType & value) const
{
src.Read(&value, 1);
}
};
typedef ::trie::reader::EmptyValueReader EdgeValueReader;
} // namespace search::trie

View file

@ -18,7 +18,6 @@ StringsFile::IdT StringsFile::StringT::Write(TWriter & writer) const
rw::Write(writer, m_name);
WriteVarUint(writer, m_pos);
WriteToSink(writer, m_rank);
return pos;
}
@ -28,15 +27,12 @@ void StringsFile::StringT::Read(TReader & src)
{
rw::Read(src, m_name);
m_pos = ReadVarUint<uint32_t>(src);
m_rank = ReadPrimitiveFromSource<uint8_t>(src);
}
bool StringsFile::StringT::operator < (StringT const & name) const
{
if (m_name != name.m_name)
return m_name < name.m_name;
if (GetRank() != name.GetRank())
return GetRank() > name.GetRank();
if (GetOffset() != name.GetOffset())
return GetOffset() < name.GetOffset();
return false;
@ -44,7 +40,7 @@ bool StringsFile::StringT::operator < (StringT const & name) const
bool StringsFile::StringT::operator == (StringT const & name) const
{
return (m_name == name.m_name && m_pos == name.m_pos && m_rank == name.m_rank);
return (m_name == name.m_name && m_pos == name.m_pos);
}
void StringsFile::AddString(StringT const & s)

View file

@ -21,12 +21,11 @@ public:
{
strings::UniString m_name;
uint32_t m_pos;
uint8_t m_rank;
public:
StringT() {}
StringT(strings::UniString const & name, signed char lang, uint32_t pos, uint8_t rank)
: m_pos(pos), m_rank(rank)
StringT(strings::UniString const & name, signed char lang, uint32_t pos)
: m_pos(pos)
{
m_name.reserve(name.size() + 1);
m_name.push_back(static_cast<uint8_t>(lang));
@ -40,13 +39,11 @@ public:
template <class TCont> void SerializeValue(TCont & cont) const
{
cont.resize(5);
cont[0] = m_rank;
cont.resize(4);
uint32_t const i = SwapIfBigEndian(m_pos);
memcpy(&cont[1], &i, 4);
memcpy(&cont[0], &i, 4);
}
uint8_t GetRank() const { return m_rank; }
uint32_t GetOffset() const { return m_pos; }
bool operator < (StringT const & name) const;
@ -59,7 +56,6 @@ public:
{
m_name.swap(r.m_name);
swap(m_pos, r.m_pos);
swap(m_rank, r.m_rank);
}
};

View file

@ -81,7 +81,7 @@ void FullMatchInTrie(TrieIterator const & trieRoot,
if (!pIter || symbolsMatched != s.size())
return;
for (size_t i = 0; i < pIter->m_value.size(); ++i)
f(pIter->m_value[i].m_featureId, pIter->m_value[i].m_rank);
f(pIter->m_value[i].m_featureId);
}
template <typename F>
@ -113,7 +113,7 @@ void PrefixMatchInTrie(TrieIterator const & trieRoot,
scoped_ptr<search::TrieIterator> pIter(trieQueue.top());
trieQueue.pop();
for (size_t i = 0; i < pIter->m_value.size(); ++i)
f(pIter->m_value[i].m_featureId, pIter->m_value[i].m_rank);
f(pIter->m_value[i].m_featureId);
for (size_t i = 0; i < pIter->m_edge.size(); ++i)
trieQueue.push(pIter->GoToEdge(i));
}
@ -131,7 +131,8 @@ template <class FilterT> struct OffsetIntersecter
explicit OffsetIntersecter(FilterT const & filter)
: m_filter(filter), m_bFirstStep(true) {}
void operator() (uint32_t offset, uint8_t rank)
// TODO: Remove rank from here.
void operator() (uint32_t offset, uint8_t rank = 0)
{
if (!m_filter(offset))
return;