forked from organicmaps/organicmaps
[search] New index generation algorithm.
This commit is contained in:
parent
edc3df3fc6
commit
1e5a568fcd
3 changed files with 120 additions and 62 deletions
|
@ -97,16 +97,11 @@ void indexer::BuildSearchIndex(FeaturesVector const & featuresVector, Writer & w
|
|||
string const & tmpFilePath)
|
||||
{
|
||||
{
|
||||
StringsFile names;
|
||||
StringsFile names(tmpFilePath);
|
||||
featuresVector.ForEachOffset(FeatureInserter(names));
|
||||
|
||||
{
|
||||
FileWriter writer(tmpFilePath);
|
||||
names.OpenForWrite(&writer);
|
||||
featuresVector.ForEachOffset(FeatureInserter(names));
|
||||
}
|
||||
|
||||
names.OpenForRead(new FileReader(tmpFilePath));
|
||||
names.SortStrings();
|
||||
names.EndAdding();
|
||||
names.OpenForRead();
|
||||
|
||||
trie::Build(writer, names.Begin(), names.End(),
|
||||
trie::builder::MaxValueEdgeBuilder<MaxValueCalc>());
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
#include "string_file.hpp"
|
||||
|
||||
#include "../coding/read_write_utils.hpp"
|
||||
#include "../coding/reader.hpp"
|
||||
#include "../coding/writer.hpp"
|
||||
#include "../coding/file_reader.hpp"
|
||||
#include "../coding/file_writer.hpp"
|
||||
|
||||
#include "../base/logging.hpp"
|
||||
|
||||
#include "../std/algorithm.hpp"
|
||||
#include "../std/bind.hpp"
|
||||
|
||||
|
||||
template <class TWriter>
|
||||
|
@ -21,11 +24,8 @@ StringsFile::IdT StringsFile::StringT::Write(TWriter & writer) const
|
|||
}
|
||||
|
||||
template <class TReader>
|
||||
void StringsFile::StringT::Read(IdT id, TReader & reader)
|
||||
void StringsFile::StringT::Read(TReader & src)
|
||||
{
|
||||
ReaderSource<TReader> src(reader);
|
||||
src.Skip(id);
|
||||
|
||||
rw::Read(src, m_name);
|
||||
m_pos = ReadVarUint<uint32_t>(src);
|
||||
m_rank = ReadPrimitiveFromSource<uint8_t>(src);
|
||||
|
@ -47,30 +47,81 @@ bool StringsFile::StringT::operator == (StringT const & name) const
|
|||
return (m_name == name.m_name && m_pos == name.m_pos && m_rank == name.m_rank);
|
||||
}
|
||||
|
||||
/// Drops all chunk readers, then deletes every temporary chunk file
/// produced so far (indices 0 .. m_index - 1).
StringsFile::~StringsFile()
{
  // Readers go first; presumably so that no chunk file is still held open
  // at the moment it is deleted.
  m_readers.clear();

  int chunk = 0;
  while (chunk < m_index)
  {
    string const chunkPath = FormatFilePath(chunk);
    FileWriter::DeleteFileX(chunkPath);
    ++chunk;
  }
}
|
||||
|
||||
void StringsFile::AddString(StringT const & s)
|
||||
{
|
||||
ASSERT ( m_writer != 0, () );
|
||||
m_ids.push_back(s.Write(*m_writer));
|
||||
}
|
||||
if (m_strings.size() >= 30000)
|
||||
Flush();
|
||||
|
||||
bool StringsFile::StringCompare::operator() (IdT const & id1, IdT const & id2) const
|
||||
{
|
||||
StringT str[2];
|
||||
str[0].Read(id1, m_file.m_reader);
|
||||
str[1].Read(id2, m_file.m_reader);
|
||||
return (str[0] < str[1]);
|
||||
}
|
||||
|
||||
void StringsFile::SortStrings()
|
||||
{
|
||||
stable_sort(m_ids.begin(), m_ids.end(), StringCompare(*this));
|
||||
m_strings.push_back(s);
|
||||
}
|
||||
|
||||
StringsFile::StringT StringsFile::IteratorT::dereference() const
|
||||
{
|
||||
ASSERT_LESS ( m_index, m_file->m_ids.size(), () );
|
||||
|
||||
StringT s;
|
||||
s.Read(m_file->m_ids[m_index], m_file->m_reader);
|
||||
return s;
|
||||
ASSERT ( !m_file.m_queue.empty(), () );
|
||||
return m_file.m_queue.top().m_string;
|
||||
}
|
||||
|
||||
void StringsFile::IteratorT::increment()
|
||||
{
|
||||
ASSERT ( !m_file.m_queue.empty(), () );
|
||||
int const index = m_file.m_queue.top().m_index;
|
||||
|
||||
m_file.m_queue.pop();
|
||||
|
||||
if (!m_file.PushNextValue(index))
|
||||
m_end = m_file.m_queue.empty();
|
||||
}
|
||||
|
||||
/// Returns the path of chunk file number i: m_filePath followed by "." and
/// strings::to_string(i).
string StringsFile::FormatFilePath(int i) const
{
  string chunkPath(m_filePath);
  chunkPath += string(".");
  chunkPath += strings::to_string(i);
  return chunkPath;
}
|
||||
|
||||
void StringsFile::Flush()
|
||||
{
|
||||
sort(m_strings.begin(), m_strings.end());
|
||||
|
||||
FileWriter w(FormatFilePath(m_index++));
|
||||
for_each(m_strings.begin(), m_strings.end(), bind(&StringT::Write<FileWriter>, _1, ref(w)));
|
||||
|
||||
m_strings.clear();
|
||||
}
|
||||
|
||||
bool StringsFile::PushNextValue(int i)
|
||||
{
|
||||
try
|
||||
{
|
||||
StringT s;
|
||||
s.Read(m_readers[i]);
|
||||
|
||||
m_queue.push(QValue(s, i));
|
||||
return true;
|
||||
}
|
||||
catch (SourceOutOfBoundsException const &)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void StringsFile::EndAdding()
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
||||
void StringsFile::OpenForRead()
|
||||
{
|
||||
for (int i = 0; i < m_index; ++i)
|
||||
{
|
||||
m_readers.push_back(ReaderT(new FileReader(FormatFilePath(i), 6, 1)));
|
||||
|
||||
CHECK ( PushNextValue(i), () );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "../coding/writer.hpp"
|
||||
#include "../coding/reader.hpp"
|
||||
|
||||
#include "../base/string_utils.hpp"
|
||||
|
||||
#include "../std/iterator_facade.hpp"
|
||||
#include "../std/queue.hpp"
|
||||
#include "../std/functional.hpp"
|
||||
|
||||
|
||||
class StringsFile
|
||||
|
@ -33,6 +34,8 @@ public:
|
|||
uint32_t GetKeySize() const { return m_name.size(); }
|
||||
uint32_t const * GetKeyData() const { return m_name.data(); }
|
||||
|
||||
strings::UniString const & GetString() const { return m_name; }
|
||||
|
||||
template <class TCont> void SerializeValue(TCont & cont) const
|
||||
{
|
||||
cont.resize(5);
|
||||
|
@ -48,7 +51,7 @@ public:
|
|||
bool operator == (StringT const & name) const;
|
||||
|
||||
template <class TWriter> IdT Write(TWriter & writer) const;
|
||||
template <class TReader> void Read(IdT id, TReader & reader);
|
||||
template <class TReader> void Read(TReader & src);
|
||||
|
||||
void Swap(StringT & r)
|
||||
{
|
||||
|
@ -58,46 +61,55 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class StringCompare
|
||||
{
|
||||
StringsFile & m_file;
|
||||
public:
|
||||
StringCompare(StringsFile & file) : m_file(file) {}
|
||||
bool operator() (IdT const & id1, IdT const & id2) const;
|
||||
};
|
||||
|
||||
class IteratorT : public iterator_facade<IteratorT, StringT, forward_traversal_tag, StringT>
|
||||
{
|
||||
size_t m_index;
|
||||
StringsFile const * m_file;
|
||||
StringsFile & m_file;
|
||||
bool m_end;
|
||||
|
||||
public:
|
||||
IteratorT(size_t index, StringsFile const & file)
|
||||
: m_index(index), m_file(&file) {}
|
||||
IteratorT(StringsFile & file, bool isEnd)
|
||||
: m_file(file), m_end(isEnd)
|
||||
{
|
||||
}
|
||||
|
||||
StringT dereference() const;
|
||||
bool equal(IteratorT const & r) const { return m_index == r.m_index; }
|
||||
void increment() { ++m_index; }
|
||||
bool equal(IteratorT const & r) const { return m_end == r.m_end; }
|
||||
void increment();
|
||||
};
|
||||
|
||||
StringsFile() : m_writer(0), m_reader(0) {}
|
||||
StringsFile(string const & fPath) : m_filePath(fPath), m_index(0) {}
|
||||
~StringsFile();
|
||||
|
||||
void OpenForWrite(Writer * w) { m_writer = w; }
|
||||
/// Note! r should be in dynamic memory and this class takes shared ownership of it.
|
||||
void OpenForRead(Reader * r) { m_reader = ReaderPtr<Reader>(r); }
|
||||
void EndAdding();
|
||||
void OpenForRead();
|
||||
|
||||
/// @precondition Should be opened for writing.
|
||||
void AddString(StringT const & s);
|
||||
|
||||
/// @precondition Should be opened for reading.
|
||||
void SortStrings();
|
||||
|
||||
IteratorT Begin() const { return IteratorT(0, *this); }
|
||||
IteratorT End() const { return IteratorT(m_ids.size(), *this); }
|
||||
/// Iterator to the smallest queued string. When nothing is queued (no data,
/// or everything already consumed) the result compares equal to End(), so an
/// empty sequence is never dereferenced.
IteratorT Begin() { return IteratorT(*this, m_queue.empty()); }
|
||||
IteratorT End() { return IteratorT(*this, true); }
|
||||
|
||||
private:
|
||||
vector<IdT> m_ids;
|
||||
string FormatFilePath(int i) const;
|
||||
void Flush();
|
||||
bool PushNextValue(int i);
|
||||
|
||||
Writer * m_writer;
|
||||
ReaderPtr<Reader> m_reader;
|
||||
vector<StringT> m_strings;
|
||||
string m_filePath;
|
||||
int m_index;
|
||||
|
||||
typedef ReaderSource<ReaderPtr<Reader> > ReaderT;
|
||||
vector<ReaderT> m_readers;
|
||||
|
||||
struct QValue
|
||||
{
|
||||
StringT m_string;
|
||||
int m_index;
|
||||
|
||||
QValue(StringT const & s, int i) : m_string(s), m_index(i) {}
|
||||
|
||||
inline bool operator > (QValue const & rhs) const { return !(m_string < rhs.m_string); }
|
||||
};
|
||||
|
||||
priority_queue<QValue, vector<QValue>, greater<QValue> > m_queue;
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue