forked from organicmaps/organicmaps
Add --dump_search_tokens to generator_tool.
This commit is contained in:
parent
a6345ddf68
commit
c058e5fca8
3 changed files with 68 additions and 4 deletions
|
@ -2,16 +2,19 @@
|
|||
|
||||
#include "../indexer/search_delimiters.hpp"
|
||||
#include "../indexer/search_string_utils.hpp"
|
||||
#include "../indexer/classificator.hpp"
|
||||
#include "../indexer/feature_processor.hpp"
|
||||
#include "../indexer/search_trie.hpp"
|
||||
|
||||
#include "../coding/multilang_utf8_string.hpp"
|
||||
|
||||
#include "../indexer/classificator.hpp"
|
||||
#include "../indexer/feature_processor.hpp"
|
||||
#include "../base/logging.hpp"
|
||||
|
||||
#include "../std/algorithm.hpp"
|
||||
#include "../std/bind.hpp"
|
||||
#include "../std/iostream.hpp"
|
||||
#include "../std/map.hpp"
|
||||
#include "../std/queue.hpp"
|
||||
#include "../std/vector.hpp"
|
||||
|
||||
namespace feature
|
||||
|
@ -149,7 +152,63 @@ namespace feature
|
|||
{
|
||||
Print(it->first, it->second);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
struct SearchTokensCollector
|
||||
{
|
||||
priority_queue<pair<uint32_t, strings::UniString> > tokens;
|
||||
strings::UniString m_currentS;
|
||||
uint32_t m_currentCount;
|
||||
|
||||
SearchTokensCollector() : m_currentS(), m_currentCount(0) {}
|
||||
|
||||
void operator() (strings::UniString const & s, search::trie::ValueReader::ValueType value)
|
||||
{
|
||||
if (m_currentS == s)
|
||||
{
|
||||
++m_currentCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_currentCount > 0)
|
||||
{
|
||||
tokens.push(make_pair(m_currentCount, m_currentS));
|
||||
if (tokens.size() > 100)
|
||||
tokens.pop();
|
||||
}
|
||||
m_currentS = s;
|
||||
m_currentCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Finish()
|
||||
{
|
||||
if (m_currentCount > 0)
|
||||
{
|
||||
tokens.push(make_pair(m_currentCount, m_currentS));
|
||||
if (tokens.size() > 100)
|
||||
tokens.pop();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Reads the search index section (SEARCH_INDEX_FILE_TAG) of the container file at
/// fPath, runs SearchTokensCollector over the whole trie and prints what the
/// collector has left in its queue to stdout, one "<count> '<token as UTF-8>'" line
/// per entry, in the order the queue pops them.
void DumpSearchTokens(string const & fPath)
{
  FilesContainerR mwmContainer(new FileReader(fPath));

  // The trie root is owned here and released when the function returns.
  scoped_ptr<search::TrieIterator> trieRoot(::trie::reader::ReadTrie(
      mwmContainer.GetReader(SEARCH_INDEX_FILE_TAG),
      ::search::trie::ValueReader(),
      ::search::trie::EdgeValueReader()));

  SearchTokensCollector collector;
  trie::ForEachRef(*trieRoot, collector, strings::UniString());
  // The last run of equal tokens is only accounted for by Finish().
  collector.Finish();

  // Drain the queue: print the current top entry, then drop it.
  for (; !collector.tokens.empty(); collector.tokens.pop())
  {
    cout << collector.tokens.top().first
         << " '" << strings::ToUtf8(collector.tokens.top().second) << "'" << endl;
  }
}
|
||||
|
||||
} // namespace feature
|
||||
|
|
|
@ -6,4 +6,5 @@ namespace feature
|
|||
{
|
||||
// NOTE(review): presumably backs the --dump_types flag ("prints all types combinations
// and their total count"); neither the definition nor the call site is shown here - confirm.
void DumpTypes(string const & fPath);
|
||||
// Called by generator_tool for --dump_prefixes with the path of the output .mwm file.
// Per the flag description it prints statistics on feature name prefixes.
void DumpPrefixes(string const & fPath);
|
||||
// Prints "<count> '<token>'" lines to stdout for tokens collected from the search index
// section of the container file at fPath.
void DumpSearchTokens(string const & fPath);
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ DEFINE_string(generate_borders, "",
|
|||
"specify tag name and optional value: ISO3166-1 or admin_level=4");
|
||||
DEFINE_bool(dump_types, false, "If defined, prints all types combinations and their total count");
|
||||
DEFINE_bool(dump_prefixes, false, "If defined, prints statistics on feature name prefixes");
|
||||
// When set, main() calls feature::DumpSearchTokens() on path + FLAGS_output + ".mwm".
DEFINE_bool(dump_search_tokens, false, "Print statistics on search tokens.");
|
||||
DEFINE_bool(unpack_mwm, false, "Unpack each section of mwm into a separate file with name filePath.sectionName.");
|
||||
DEFINE_bool(generate_packed_borders, false, "Generate packed file with country polygons");
|
||||
|
||||
|
@ -238,6 +239,9 @@ int main(int argc, char ** argv)
|
|||
if (FLAGS_dump_prefixes)
|
||||
feature::DumpPrefixes(path + FLAGS_output + ".mwm");
|
||||
|
||||
if (FLAGS_dump_search_tokens)
|
||||
feature::DumpSearchTokens(path + FLAGS_output + ".mwm");
|
||||
|
||||
if (FLAGS_unpack_mwm)
|
||||
UnpackMwm(path + FLAGS_output + ".mwm");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue