[indexer] Changed a map to a trie in CategoriesHolder.
Also, minor style refactorings.
This commit is contained in:
parent
5562457ce8
commit
b95690dcd7
4 changed files with 114 additions and 42 deletions
|
@ -49,7 +49,7 @@ public:
|
|||
ForEachInSubtree(m_root, prefix, std::forward<ToDo>(toDo));
|
||||
}
|
||||
|
||||
// Calls |toDo| for each key-value pair in a node that is reachable
|
||||
// Calls |toDo| for each key-value pair in the node that is reachable
|
||||
// by |prefix| from the trie root. Does nothing if such node does
|
||||
// not exist.
|
||||
template <typename ToDo>
|
||||
|
@ -59,6 +59,16 @@ public:
|
|||
ForEachInNode(*root, prefix, std::forward<ToDo>(toDo));
|
||||
}
|
||||
|
||||
// Calls |toDo| for each value in the node that is reachable
|
||||
// by |prefix| from the trie root. Does nothing if such node does
|
||||
// not exist.
|
||||
template <typename ToDo>
|
||||
void ForEachValueInNode(String const & prefix, ToDo && toDo) const
|
||||
{
|
||||
if (auto const * root = MoveTo(prefix))
|
||||
ForEachValueInNode(*root, std::forward<ToDo>(toDo));
|
||||
}
|
||||
|
||||
// Calls |toDo| for each key-value pair in a subtree that is
|
||||
// reachable by |prefix| from the trie root. Does nothing if such
|
||||
// subtree does not exist.
|
||||
|
@ -126,6 +136,14 @@ private:
|
|||
toDo(prefix, value);
|
||||
}
|
||||
|
||||
// Calls |toDo| for each value in |node|.
|
||||
template <typename ToDo>
|
||||
void ForEachValueInNode(Node const & node, ToDo && toDo) const
|
||||
{
|
||||
for (auto const & value : node.m_values)
|
||||
toDo(value);
|
||||
}
|
||||
|
||||
// Calls |toDo| for each key-value pair in subtree where |node| is a
|
||||
// root of the subtree. |prefix| is a path from the trie root to the
|
||||
// |node|.
|
||||
|
|
|
@ -195,23 +195,32 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
|
|||
shared_ptr<Category> p(new Category());
|
||||
p->Swap(cat);
|
||||
|
||||
for (size_t i = 0; i < types.size(); ++i)
|
||||
m_type2cat.insert(make_pair(types[i], p));
|
||||
for (uint32_t const t : types)
|
||||
m_type2cat.insert(make_pair(t, p));
|
||||
|
||||
for (size_t i = 0; i < p->m_synonyms.size(); ++i)
|
||||
for (auto const & synonym : p->m_synonyms)
|
||||
{
|
||||
ASSERT(p->m_synonyms[i].m_locale != kUnsupportedLocaleCode, ());
|
||||
auto const locale = synonym.m_locale;
|
||||
ASSERT(locale != kUnsupportedLocaleCode, ());
|
||||
|
||||
StringT const uniName = search::NormalizeAndSimplifyString(p->m_synonyms[i].m_name);
|
||||
auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name);
|
||||
|
||||
vector<StringT> tokens;
|
||||
vector<String> tokens;
|
||||
SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::Delimiters());
|
||||
|
||||
for (size_t j = 0; j < tokens.size(); ++j)
|
||||
for (size_t k = 0; k < types.size(); ++k)
|
||||
if (ValidKeyToken(tokens[j]))
|
||||
m_name2type.insert(
|
||||
make_pair(make_pair(p->m_synonyms[i].m_locale, tokens[j]), types[k]));
|
||||
for (auto const & token : tokens)
|
||||
{
|
||||
if (!ValidKeyToken(token))
|
||||
continue;
|
||||
for (uint32_t const t : types)
|
||||
{
|
||||
if (m_name2type.find(locale) == m_name2type.end())
|
||||
m_name2type[locale] = make_unique<Trie>();
|
||||
|
||||
auto * trie = m_name2type[locale].get();
|
||||
trie->Add(token, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,7 +228,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
|
|||
types.clear();
|
||||
}
|
||||
|
||||
bool CategoriesHolder::ValidKeyToken(StringT const & s)
|
||||
bool CategoriesHolder::ValidKeyToken(String const & s)
|
||||
{
|
||||
if (s.size() > 2)
|
||||
return true;
|
||||
|
@ -306,17 +315,19 @@ void CategoriesHolder::LoadFromStream(istream & s)
|
|||
|
||||
bool CategoriesHolder::GetNameByType(uint32_t type, int8_t locale, string & name) const
|
||||
{
|
||||
pair<IteratorT, IteratorT> const range = m_type2cat.equal_range(type);
|
||||
auto const range = m_type2cat.equal_range(type);
|
||||
|
||||
for (IteratorT i = range.first; i != range.second; ++i)
|
||||
for (auto it = range.first; it != range.second; ++it)
|
||||
{
|
||||
Category const & cat = *i->second;
|
||||
for (size_t j = 0; j < cat.m_synonyms.size(); ++j)
|
||||
if (cat.m_synonyms[j].m_locale == locale)
|
||||
Category const & cat = *it->second;
|
||||
for (auto const & synonym : cat.m_synonyms)
|
||||
{
|
||||
if (synonym.m_locale == locale)
|
||||
{
|
||||
name = cat.m_synonyms[j].m_name;
|
||||
name = synonym.m_name;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (range.first != range.second)
|
||||
|
@ -352,7 +363,7 @@ string CategoriesHolder::GetReadableFeatureType(uint32_t type, int8_t locale) co
|
|||
|
||||
bool CategoriesHolder::IsTypeExist(uint32_t type) const
|
||||
{
|
||||
pair<IteratorT, IteratorT> const range = m_type2cat.equal_range(type);
|
||||
auto const range = m_type2cat.equal_range(type);
|
||||
return range.first != range.second;
|
||||
}
|
||||
|
||||
|
@ -379,8 +390,10 @@ int8_t CategoriesHolder::MapLocaleToInteger(string const & locale)
|
|||
strings::AsciiToLower(lower);
|
||||
|
||||
for (char const * s : {"hant", "tw", "hk", "mo"})
|
||||
{
|
||||
if (lower.find(s) != string::npos)
|
||||
return 12; // Traditional Chinese
|
||||
}
|
||||
|
||||
return 17; // Simplified Chinese by default for all other cases
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#include "base/mem_trie.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/deque.hpp"
|
||||
|
@ -11,7 +12,6 @@
|
|||
#include "std/utility.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
|
||||
class Reader;
|
||||
|
||||
class CategoriesHolder
|
||||
|
@ -47,13 +47,12 @@ public:
|
|||
using GroupTranslations = unordered_map<string, vector<Category::Name>>;
|
||||
|
||||
private:
|
||||
typedef strings::UniString StringT;
|
||||
typedef multimap<uint32_t, shared_ptr<Category> > Type2CategoryContT;
|
||||
typedef multimap<pair<int8_t, StringT>, uint32_t> Name2CatContT;
|
||||
typedef Type2CategoryContT::const_iterator IteratorT;
|
||||
using String = strings::UniString;
|
||||
using Type2CategoryCont = multimap<uint32_t, shared_ptr<Category>>;
|
||||
using Trie = my::MemTrie<String, uint32_t>;
|
||||
|
||||
Type2CategoryContT m_type2cat;
|
||||
Name2CatContT m_name2type;
|
||||
Type2CategoryCont m_type2cat;
|
||||
map<int8_t, unique_ptr<Trie>> m_name2type;
|
||||
GroupTranslations m_groupTranslations;
|
||||
|
||||
public:
|
||||
|
@ -71,8 +70,8 @@ public:
|
|||
template <class ToDo>
|
||||
void ForEachCategory(ToDo && toDo) const
|
||||
{
|
||||
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
|
||||
toDo(*i->second);
|
||||
for (auto & p : m_type2cat)
|
||||
toDo(*p.second);
|
||||
}
|
||||
|
||||
template <class ToDo>
|
||||
|
@ -85,9 +84,12 @@ public:
|
|||
template <class ToDo>
|
||||
void ForEachName(ToDo && toDo) const
|
||||
{
|
||||
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
|
||||
for (size_t j = 0; j < i->second->m_synonyms.size(); ++j)
|
||||
toDo(i->second->m_synonyms[j]);
|
||||
for (auto & p : m_type2cat)
|
||||
{
|
||||
shared_ptr<Category> cat = p.second;
|
||||
for (auto const & synonym : cat->m_synonyms)
|
||||
toDo(synonym);
|
||||
}
|
||||
}
|
||||
|
||||
template <class ToDo>
|
||||
|
@ -101,16 +103,12 @@ public:
|
|||
}
|
||||
|
||||
template <class ToDo>
|
||||
void ForEachTypeByName(int8_t locale, StringT const & name, ToDo && toDo) const
|
||||
void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const
|
||||
{
|
||||
typedef typename Name2CatContT::const_iterator IterT;
|
||||
|
||||
pair<IterT, IterT> range = m_name2type.equal_range(make_pair(locale, name));
|
||||
while (range.first != range.second)
|
||||
{
|
||||
toDo(range.first->second);
|
||||
++range.first;
|
||||
}
|
||||
auto const it = m_name2type.find(locale);
|
||||
if (it == m_name2type.end())
|
||||
return;
|
||||
it->second->ForEachValueInNode(name, forward<ToDo>(toDo));
|
||||
}
|
||||
|
||||
inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; }
|
||||
|
@ -142,7 +140,7 @@ public:
|
|||
|
||||
private:
|
||||
void AddCategory(Category & cat, vector<uint32_t> & types);
|
||||
static bool ValidKeyToken(StringT const & s);
|
||||
static bool ValidKeyToken(String const & s);
|
||||
};
|
||||
|
||||
inline void swap(CategoriesHolder & a, CategoriesHolder & b)
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "std/transform_iterator.hpp"
|
||||
|
||||
#include "base/stl_helpers.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
using namespace indexer;
|
||||
|
||||
|
@ -222,6 +223,48 @@ UNIT_TEST(CategoriesHolder_DisplayedName)
|
|||
});
|
||||
}
|
||||
|
||||
// Checks that ForEachTypeByName reports one type per category whose
// English synonyms contain the queried token.
UNIT_TEST(CategoriesHolder_ForEach)
{
  char const kCategories[] =
      "amenity-bar\n"
      "en:abc|ddd-eee\n"
      "\n"
      "amenity-pub\n"
      "en:ddd\n"
      "\n"
      "amenity-cafe\n"
      "en:abc eee\n"
      "\n"
      "amenity-restaurant\n"
      "en:ddd|eee\n"
      "\n"
      "";

  classificator::Load();
  CategoriesHolder holder(make_unique<MemReader>(kCategories, ARRAY_SIZE(kCategories) - 1));

  // Returns how many times the holder invokes the callback for |name|
  // in the English locale.
  auto const countTypes = [&holder](char const * name) {
    uint32_t counter = 0;
    holder.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::MakeUniString(name),
                             [&counter](uint32_t /* type */) { ++counter; });
    return counter;
  };

  // Expected counts presumably rely on "ddd-eee" and "abc eee" being
  // split into separate tokens by the holder — confirm against
  // search::Delimiters.
  TEST_EQUAL(countTypes("abc"), 2, ());  // bar, cafe
  TEST_EQUAL(countTypes("ddd"), 3, ());  // bar, pub, restaurant
  TEST_EQUAL(countTypes("eee"), 3, ());  // bar, cafe, restaurant
}
|
||||
|
||||
UNIT_TEST(CategoriesIndex_Smoke)
|
||||
{
|
||||
classificator::Load();
|
||||
|
|
Reference in a new issue