[indexer] Changed a map to a trie in CategoriesHolder.

Also, minor style refactorings.
This commit is contained in:
Maxim Pimenov 2017-02-08 16:21:10 +03:00
parent 5562457ce8
commit b95690dcd7
4 changed files with 114 additions and 42 deletions

View file

@ -49,7 +49,7 @@ public:
ForEachInSubtree(m_root, prefix, std::forward<ToDo>(toDo));
}
// Calls |toDo| for each key-value pair in a node that is reachable
// Calls |toDo| for each key-value pair in the node that is reachable
// by |prefix| from the trie root. Does nothing if such node does
// not exist.
template <typename ToDo>
@ -59,6 +59,16 @@ public:
ForEachInNode(*root, prefix, std::forward<ToDo>(toDo));
}
// Passes every value stored in the node addressed by |prefix| (a path
// that starts at the trie root) to |toDo|. When no node corresponds to
// |prefix|, |toDo| is never invoked.
template <typename ToDo>
void ForEachValueInNode(String const & prefix, ToDo && toDo) const
{
  auto const * node = MoveTo(prefix);
  if (node == nullptr)
    return;

  ForEachValueInNode(*node, std::forward<ToDo>(toDo));
}
// Calls |toDo| for each key-value pair in a subtree that is
// reachable by |prefix| from the trie root. Does nothing if such
// subtree does not exist.
@ -126,6 +136,14 @@ private:
toDo(prefix, value);
}
// Invokes |toDo| once per value held directly in |node|, following the
// iteration order of |node.m_values|. Values of descendant nodes are
// not visited.
template <typename ToDo>
void ForEachValueInNode(Node const & node, ToDo && toDo) const
{
  auto const & values = node.m_values;
  for (auto const & v : values)
  {
    toDo(v);
  }
}
// Calls |toDo| for each key-value pair in subtree where |node| is a
// root of the subtree. |prefix| is a path from the trie root to the
// |node|.

View file

@ -195,23 +195,32 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
shared_ptr<Category> p(new Category());
p->Swap(cat);
for (size_t i = 0; i < types.size(); ++i)
m_type2cat.insert(make_pair(types[i], p));
for (uint32_t const t : types)
m_type2cat.insert(make_pair(t, p));
for (size_t i = 0; i < p->m_synonyms.size(); ++i)
for (auto const & synonym : p->m_synonyms)
{
ASSERT(p->m_synonyms[i].m_locale != kUnsupportedLocaleCode, ());
auto const locale = synonym.m_locale;
ASSERT(locale != kUnsupportedLocaleCode, ());
StringT const uniName = search::NormalizeAndSimplifyString(p->m_synonyms[i].m_name);
auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name);
vector<StringT> tokens;
vector<String> tokens;
SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::Delimiters());
for (size_t j = 0; j < tokens.size(); ++j)
for (size_t k = 0; k < types.size(); ++k)
if (ValidKeyToken(tokens[j]))
m_name2type.insert(
make_pair(make_pair(p->m_synonyms[i].m_locale, tokens[j]), types[k]));
for (auto const & token : tokens)
{
if (!ValidKeyToken(token))
continue;
for (uint32_t const t : types)
{
  // A single operator[] lookup both finds and, if needed, creates the slot
  // for |locale|; a freshly created slot holds a null pointer, which is
  // then replaced by an empty trie. This yields the same state as the
  // former find() + two operator[] calls with one map traversal.
  auto & trie = m_name2type[locale];
  if (!trie)
    trie = make_unique<Trie>();
  trie->Add(token, t);
}
}
}
}
@ -219,7 +228,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
types.clear();
}
bool CategoriesHolder::ValidKeyToken(StringT const & s)
bool CategoriesHolder::ValidKeyToken(String const & s)
{
if (s.size() > 2)
return true;
@ -306,17 +315,19 @@ void CategoriesHolder::LoadFromStream(istream & s)
bool CategoriesHolder::GetNameByType(uint32_t type, int8_t locale, string & name) const
{
pair<IteratorT, IteratorT> const range = m_type2cat.equal_range(type);
auto const range = m_type2cat.equal_range(type);
for (IteratorT i = range.first; i != range.second; ++i)
for (auto it = range.first; it != range.second; ++it)
{
Category const & cat = *i->second;
for (size_t j = 0; j < cat.m_synonyms.size(); ++j)
if (cat.m_synonyms[j].m_locale == locale)
Category const & cat = *it->second;
for (auto const & synonym : cat.m_synonyms)
{
if (synonym.m_locale == locale)
{
name = cat.m_synonyms[j].m_name;
name = synonym.m_name;
return true;
}
}
}
if (range.first != range.second)
@ -352,7 +363,7 @@ string CategoriesHolder::GetReadableFeatureType(uint32_t type, int8_t locale) co
// Returns true iff |m_type2cat| holds at least one category registered
// under |type|, i.e. the equal_range for |type| is non-empty.
bool CategoriesHolder::IsTypeExist(uint32_t type) const
{
pair<IteratorT, IteratorT> const range = m_type2cat.equal_range(type);
auto const range = m_type2cat.equal_range(type);
return range.first != range.second;
}
@ -379,8 +390,10 @@ int8_t CategoriesHolder::MapLocaleToInteger(string const & locale)
strings::AsciiToLower(lower);
for (char const * s : {"hant", "tw", "hk", "mo"})
{
if (lower.find(s) != string::npos)
return 12; // Traditional Chinese
}
return 17; // Simplified Chinese by default for all other cases
}

View file

@ -1,4 +1,5 @@
#pragma once
#include "base/mem_trie.hpp"
#include "base/string_utils.hpp"
#include "std/deque.hpp"
@ -11,7 +12,6 @@
#include "std/utility.hpp"
#include "std/vector.hpp"
class Reader;
class CategoriesHolder
@ -47,13 +47,12 @@ public:
using GroupTranslations = unordered_map<string, vector<Category::Name>>;
private:
typedef strings::UniString StringT;
typedef multimap<uint32_t, shared_ptr<Category> > Type2CategoryContT;
typedef multimap<pair<int8_t, StringT>, uint32_t> Name2CatContT;
typedef Type2CategoryContT::const_iterator IteratorT;
using String = strings::UniString;
using Type2CategoryCont = multimap<uint32_t, shared_ptr<Category>>;
using Trie = my::MemTrie<String, uint32_t>;
Type2CategoryContT m_type2cat;
Name2CatContT m_name2type;
Type2CategoryCont m_type2cat;
map<int8_t, unique_ptr<Trie>> m_name2type;
GroupTranslations m_groupTranslations;
public:
@ -71,8 +70,8 @@ public:
// Calls |toDo| with the Category referenced by every entry of
// |m_type2cat|, in the container's iteration order. AddCategory inserts
// the same Category once per type, so a category registered under
// several types is passed to |toDo| once for each of them.
template <class ToDo>
void ForEachCategory(ToDo && toDo) const
{
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
toDo(*i->second);
for (auto & p : m_type2cat)
toDo(*p.second);
}
template <class ToDo>
@ -85,9 +84,12 @@ public:
template <class ToDo>
void ForEachName(ToDo && toDo) const
{
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
for (size_t j = 0; j < i->second->m_synonyms.size(); ++j)
toDo(i->second->m_synonyms[j]);
// Visit the synonyms of every category entry. The category is bound by
// reference instead of copying the shared_ptr: a copy would only add a
// reference-count increment/decrement per entry without changing what
// |toDo| receives.
for (auto const & p : m_type2cat)
{
  Category const & cat = *p.second;
  for (auto const & synonym : cat.m_synonyms)
    toDo(synonym);
}
}
template <class ToDo>
@ -101,16 +103,12 @@ public:
}
// Calls |toDo| with every type stored in |m_name2type| for the pair
// (|locale|, |name|). |name| is matched as a whole key: with the trie
// representation only the values of the node reached by |name| are
// visited. Does nothing when nothing is stored for |locale|.
template <class ToDo>
void ForEachTypeByName(int8_t locale, StringT const & name, ToDo && toDo) const
void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const
{
typedef typename Name2CatContT::const_iterator IterT;
pair<IterT, IterT> range = m_name2type.equal_range(make_pair(locale, name));
while (range.first != range.second)
{
toDo(range.first->second);
++range.first;
}
auto const it = m_name2type.find(locale);
if (it == m_name2type.end())
return;
it->second->ForEachValueInNode(name, forward<ToDo>(toDo));
}
inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; }
@ -142,7 +140,7 @@ public:
private:
void AddCategory(Category & cat, vector<uint32_t> & types);
static bool ValidKeyToken(StringT const & s);
static bool ValidKeyToken(String const & s);
};
inline void swap(CategoriesHolder & a, CategoriesHolder & b)

View file

@ -20,6 +20,7 @@
#include "std/transform_iterator.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
using namespace indexer;
@ -222,6 +223,48 @@ UNIT_TEST(CategoriesHolder_DisplayedName)
});
}
// Checks how many types ForEachTypeByName reports for single English
// tokens of a small hand-written categories file.
UNIT_TEST(CategoriesHolder_ForEach)
{
  char const kCategories[] =
      "amenity-bar\n"
      "en:abc|ddd-eee\n"
      "\n"
      "amenity-pub\n"
      "en:ddd\n"
      "\n"
      "amenity-cafe\n"
      "en:abc eee\n"
      "\n"
      "amenity-restaurant\n"
      "en:ddd|eee\n"
      "\n"
      "";

  classificator::Load();
  // The trailing NUL of the array is not part of the categories text.
  CategoriesHolder holder(make_unique<MemReader>(kCategories, ARRAY_SIZE(kCategories) - 1));

  // Returns the number of times ForEachTypeByName invokes its callback
  // for the English token |token|.
  auto const countTypes = [&holder](char const * token) {
    uint32_t numTypes = 0;
    holder.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::MakeUniString(token),
                             [&numTypes](uint32_t /* type */) { ++numTypes; });
    return numTypes;
  };

  // The expected counts match the number of categories above whose
  // English names mention the token.
  TEST_EQUAL(countTypes("abc"), 2, ());
  TEST_EQUAL(countTypes("ddd"), 3, ());
  TEST_EQUAL(countTypes("eee"), 3, ());
}
UNIT_TEST(CategoriesIndex_Smoke)
{
classificator::Load();