[indexer] Merged all per-locale tries into a single trie.

This commit is contained in:
Maxim Pimenov 2017-02-13 18:59:55 +03:00
parent 5ddc8db2ba
commit a1f4ac5953
3 changed files with 28 additions and 27 deletions

View file

@ -203,6 +203,8 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
auto const locale = synonym.m_locale;
ASSERT_NOT_EQUAL(locale, kUnsupportedLocaleCode, ());
auto const localePrefix = String(1, static_cast<strings::UniChar>(locale));
auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name);
vector<String> tokens;
@ -213,10 +215,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
if (!ValidKeyToken(token))
continue;
for (uint32_t const t : types)
{
auto it = m_name2type.emplace(locale, make_unique<Trie>()).first;
it->second->Add(token, t);
}
m_name2type->Add(localePrefix + token, t);
}
}
}
@ -243,7 +242,7 @@ bool CategoriesHolder::ValidKeyToken(String const & s)
void CategoriesHolder::LoadFromStream(istream & s)
{
m_type2cat.clear();
m_name2type.clear();
m_name2type = make_unique<Trie>();
m_groupTranslations.clear();
State state = EParseTypes;

View file

@ -56,7 +56,8 @@ private:
Type2CategoryCont m_type2cat;
// Maps locale and category token to the list of corresponding types.
map<int8_t, unique_ptr<Trie>> m_name2type;
// Locale is treated as a special symbol prepended to the token.
unique_ptr<Trie> m_name2type;
GroupTranslations m_groupTranslations;
@ -109,10 +110,9 @@ public:
template <class ToDo>
void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const
{
auto const it = m_name2type.find(locale);
if (it == m_name2type.end())
return;
it->second->ForEachInNode(name, my::MakeIgnoreFirstArgument(forward<ToDo>(toDo)));
auto const localePrefix = String(1, static_cast<strings::UniChar>(locale));
m_name2type->ForEachInNode(localePrefix + name,
my::MakeIgnoreFirstArgument(forward<ToDo>(toDo)));
}
inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; }
@ -126,14 +126,7 @@ public:
string GetReadableFeatureType(uint32_t type, int8_t locale) const;
// Exposes the single trie that maps locale-prefixed category tokens to types.
Trie const * GetNameToTypesTrie(int8_t locale) const
{
auto const it = m_name2type.find(locale);
if (it == m_name2type.end())
return nullptr;
return it->second.get();
}
Trie const & GetNameToTypesTrie() const { return *m_name2type; }
bool IsTypeExist(uint32_t type) const;
inline void Swap(CategoriesHolder & r)

View file

@ -15,13 +15,14 @@
#include <cctype>
#include <functional>
#include <queue>
#include <set>
namespace search
{
// my::MemTrie<strings::UniString, uint32_t>
// todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp.
template <typename Trie, typename DFA, typename ToDo>
bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo)
bool MatchInTrie(Trie const & /* trie */, typename Trie::Iterator const & trieStartIt,
DFA const & dfa, ToDo && toDo)
{
using Char = typename Trie::Char;
using TrieIt = typename Trie::Iterator;
@ -34,7 +35,7 @@ bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo)
auto it = dfa.Begin();
if (it.Rejects())
return false;
q.emplace(trie.GetRootIterator(), it);
q.emplace(trieStartIt, it);
}
bool found = false;
@ -97,17 +98,25 @@ template <typename ToDo>
void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & locales,
CategoriesHolder const & categories, ToDo && todo)
{
using Trie = my::MemTrie<strings::UniString, uint32_t>;
auto const & trie = categories.GetNameToTypesTrie();
auto const & trieRootIt = trie.GetRootIterator();
std::set<int8_t> localeSet(locales.begin(), locales.end());
for (size_t i = 0; i < slice.Size(); ++i)
{
auto const & token = slice.Get(i);
auto const & dfa =
strings::LevenshteinDFA(token, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(token));
for (int8_t const locale : locales)
{
auto const * trie = categories.GetNameToTypesTrie(locale);
if (trie != nullptr)
MatchInTrie(*trie, dfa, std::bind<void>(todo, i, std::placeholders::_1));
}
trieRootIt.ForEachMove([&](Trie::Char const & c, Trie::Iterator const & moveIt) {
if (localeSet.count(static_cast<int8_t>(c)) != 0)
{
MatchInTrie(trie /* passed to infer the iterator's type */, moveIt, dfa,
std::bind<void>(todo, i, std::placeholders::_1));
}
});
}
}
} // namespace search