[indexer] United all tries-by-locale into one trie.
This commit is contained in:
parent
5ddc8db2ba
commit
a1f4ac5953
3 changed files with 28 additions and 27 deletions
|
@ -203,6 +203,8 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
|
|||
auto const locale = synonym.m_locale;
|
||||
ASSERT_NOT_EQUAL(locale, kUnsupportedLocaleCode, ());
|
||||
|
||||
auto const localePrefix = String(1, static_cast<strings::UniChar>(locale));
|
||||
|
||||
auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name);
|
||||
|
||||
vector<String> tokens;
|
||||
|
@ -213,10 +215,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
|
|||
if (!ValidKeyToken(token))
|
||||
continue;
|
||||
for (uint32_t const t : types)
|
||||
{
|
||||
auto it = m_name2type.emplace(locale, make_unique<Trie>()).first;
|
||||
it->second->Add(token, t);
|
||||
}
|
||||
m_name2type->Add(localePrefix + token, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -243,7 +242,7 @@ bool CategoriesHolder::ValidKeyToken(String const & s)
|
|||
void CategoriesHolder::LoadFromStream(istream & s)
|
||||
{
|
||||
m_type2cat.clear();
|
||||
m_name2type.clear();
|
||||
m_name2type = make_unique<Trie>();
|
||||
m_groupTranslations.clear();
|
||||
|
||||
State state = EParseTypes;
|
||||
|
|
|
@ -56,7 +56,8 @@ private:
|
|||
Type2CategoryCont m_type2cat;
|
||||
|
||||
// Maps locale and category token to the list of corresponding types.
|
||||
map<int8_t, unique_ptr<Trie>> m_name2type;
|
||||
// Locale is treated as a special symbol prepended to the token.
|
||||
unique_ptr<Trie> m_name2type;
|
||||
|
||||
GroupTranslations m_groupTranslations;
|
||||
|
||||
|
@ -109,10 +110,9 @@ public:
|
|||
template <class ToDo>
|
||||
void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const
|
||||
{
|
||||
auto const it = m_name2type.find(locale);
|
||||
if (it == m_name2type.end())
|
||||
return;
|
||||
it->second->ForEachInNode(name, my::MakeIgnoreFirstArgument(forward<ToDo>(toDo)));
|
||||
auto const localePrefix = String(1, static_cast<strings::UniChar>(locale));
|
||||
m_name2type->ForEachInNode(localePrefix + name,
|
||||
my::MakeIgnoreFirstArgument(forward<ToDo>(toDo)));
|
||||
}
|
||||
|
||||
inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; }
|
||||
|
@ -126,14 +126,7 @@ public:
|
|||
string GetReadableFeatureType(uint32_t type, int8_t locale) const;
|
||||
|
||||
// Exposes the trie that maps locale-prefixed category tokens to types.
|
||||
Trie const * GetNameToTypesTrie(int8_t locale) const
|
||||
{
|
||||
auto const it = m_name2type.find(locale);
|
||||
if (it == m_name2type.end())
|
||||
return nullptr;
|
||||
return it->second.get();
|
||||
}
|
||||
|
||||
Trie const & GetNameToTypesTrie() const { return *m_name2type; }
|
||||
bool IsTypeExist(uint32_t type) const;
|
||||
|
||||
inline void Swap(CategoriesHolder & r)
|
||||
|
|
|
@ -15,13 +15,14 @@
|
|||
#include <cctype>
|
||||
#include <functional>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
|
||||
namespace search
|
||||
{
|
||||
// my::MemTrie<strings::UniString, uint32_t>
|
||||
// todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp.
|
||||
template <typename Trie, typename DFA, typename ToDo>
|
||||
bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo)
|
||||
bool MatchInTrie(Trie const & /* trie */, typename Trie::Iterator const & trieStartIt,
|
||||
DFA const & dfa, ToDo && toDo)
|
||||
{
|
||||
using Char = typename Trie::Char;
|
||||
using TrieIt = typename Trie::Iterator;
|
||||
|
@ -34,7 +35,7 @@ bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo)
|
|||
auto it = dfa.Begin();
|
||||
if (it.Rejects())
|
||||
return false;
|
||||
q.emplace(trie.GetRootIterator(), it);
|
||||
q.emplace(trieStartIt, it);
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
|
@ -97,17 +98,25 @@ template <typename ToDo>
|
|||
void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & locales,
|
||||
CategoriesHolder const & categories, ToDo && todo)
|
||||
{
|
||||
using Trie = my::MemTrie<strings::UniString, uint32_t>;
|
||||
|
||||
auto const & trie = categories.GetNameToTypesTrie();
|
||||
auto const & trieRootIt = trie.GetRootIterator();
|
||||
std::set<int8_t> localeSet(locales.begin(), locales.end());
|
||||
|
||||
for (size_t i = 0; i < slice.Size(); ++i)
|
||||
{
|
||||
auto const & token = slice.Get(i);
|
||||
auto const & dfa =
|
||||
strings::LevenshteinDFA(token, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(token));
|
||||
for (int8_t const locale : locales)
|
||||
{
|
||||
auto const * trie = categories.GetNameToTypesTrie(locale);
|
||||
if (trie != nullptr)
|
||||
MatchInTrie(*trie, dfa, std::bind<void>(todo, i, std::placeholders::_1));
|
||||
}
|
||||
|
||||
trieRootIt.ForEachMove([&](Trie::Char const & c, Trie::Iterator const & moveIt) {
|
||||
if (localeSet.count(static_cast<int8_t>(c)) != 0)
|
||||
{
|
||||
MatchInTrie(trie /* passed to infer the iterator's type */, moveIt, dfa,
|
||||
std::bind<void>(todo, i, std::placeholders::_1));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace search
|
||||
|
|
Reference in a new issue