[search] Using SmallSet for Locales.

This commit is contained in:
Maxim Pimenov 2017-10-10 15:53:13 +03:00 committed by Tatiana Yan
parent 6193185139
commit 4f2f14e8ce
6 changed files with 24 additions and 20 deletions

View file

@ -141,8 +141,9 @@ void ProcessCategory(string const & line, vector<string> & groups, vector<uint32
} // namespace
// static
int8_t const CategoriesHolder::kEnglishCode = 1;
int8_t const CategoriesHolder::kUnsupportedLocaleCode = -1;
int8_t constexpr CategoriesHolder::kEnglishCode;
int8_t constexpr CategoriesHolder::kUnsupportedLocaleCode;
uint8_t constexpr CategoriesHolder::kMaxSupportedLocaleIndex;
// *NOTE* These constants should be updated when adding a new
// translation to categories.txt. When editing, keep in mind to check
@ -186,6 +187,11 @@ CategoriesHolder::CategoriesHolder(unique_ptr<Reader> && reader)
ReaderStreamBuf buffer(move(reader));
istream s(&buffer);
LoadFromStream(s);
#if defined(DEBUG)
for (auto const & entry : kLocaleMapping)
ASSERT_LESS_OR_EQUAL(entry.m_code, kMaxSupportedLocaleIndex, ());
#endif
}
void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)

View file

@ -63,8 +63,9 @@ private:
GroupTranslations m_groupTranslations;
public:
static int8_t const kEnglishCode;
static int8_t const kUnsupportedLocaleCode;
static int8_t constexpr kEnglishCode = 1;
static int8_t constexpr kUnsupportedLocaleCode = -1;
static uint8_t constexpr kMaxSupportedLocaleIndex = 30;
static vector<Mapping> const kLocaleMapping;
// List of languages that are currently disabled in the application

View file

@ -397,12 +397,11 @@ Locales Processor::GetCategoryLocales() const
Locales result;
// Prepare array of processing locales. English locale is always present for category matching.
result.Insert(static_cast<uint64_t>(enLocaleCode));
if (m_currentLocaleCode != -1)
result.push_back(m_currentLocaleCode);
if (m_inputLocaleCode != -1 && m_inputLocaleCode != m_currentLocaleCode)
result.push_back(m_inputLocaleCode);
if (enLocaleCode != m_currentLocaleCode && enLocaleCode != m_inputLocaleCode)
result.push_back(enLocaleCode);
result.Insert(static_cast<uint64_t>(m_currentLocaleCode));
if (m_inputLocaleCode != -1)
result.Insert(static_cast<uint64_t>(m_inputLocaleCode));
return result;
}

View file

@ -12,6 +12,7 @@
#include "search/search_trie.hpp"
#include "search/suggest.hpp"
#include "search/token_slice.hpp"
#include "search/utils.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/index.hpp"
@ -132,9 +133,8 @@ protected:
using TMWMVector = vector<shared_ptr<MwmInfo>>;
using TOffsetsVector = map<MwmSet::MwmId, vector<uint32_t>>;
using TFHeader = feature::DataHeader;
using TLocales = buffer_vector<int8_t, 3>;
TLocales GetCategoryLocales() const;
Locales GetCategoryLocales() const;
template <typename ToDo>
void ForEachCategoryType(StringSliceBase const & slice, ToDo && toDo) const;

View file

@ -10,6 +10,7 @@
#include "search/reverse_geocoder.hpp"
#include "search/search_params.hpp"
#include "search/suggest.hpp"
#include "search/utils.hpp"
#include "indexer/categories_holder.hpp"
#include "indexer/feature_decl.hpp"
@ -43,8 +44,6 @@ class Ranker
public:
struct Params
{
using TLocales = buffer_vector<int8_t, 3>;
int8_t m_currentLocaleCode = CategoriesHolder::kEnglishCode;
m2::RectD m_viewport;
m2::PointD m_position;
@ -65,7 +64,7 @@ public:
// filtering of identical search results.
double m_minDistanceOnMapBetweenResults = 0.0;
TLocales m_categoryLocales;
Locales m_categoryLocales;
size_t m_limit = 0;
};

View file

@ -8,6 +8,7 @@
#include "base/buffer_vector.hpp"
#include "base/levenshtein_dfa.hpp"
#include "base/small_set.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
@ -20,6 +21,8 @@
namespace search
{
using Locales = base::SafeSmallSet<CategoriesHolder::kMaxSupportedLocaleIndex + 1>;
// todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp.
template <typename TrieIt, typename DFA, typename ToDo>
bool MatchInTrie(TrieIt const & trieStartIt, DFA const & dfa, ToDo && toDo)
@ -64,8 +67,6 @@ bool MatchInTrie(TrieIt const & trieStartIt, DFA const & dfa, ToDo && toDo)
return found;
}
using Locales = buffer_vector<int8_t, 3>;
size_t GetMaxErrorsForToken(strings::UniString const & token);
strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s);
@ -103,8 +104,6 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & loc
auto const & trie = categories.GetNameToTypesTrie();
auto const & trieRootIt = trie.GetRootIterator();
vector<int8_t> sortedLocales(locales.begin(), locales.end());
my::SortUnique(sortedLocales);
for (size_t i = 0; i < slice.Size(); ++i)
{
@ -115,7 +114,7 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & loc
strings::LevenshteinDFA const dfa(BuildLevenshteinDFA(token));
trieRootIt.ForEachMove([&](Trie::Char const & c, Trie::Iterator const & trieStartIt) {
if (std::binary_search(sortedLocales.begin(), sortedLocales.end(), static_cast<int8_t>(c)))
if (locales.Contains(static_cast<uint64_t>(c)))
MatchInTrie(trieStartIt, dfa, std::bind<void>(todo, i, std::placeholders::_1));
});
}
@ -136,7 +135,7 @@ bool IsCategorialRequest(QuerySliceOnRawStrings<T> const & slice, Locales const
bool found = false;
auto token = slice.Get(0);
catHolder.ForEachName([&](CategoriesHolder::Category::Name const & categorySynonym) {
if (std::find(locales.begin(), locales.end(), categorySynonym.m_locale) == locales.end())
if (!locales.Contains(static_cast<uint64_t>(categorySynonym.m_locale)))
return;
if (token != strings::MakeUniString(categorySynonym.m_name))