diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index 21e393cced..f62746931e 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -1,43 +1,72 @@ #include "categories_holder.hpp" +#include "search_delimiters.hpp" +#include "search_string_utils.hpp" +#include "classificator.hpp" -#include "../indexer/classificator.hpp" - -#include "../coding/multilang_utf8_string.hpp" #include "../coding/reader.hpp" #include "../coding/reader_streambuf.hpp" +#include "../coding/multilang_utf8_string.hpp" -#include "../base/string_utils.hpp" #include "../base/logging.hpp" +#include "../base/stl_add.hpp" namespace { -struct Splitter +enum State { - vector & m_v; - Splitter(vector & v) : m_v(v) {} - void operator()(string const & s) - { - m_v.push_back(s); - } -}; - -enum State { EParseTypes, EParseLanguages }; } // unnamed namespace -size_t CategoriesHolder::LoadFromStream(istream & s) + +CategoriesHolder::CategoriesHolder(Reader * reader) { - m_categories.clear(); + ReaderStreamBuf buffer(reader); + istream s(&buffer); + LoadFromStream(s); +} + +void CategoriesHolder::AddCategory(Category & cat, vector & types) +{ + if (!cat.m_synonyms.empty() && !types.empty()) + { + shared_ptr p(new Category()); + p->Swap(cat); + + for (size_t i = 0; i < types.size(); ++i) + m_type2cat.insert(make_pair(types[i], p)); + + for (size_t i = 0; i < p->m_synonyms.size(); ++i) + { + StringT const uniName = search::NormalizeAndSimplifyString(p->m_synonyms[i].m_name); + + vector tokens; + SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::CategoryDelimiters()); + + for (size_t j = 0; j < tokens.size(); ++j) + for (size_t k = 0; k < types.size(); ++k) + m_name2type.insert(make_pair(tokens[j], types[k])); + } + } + + cat.m_synonyms.clear(); + types.clear(); +} + +void CategoriesHolder::LoadFromStream(istream & s) +{ + m_type2cat.clear(); + m_name2type.clear(); State state = EParseTypes; - string line; + Category cat; + vector types; Classificator const & c = classif(); @@ -50,20 +79,20 @@ size_t CategoriesHolder::LoadFromStream(istream & s) { case EParseTypes: { - if (!cat.m_synonyms.empty() && !cat.m_types.empty()) - m_categories.push_back(cat); - cat.m_synonyms.clear(); - cat.m_types.clear(); + AddCategory(cat, types); + while (iter) { // split category to sub categories for classificator vector v; - strings::Tokenize(*iter, "-", Splitter(v)); + strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v)); + // get classificator type - cat.m_types.push_back(c.GetTypeByPath(v)); + types.push_back(c.GetTypeByPath(v)); ++iter; } - if (!cat.m_types.empty()) + + if (!types.empty()) state = EParseLanguages; } break; @@ -75,21 +104,24 @@ size_t CategoriesHolder::LoadFromStream(istream & s) state = EParseTypes; continue; } - int8_t langCode = StringUtf8Multilang::GetLangIndex(*iter); + int8_t const langCode = StringUtf8Multilang::GetLangIndex(*iter); if (langCode == StringUtf8Multilang::UNSUPPORTED_LANGUAGE_CODE) { LOG(LWARNING, ("Invalid language code:", *iter)); continue; } + while (++iter) { Category::Name name; name.m_lang = langCode; name.m_name = *iter; - // ASSERT(name.m_Name.empty(), ()); if (name.m_name.empty()) + { + LOG(LWARNING, ("Empty category name")); continue; + } if (name.m_name[0] >= '0' && name.m_name[0] <= '9') { @@ -107,25 +139,29 @@ size_t CategoriesHolder::LoadFromStream(istream & s) } // add last category - if (!cat.m_synonyms.empty() && !cat.m_types.empty()) - m_categories.push_back(cat); - - LOG(LINFO, ("Categories loaded: ", m_categories.size())); - return m_categories.size(); + AddCategory(cat, types); } -void CategoriesHolder::swap(CategoriesHolder & o) +bool CategoriesHolder::GetNameByType(uint32_t type, int8_t lang, string & name) const { - m_categories.swap(o.m_categories); -} + pair const range = m_type2cat.equal_range(type); -CategoriesHolder::CategoriesHolder() -{ -} + for (IteratorT i = range.first; i != range.second; ++i) + { + Category const & cat = *i->second; + for (size_t j = 0; j < cat.m_synonyms.size(); ++j) + if (cat.m_synonyms[j].m_lang == lang) + { + name = cat.m_synonyms[j].m_name; + return true; + } + } -CategoriesHolder::CategoriesHolder(Reader * reader) -{ - ReaderStreamBuf buffer(reader); - istream s(&buffer); - LoadFromStream(s); + if (range.first != range.second) + { + name = range.first->second->m_synonyms[0].m_name; + return true; + } + + return false; } diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 78832996a9..552b60a3f4 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -1,10 +1,12 @@ #pragma once -#include "../base/base.hpp" +#include "../base/string_utils.hpp" #include "../std/vector.hpp" +#include "../std/map.hpp" #include "../std/string.hpp" #include "../std/fstream.hpp" -#include "../std/algorithm.hpp" +#include "../std/shared_ptr.hpp" + class Reader; @@ -13,9 +15,6 @@ class CategoriesHolder public: struct Category { - /// Classificator types - vector m_types; - struct Name { string m_name; @@ -23,36 +22,70 @@ public: uint8_t m_prefixLengthToSuggest; }; - /// vector m_synonyms; + + inline void Swap(Category & r) + { + m_synonyms.swap(r.m_synonyms); + } }; - typedef vector ContainerT; - typedef ContainerT::const_iterator const_iterator; +private: + typedef strings::UniString StringT; + typedef multimap > ContainerT; + typedef ContainerT::const_iterator IteratorT; - CategoriesHolder(); + multimap > m_type2cat; + multimap m_name2type; + +public: + CategoriesHolder() {} /// Takes ownership of reader. explicit CategoriesHolder(Reader * reader); - /// @return Number of loaded categories or 0 if something goes wrong. - size_t LoadFromStream(istream & s); + void LoadFromStream(istream & s); template void ForEachCategory(ToDo toDo) const { - for_each(m_categories.begin(), m_categories.end(), toDo); + for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i) + toDo(*i->second); } - const_iterator begin() const { return m_categories.begin(); } - const_iterator end() const { return m_categories.end(); } + template + void ForEachName(ToDo toDo) const + { + for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i) + for (size_t j = 0; j < i->second->m_synonyms.size(); ++j) + toDo(i->second->m_synonyms[j]); + } - void swap(CategoriesHolder & o); + template + void ForEachTypeByName(StringT const & name, ToDo toDo) const + { + typedef typename multimap::const_iterator IterT; + + pair range = m_name2type.equal_range(name); + while (range.first != range.second) + { + toDo(range.first->second); + ++range.first; + } + } + + bool GetNameByType(uint32_t type, int8_t lang, string & name) const; + + inline void Swap(CategoriesHolder & r) + { + m_type2cat.swap(r.m_type2cat); + m_name2type.swap(r.m_name2type); + } private: - ContainerT m_categories; + void AddCategory(Category & cat, vector & types); }; inline void swap(CategoriesHolder & a, CategoriesHolder & b) { - return a.swap(b); + return a.Swap(b); } diff --git a/indexer/indexer_tests/categories_test.cpp b/indexer/indexer_tests/categories_test.cpp index a7a00aad85..7191611731 100644 --- a/indexer/indexer_tests/categories_test.cpp +++ b/indexer/indexer_tests/categories_test.cpp @@ -28,7 +28,6 @@ struct Checker { case 0: { - TEST_EQUAL(cat.m_types.size(), 1, ()); TEST_EQUAL(cat.m_synonyms.size(), 5, ()); TEST_EQUAL(cat.m_synonyms[0].m_lang, StringUtf8Multilang::GetLangIndex("en"), ()); TEST_EQUAL(cat.m_synonyms[0].m_name, "bench", ()); @@ -46,9 +45,8 @@ struct Checker ++m_count; } break; - case 1: + case 1: case 2: { - TEST_EQUAL(cat.m_types.size(), 2, ()); TEST_EQUAL(cat.m_synonyms.size(), 3, ()); TEST_EQUAL(cat.m_synonyms[0].m_lang, StringUtf8Multilang::GetLangIndex("en"), ()); TEST_EQUAL(cat.m_synonyms[0].m_name, "village", ()); @@ -61,7 +59,8 @@ struct Checker ++m_count; } break; - default: TEST(false, ("Too many categories")); + default: + TEST(false, ("Too many categories")); } } }; @@ -72,10 +71,10 @@ UNIT_TEST(LoadCategories) CategoriesHolder h; istringstream buffer(TEST_STRING); - TEST_GREATER(h.LoadFromStream(buffer), 0, ()); + h.LoadFromStream(buffer); size_t count = 0; Checker f(count); h.ForEachCategory(f); - TEST_EQUAL(count, 2, ()); + TEST_EQUAL(count, 3, ()); } diff --git a/map/framework.cpp b/map/framework.cpp index d813a5be94..2e88019e90 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -678,7 +678,7 @@ search::Engine * Framework::GetSearchEngine() m_pSearchEngine.reset( new search::Engine(&m_model.GetIndex(), - new CategoriesHolder(pl.GetReader(SEARCH_CATEGORIES_FILE_NAME)), + pl.GetReader(SEARCH_CATEGORIES_FILE_NAME), pl.GetReader(PACKED_POLYGONS_FILE), pl.GetReader(COUNTRIES_FILE), languages::CurrentLanguage())); diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index 95b92e0afa..7dcd4ea3ea 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -7,6 +7,7 @@ #include "../indexer/feature_utils.hpp" #include "../indexer/mercator.hpp" #include "../indexer/scales.hpp" +#include "../indexer/categories_holder.hpp" #include "../geometry/angles.hpp" #include "../geometry/distance_on_sphere.hpp" @@ -170,8 +171,8 @@ namespace } Result PreResult2::GenerateFinalResult( - storage::CountryInfoGetter const * pInfo, - CategoriesT const * pCat) const + storage::CountryInfoGetter const * pInfo, + CategoriesHolder const * pCat, int8_t lang) const { storage::CountryInfo info; @@ -184,7 +185,7 @@ Result PreResult2::GenerateFinalResult( switch (m_resultType) { case RESULT_FEATURE: - return Result(m_str, info.m_name, info.m_flag, GetFeatureType(pCat) + return Result(m_str, info.m_name, info.m_flag, GetFeatureType(pCat, lang) #ifdef DEBUG + ' ' + strings::to_string(static_cast(m_searchRank)) #endif @@ -308,7 +309,7 @@ string PreResult2::DebugPrint() const return res; } -string PreResult2::GetFeatureType(CategoriesT const * pCat) const +string PreResult2::GetFeatureType(CategoriesHolder const * pCat, int8_t lang) const { ASSERT_EQUAL(m_resultType, RESULT_FEATURE, ()); @@ -317,11 +318,9 @@ string PreResult2::GetFeatureType(CategoriesT const * pCat) const if (pCat) { - for (CategoriesT::const_iterator i = pCat->begin(); i != pCat->end(); ++i) - { - if (i->second == type) - return strings::ToUtf8(i->first); - } + string name; + if (pCat->GetNameByType(type, lang, name)) + return name; } string s = classif().GetFullObjectName(type); diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp index fd08ddd785..e4e37a46e5 100644 --- a/search/intermediate_result.hpp +++ b/search/intermediate_result.hpp @@ -10,6 +10,7 @@ class FeatureType; +class CategoriesHolder; namespace storage { @@ -70,9 +71,8 @@ public: // For RESULT_CATEGORY. PreResult2(string const & name, int penalty); - typedef multimap CategoriesT; Result GenerateFinalResult(storage::CountryInfoGetter const * pInfo, - CategoriesT const * pCat) const; + CategoriesHolder const * pCat, int8_t lang) const; static bool LessRank(PreResult2 const & r1, PreResult2 const & r2); static bool LessDistance(PreResult2 const & r1, PreResult2 const & r2); @@ -103,7 +103,7 @@ public: string DebugPrint() const; private: - string GetFeatureType(CategoriesT const * pCat) const; + string GetFeatureType(CategoriesHolder const * pCat, int8_t lang) const; feature::TypesHolder m_types; inline uint32_t GetBestType() const diff --git a/search/search_engine.cpp b/search/search_engine.cpp index 2d0dae2fca..295bbff6c4 100644 --- a/search/search_engine.cpp +++ b/search/search_engine.cpp @@ -5,7 +5,6 @@ #include "../storage/country_info.hpp" #include "../indexer/categories_holder.hpp" -#include "../indexer/search_delimiters.hpp" #include "../indexer/search_string_utils.hpp" #include "../indexer/mercator.hpp" @@ -16,9 +15,7 @@ #include "../base/logging.hpp" #include "../base/stl_add.hpp" -#include "../std/algorithm.hpp" #include "../std/map.hpp" -#include "../std/utility.hpp" #include "../std/vector.hpp" #include "../std/bind.hpp" @@ -26,27 +23,55 @@ namespace search { +typedef vector > SuggestsContainerT; + class EngineData { public: - EngineData(ModelReaderPtr polyR, ModelReaderPtr countryR) - : m_infoGetter(polyR, countryR) {} + EngineData(Reader * pCategoriesR, ModelReaderPtr polyR, ModelReaderPtr countryR) + : m_categories(pCategoriesR), m_infoGetter(polyR, countryR) + { + } - multimap m_categories; - vector > m_stringsToSuggest; + CategoriesHolder m_categories; + SuggestsContainerT m_stringsToSuggest; storage::CountryInfoGetter m_infoGetter; }; -Engine::Engine(IndexType const * pIndex, CategoriesHolder * pCategories, +namespace +{ + +class InitSuggestions +{ + map m_suggests; + +public: + void operator() (CategoriesHolder::Category::Name const & name) + { + strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name); + + uint8_t & score = m_suggests[uniName]; + if (score == 0 || score > name.m_prefixLengthToSuggest) + score = name.m_prefixLengthToSuggest; + } + + void GetSuggests(SuggestsContainerT & cont) const + { + cont.assign(m_suggests.begin(), m_suggests.end()); + } +}; + +} + + +Engine::Engine(IndexType const * pIndex, Reader * pCategoriesR, ModelReaderPtr polyR, ModelReaderPtr countryR, string const & lang) - : m_pIndex(pIndex), m_pData(new EngineData(polyR, countryR)) + : m_pIndex(pIndex), m_pData(new EngineData(pCategoriesR, polyR, countryR)) { - if (pCategories) - { - InitializeCategoriesAndSuggestStrings(*pCategories); - delete pCategories; - } + InitSuggestions doInit; + m_pData->m_categories.ForEachName(bind(ref(doInit), _1)); + doInit.GetSuggests(m_pData->m_stringsToSuggest); m_pQuery.reset(new Query(pIndex, &m_pData->m_categories, @@ -59,34 +84,6 @@ Engine::~Engine() { } -void Engine::InitializeCategoriesAndSuggestStrings(CategoriesHolder const & categories) -{ - m_pData->m_categories.clear(); - m_pData->m_stringsToSuggest.clear(); - - map stringsToSuggest; - for (CategoriesHolder::const_iterator it = categories.begin(); it != categories.end(); ++it) - { - for (size_t i = 0; i < it->m_synonyms.size(); ++i) - { - CategoriesHolder::Category::Name const & name = it->m_synonyms[i]; - strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name); - - uint8_t & score = stringsToSuggest[uniName]; - if (score == 0 || score > name.m_prefixLengthToSuggest) - score = name.m_prefixLengthToSuggest; - - vector tokens; - SplitUniString(uniName, MakeBackInsertFunctor(tokens), CategoryDelimiters()); - for (size_t j = 0; j < tokens.size(); ++j) - for (size_t k = 0; k < it->m_types.size(); ++k) - m_pData->m_categories.insert(make_pair(tokens[j], it->m_types[k])); - } - } - - m_pData->m_stringsToSuggest.assign(stringsToSuggest.begin(), stringsToSuggest.end()); -} - namespace { m2::PointD GetViewportXY(double lat, double lon) diff --git a/search/search_engine.hpp b/search/search_engine.hpp index 3b8111f012..e136dc1465 100644 --- a/search/search_engine.hpp +++ b/search/search_engine.hpp @@ -13,7 +13,6 @@ #include "../std/function.hpp" -class CategoriesHolder; class Index; namespace search @@ -32,7 +31,7 @@ public: typedef Index IndexType; // Doesn't take ownership of @pIndex. Takes ownership of pCategories - Engine(IndexType const * pIndex, CategoriesHolder * pCategories, + Engine(IndexType const * pIndex, Reader * pCategoriesR, ModelReaderPtr polyR, ModelReaderPtr countryR, string const & lang); ~Engine(); @@ -44,8 +43,6 @@ public: string GetCountryFile(m2::PointD const & pt) const; private: - void InitializeCategoriesAndSuggestStrings(CategoriesHolder const & categories); - void SetViewportAsync(m2::RectD const & viewport); void SearchAsync(); diff --git a/search/search_query.cpp b/search/search_query.cpp index d0348289b1..fee91ea840 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -10,6 +10,7 @@ #include "../indexer/scales.hpp" #include "../indexer/search_delimiters.hpp" #include "../indexer/search_string_utils.hpp" +#include "../indexer/categories_holder.hpp" #include "../coding/multilang_utf8_string.hpp" @@ -26,14 +27,14 @@ namespace search { Query::Query(Index const * pIndex, - CategoriesMapT const * pCategories, + CategoriesHolder const * pCategories, StringsToSuggestVectorT const * pStringsToSuggest, storage::CountryInfoGetter const * pInfoGetter) : m_pIndex(pIndex), m_pCategories(pCategories), m_pStringsToSuggest(pStringsToSuggest), m_pInfoGetter(pInfoGetter), - m_preferredLanguage(StringUtf8Multilang::GetLangIndex("en")), + m_currentLang(StringUtf8Multilang::GetLangIndex("en")), m_viewport(m2::RectD::GetEmptyRect()), m_viewportExtended(m2::RectD::GetEmptyRect()), m_position(empty_pos_value, empty_pos_value), m_bOffsetsCacheIsValid(false) @@ -76,7 +77,7 @@ void Query::SetViewport(m2::RectD const & viewport) void Query::SetPreferredLanguage(string const & lang) { - m_preferredLanguage = StringUtf8Multilang::GetLangIndex(lang); + m_currentLang = StringUtf8Multilang::GetLangIndex(lang); } void Query::ClearCache() @@ -183,7 +184,7 @@ void Query::Search(string const & query, Results & res, unsigned int resultsNeed m_tokens.resize(31); vector > langPriorities(3); - langPriorities[0].push_back(m_preferredLanguage); + langPriorities[0].push_back(m_currentLang); langPriorities[1].push_back(StringUtf8Multilang::GetLangIndex("int_name")); langPriorities[1].push_back(StringUtf8Multilang::GetLangIndex("en")); langPriorities[2].push_back(StringUtf8Multilang::GetLangIndex("default")); @@ -208,7 +209,7 @@ void Query::Search(string const & query, Results & res, unsigned int resultsNeed { //double const precision = 5.0 * max(0.0001, min(latPrec, lonPrec)); // Min 55 meters res.AddResult(impl::PreResult2(m_viewport, m_position, lat, lon). - GenerateFinalResult(m_pInfoGetter, m_pCategories)); + GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang)); } } @@ -465,7 +466,7 @@ void Query::FlushResults(Results & res) LOG(LDEBUG, (indV[i])); - res.AddResult((*(indV[i])).GenerateFinalResult(m_pInfoGetter, m_pCategories)); + res.AddResult((*(indV[i])).GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang)); } } @@ -565,6 +566,18 @@ public: void Reset() { m_count = 0; } }; +class DoInsertTypes +{ + vector & m_tokens; +public: + DoInsertTypes(vector & tokens) : m_tokens(tokens) {} + + void operator() (uint32_t t) + { + m_tokens.push_back(FeatureTypeToString(t)); + } +}; + } // namespace search::impl void Query::SearchFeatures() @@ -582,20 +595,14 @@ void Query::SearchFeatures() if (m_pCategories) { for (size_t i = 0; i < m_tokens.size(); ++i) - { - typedef CategoriesMapT::const_iterator IterT; - - pair const range = m_pCategories->equal_range(m_tokens[i]); - for (IterT it = range.first; it != range.second; ++it) - tokens[i].push_back(FeatureTypeToString(it->second)); - } + m_pCategories->ForEachTypeByName(m_tokens[i], impl::DoInsertTypes(tokens[i])); } vector mwmInfo; m_pIndex->GetMwmInfo(mwmInfo); unordered_set langs; - langs.insert(m_preferredLanguage); + langs.insert(m_currentLang); langs.insert(StringUtf8Multilang::GetLangIndex("int_name")); langs.insert(StringUtf8Multilang::GetLangIndex("en")); langs.insert(StringUtf8Multilang::GetLangIndex("default")); @@ -733,7 +740,7 @@ void Query::MatchForSuggestions(strings::UniString const & token, Results & res) strings::UniString const & s = it->first; if (it->second <= token.size() && StartsWith(s.begin(), s.end(), token.begin(), token.end())) res.AddResult(impl::PreResult2(strings::ToUtf8(s), it->second). - GenerateFinalResult(m_pInfoGetter, m_pCategories)); + GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang)); } } diff --git a/search/search_query.hpp b/search/search_query.hpp index 69fd53324f..eae4948151 100644 --- a/search/search_query.hpp +++ b/search/search_query.hpp @@ -19,6 +19,7 @@ class FeatureType; class Index; class MwmInfo; +class CategoriesHolder; namespace storage { class CountryInfoGetter; } @@ -37,13 +38,11 @@ namespace impl class Query { public: - // Map category_token -> category_type. - typedef multimap CategoriesMapT; // Vector of pairs (string_to_suggest, min_prefix_length_to_suggest). typedef vector > StringsToSuggestVectorT; Query(Index const * pIndex, - CategoriesMapT const * pCategories, + CategoriesHolder const * pCategories, StringsToSuggestVectorT const * pStringsToSuggest, storage::CountryInfoGetter const * pInfoGetter); ~Query(); @@ -87,10 +86,10 @@ private: void GetBestMatchName(FeatureType const & f, uint32_t & penalty, string & name); Index const * m_pIndex; - CategoriesMapT const * m_pCategories; + CategoriesHolder const * m_pCategories; StringsToSuggestVectorT const * m_pStringsToSuggest; storage::CountryInfoGetter const * m_pInfoGetter; - int m_preferredLanguage; + int8_t m_currentLang, m_inputLang; volatile bool m_cancel;