[search] Category name in native language. Closed #602.

This commit is contained in:
vng 2012-02-13 20:59:56 +03:00 committed by Alex Zolotarev
parent 4c70ee2acc
commit 5c62a25a0f
10 changed files with 211 additions and 144 deletions

View file

@ -1,43 +1,72 @@
#include "categories_holder.hpp"
#include "search_delimiters.hpp"
#include "search_string_utils.hpp"
#include "classificator.hpp"
#include "../indexer/classificator.hpp"
#include "../coding/multilang_utf8_string.hpp"
#include "../coding/reader.hpp"
#include "../coding/reader_streambuf.hpp"
#include "../coding/multilang_utf8_string.hpp"
#include "../base/string_utils.hpp"
#include "../base/logging.hpp"
#include "../base/stl_add.hpp"
namespace
{
struct Splitter
enum State
{
vector<string> & m_v;
Splitter(vector<string> & v) : m_v(v) {}
void operator()(string const & s)
{
m_v.push_back(s);
}
};
enum State {
EParseTypes,
EParseLanguages
};
} // unnamed namespace
size_t CategoriesHolder::LoadFromStream(istream & s)
CategoriesHolder::CategoriesHolder(Reader * reader)
{
m_categories.clear();
ReaderStreamBuf buffer(reader);
istream s(&buffer);
LoadFromStream(s);
}
/// Stores the category accumulated so far and indexes it.
/// If @p cat has at least one synonym and @p types is not empty, the synonyms
/// are moved (via Swap) into a new shared Category which is registered in
/// m_type2cat under every type from @p types; every token of every synonym
/// name is registered in m_name2type for each of those types.
/// Both @p cat.m_synonyms and @p types are always cleared on exit, so the
/// caller can reuse them for the next category.
void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
{
  bool const hasData = !(cat.m_synonyms.empty() || types.empty());
  if (hasData)
  {
    // One Category instance is shared between all its types.
    shared_ptr<Category> holder(new Category());
    holder->Swap(cat);

    typedef vector<uint32_t>::const_iterator TypeIterT;
    for (TypeIterT t = types.begin(); t != types.end(); ++t)
      m_type2cat.insert(make_pair(*t, holder));

    typedef vector<Category::Name>::const_iterator NameIterT;
    for (NameIterT n = holder->m_synonyms.begin(); n != holder->m_synonyms.end(); ++n)
    {
      // Split the normalized synonym into tokens for the name -> type index.
      StringT const normalized = search::NormalizeAndSimplifyString(n->m_name);

      vector<StringT> parts;
      SplitUniString(normalized, MakeBackInsertFunctor(parts), search::CategoryDelimiters());

      for (vector<StringT>::const_iterator p = parts.begin(); p != parts.end(); ++p)
      {
        for (TypeIterT t = types.begin(); t != types.end(); ++t)
          m_name2type.insert(make_pair(*p, *t));
      }
    }
  }

  // Reset the accumulators regardless of whether anything was stored.
  cat.m_synonyms.clear();
  types.clear();
}
void CategoriesHolder::LoadFromStream(istream & s)
{
m_type2cat.clear();
m_name2type.clear();
State state = EParseTypes;
string line;
Category cat;
vector<uint32_t> types;
Classificator const & c = classif();
@ -50,20 +79,20 @@ size_t CategoriesHolder::LoadFromStream(istream & s)
{
case EParseTypes:
{
if (!cat.m_synonyms.empty() && !cat.m_types.empty())
m_categories.push_back(cat);
cat.m_synonyms.clear();
cat.m_types.clear();
AddCategory(cat, types);
while (iter)
{
// split category to sub categories for classificator
vector<string> v;
strings::Tokenize(*iter, "-", Splitter(v));
strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
// get classificator type
cat.m_types.push_back(c.GetTypeByPath(v));
types.push_back(c.GetTypeByPath(v));
++iter;
}
if (!cat.m_types.empty())
if (!types.empty())
state = EParseLanguages;
}
break;
@ -75,21 +104,24 @@ size_t CategoriesHolder::LoadFromStream(istream & s)
state = EParseTypes;
continue;
}
int8_t langCode = StringUtf8Multilang::GetLangIndex(*iter);
int8_t const langCode = StringUtf8Multilang::GetLangIndex(*iter);
if (langCode == StringUtf8Multilang::UNSUPPORTED_LANGUAGE_CODE)
{
LOG(LWARNING, ("Invalid language code:", *iter));
continue;
}
while (++iter)
{
Category::Name name;
name.m_lang = langCode;
name.m_name = *iter;
// ASSERT(name.m_Name.empty(), ());
if (name.m_name.empty())
{
LOG(LWARNING, ("Empty category name"));
continue;
}
if (name.m_name[0] >= '0' && name.m_name[0] <= '9')
{
@ -107,25 +139,29 @@ size_t CategoriesHolder::LoadFromStream(istream & s)
}
// add last category
if (!cat.m_synonyms.empty() && !cat.m_types.empty())
m_categories.push_back(cat);
LOG(LINFO, ("Categories loaded: ", m_categories.size()));
return m_categories.size();
AddCategory(cat, types);
}
void CategoriesHolder::swap(CategoriesHolder & o)
bool CategoriesHolder::GetNameByType(uint32_t type, int8_t lang, string & name) const
{
m_categories.swap(o.m_categories);
}
pair<IteratorT, IteratorT> const range = m_type2cat.equal_range(type);
CategoriesHolder::CategoriesHolder()
{
}
for (IteratorT i = range.first; i != range.second; ++i)
{
Category const & cat = *i->second;
for (size_t j = 0; j < cat.m_synonyms.size(); ++j)
if (cat.m_synonyms[j].m_lang == lang)
{
name = cat.m_synonyms[j].m_name;
return true;
}
}
CategoriesHolder::CategoriesHolder(Reader * reader)
{
ReaderStreamBuf buffer(reader);
istream s(&buffer);
LoadFromStream(s);
if (range.first != range.second)
{
name = range.first->second->m_synonyms[0].m_name;
return true;
}
return false;
}

View file

@ -1,10 +1,12 @@
#pragma once
#include "../base/base.hpp"
#include "../base/string_utils.hpp"
#include "../std/vector.hpp"
#include "../std/map.hpp"
#include "../std/string.hpp"
#include "../std/fstream.hpp"
#include "../std/algorithm.hpp"
#include "../std/shared_ptr.hpp"
class Reader;
@ -13,9 +15,6 @@ class CategoriesHolder
public:
struct Category
{
/// Classificator types
vector<uint32_t> m_types;
struct Name
{
string m_name;
@ -23,36 +22,70 @@ public:
uint8_t m_prefixLengthToSuggest;
};
/// <language, synonym>
vector<Name> m_synonyms;
/// Exchanges the synonym list of this category with the one of @p r.
inline void Swap(Category & r)
{
m_synonyms.swap(r.m_synonyms);
}
};
typedef vector<Category> ContainerT;
typedef ContainerT::const_iterator const_iterator;
private:
typedef strings::UniString StringT;
typedef multimap<uint32_t, shared_ptr<Category> > ContainerT;
typedef ContainerT::const_iterator IteratorT;
CategoriesHolder();
multimap<uint32_t, shared_ptr<Category> > m_type2cat;
multimap<StringT, uint32_t> m_name2type;
public:
CategoriesHolder() {}
/// Takes ownership of reader.
explicit CategoriesHolder(Reader * reader);
/// @return Number of loaded categories or 0 if something goes wrong.
size_t LoadFromStream(istream & s);
void LoadFromStream(istream & s);
template <class ToDo>
void ForEachCategory(ToDo toDo) const
{
for_each(m_categories.begin(), m_categories.end(), toDo);
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
toDo(*i->second);
}
const_iterator begin() const { return m_categories.begin(); }
const_iterator end() const { return m_categories.end(); }
template <class ToDo>
void ForEachName(ToDo toDo) const
{
for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i)
for (size_t j = 0; j < i->second->m_synonyms.size(); ++j)
toDo(i->second->m_synonyms[j]);
}
void swap(CategoriesHolder & o);
/// Calls @p toDo with every type stored in m_name2type under the key @p name.
/// Nothing is called when there is no such key.
template <class ToDo>
void ForEachTypeByName(StringT const & name, ToDo toDo) const
{
  typedef multimap<StringT, uint32_t>::const_iterator NameIterT;

  pair<NameIterT, NameIterT> const bounds = m_name2type.equal_range(name);
  for (NameIterT it = bounds.first; it != bounds.second; ++it)
    toDo(it->second);
}
bool GetNameByType(uint32_t type, int8_t lang, string & name) const;
/// Exchanges both indexes (name -> type and type -> category) with @p r.
inline void Swap(CategoriesHolder & r)
{
  m_name2type.swap(r.m_name2type);
  m_type2cat.swap(r.m_type2cat);
}
private:
ContainerT m_categories;
void AddCategory(Category & cat, vector<uint32_t> & types);
};
inline void swap(CategoriesHolder & a, CategoriesHolder & b)
{
return a.swap(b);
return a.Swap(b);
}

View file

@ -28,7 +28,6 @@ struct Checker
{
case 0:
{
TEST_EQUAL(cat.m_types.size(), 1, ());
TEST_EQUAL(cat.m_synonyms.size(), 5, ());
TEST_EQUAL(cat.m_synonyms[0].m_lang, StringUtf8Multilang::GetLangIndex("en"), ());
TEST_EQUAL(cat.m_synonyms[0].m_name, "bench", ());
@ -46,9 +45,8 @@ struct Checker
++m_count;
}
break;
case 1:
case 1: case 2:
{
TEST_EQUAL(cat.m_types.size(), 2, ());
TEST_EQUAL(cat.m_synonyms.size(), 3, ());
TEST_EQUAL(cat.m_synonyms[0].m_lang, StringUtf8Multilang::GetLangIndex("en"), ());
TEST_EQUAL(cat.m_synonyms[0].m_name, "village", ());
@ -61,7 +59,8 @@ struct Checker
++m_count;
}
break;
default: TEST(false, ("Too many categories"));
default:
TEST(false, ("Too many categories"));
}
}
};
@ -72,10 +71,10 @@ UNIT_TEST(LoadCategories)
CategoriesHolder h;
istringstream buffer(TEST_STRING);
TEST_GREATER(h.LoadFromStream(buffer), 0, ());
h.LoadFromStream(buffer);
size_t count = 0;
Checker f(count);
h.ForEachCategory(f);
TEST_EQUAL(count, 2, ());
TEST_EQUAL(count, 3, ());
}

View file

@ -678,7 +678,7 @@ search::Engine * Framework::GetSearchEngine()
m_pSearchEngine.reset(
new search::Engine(&m_model.GetIndex(),
new CategoriesHolder(pl.GetReader(SEARCH_CATEGORIES_FILE_NAME)),
pl.GetReader(SEARCH_CATEGORIES_FILE_NAME),
pl.GetReader(PACKED_POLYGONS_FILE),
pl.GetReader(COUNTRIES_FILE),
languages::CurrentLanguage()));

View file

@ -7,6 +7,7 @@
#include "../indexer/feature_utils.hpp"
#include "../indexer/mercator.hpp"
#include "../indexer/scales.hpp"
#include "../indexer/categories_holder.hpp"
#include "../geometry/angles.hpp"
#include "../geometry/distance_on_sphere.hpp"
@ -170,8 +171,8 @@ namespace
}
Result PreResult2::GenerateFinalResult(
storage::CountryInfoGetter const * pInfo,
CategoriesT const * pCat) const
storage::CountryInfoGetter const * pInfo,
CategoriesHolder const * pCat, int8_t lang) const
{
storage::CountryInfo info;
@ -184,7 +185,7 @@ Result PreResult2::GenerateFinalResult(
switch (m_resultType)
{
case RESULT_FEATURE:
return Result(m_str, info.m_name, info.m_flag, GetFeatureType(pCat)
return Result(m_str, info.m_name, info.m_flag, GetFeatureType(pCat, lang)
#ifdef DEBUG
+ ' ' + strings::to_string(static_cast<int>(m_searchRank))
#endif
@ -308,7 +309,7 @@ string PreResult2::DebugPrint() const
return res;
}
string PreResult2::GetFeatureType(CategoriesT const * pCat) const
string PreResult2::GetFeatureType(CategoriesHolder const * pCat, int8_t lang) const
{
ASSERT_EQUAL(m_resultType, RESULT_FEATURE, ());
@ -317,11 +318,9 @@ string PreResult2::GetFeatureType(CategoriesT const * pCat) const
if (pCat)
{
for (CategoriesT::const_iterator i = pCat->begin(); i != pCat->end(); ++i)
{
if (i->second == type)
return strings::ToUtf8(i->first);
}
string name;
if (pCat->GetNameByType(type, lang, name))
return name;
}
string s = classif().GetFullObjectName(type);

View file

@ -10,6 +10,7 @@
class FeatureType;
class CategoriesHolder;
namespace storage
{
@ -70,9 +71,8 @@ public:
// For RESULT_CATEGORY.
PreResult2(string const & name, int penalty);
typedef multimap<strings::UniString, uint32_t> CategoriesT;
Result GenerateFinalResult(storage::CountryInfoGetter const * pInfo,
CategoriesT const * pCat) const;
CategoriesHolder const * pCat, int8_t lang) const;
static bool LessRank(PreResult2 const & r1, PreResult2 const & r2);
static bool LessDistance(PreResult2 const & r1, PreResult2 const & r2);
@ -103,7 +103,7 @@ public:
string DebugPrint() const;
private:
string GetFeatureType(CategoriesT const * pCat) const;
string GetFeatureType(CategoriesHolder const * pCat, int8_t lang) const;
feature::TypesHolder m_types;
inline uint32_t GetBestType() const

View file

@ -5,7 +5,6 @@
#include "../storage/country_info.hpp"
#include "../indexer/categories_holder.hpp"
#include "../indexer/search_delimiters.hpp"
#include "../indexer/search_string_utils.hpp"
#include "../indexer/mercator.hpp"
@ -16,9 +15,7 @@
#include "../base/logging.hpp"
#include "../base/stl_add.hpp"
#include "../std/algorithm.hpp"
#include "../std/map.hpp"
#include "../std/utility.hpp"
#include "../std/vector.hpp"
#include "../std/bind.hpp"
@ -26,27 +23,55 @@
namespace search
{
typedef vector<pair<strings::UniString, uint8_t> > SuggestsContainerT;
class EngineData
{
public:
EngineData(ModelReaderPtr polyR, ModelReaderPtr countryR)
: m_infoGetter(polyR, countryR) {}
EngineData(Reader * pCategoriesR, ModelReaderPtr polyR, ModelReaderPtr countryR)
: m_categories(pCategoriesR), m_infoGetter(polyR, countryR)
{
}
multimap<strings::UniString, uint32_t> m_categories;
vector<pair<strings::UniString, uint8_t> > m_stringsToSuggest;
CategoriesHolder m_categories;
SuggestsContainerT m_stringsToSuggest;
storage::CountryInfoGetter m_infoGetter;
};
Engine::Engine(IndexType const * pIndex, CategoriesHolder * pCategories,
namespace
{
class InitSuggestions
{
map<strings::UniString, uint8_t> m_suggests;
public:
void operator() (CategoriesHolder::Category::Name const & name)
{
strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name);
uint8_t & score = m_suggests[uniName];
if (score == 0 || score > name.m_prefixLengthToSuggest)
score = name.m_prefixLengthToSuggest;
}
void GetSuggests(SuggestsContainerT & cont) const
{
cont.assign(m_suggests.begin(), m_suggests.end());
}
};
}
Engine::Engine(IndexType const * pIndex, Reader * pCategoriesR,
ModelReaderPtr polyR, ModelReaderPtr countryR,
string const & lang)
: m_pIndex(pIndex), m_pData(new EngineData(polyR, countryR))
: m_pIndex(pIndex), m_pData(new EngineData(pCategoriesR, polyR, countryR))
{
if (pCategories)
{
InitializeCategoriesAndSuggestStrings(*pCategories);
delete pCategories;
}
InitSuggestions doInit;
m_pData->m_categories.ForEachName(bind<void>(ref(doInit), _1));
doInit.GetSuggests(m_pData->m_stringsToSuggest);
m_pQuery.reset(new Query(pIndex,
&m_pData->m_categories,
@ -59,34 +84,6 @@ Engine::~Engine()
{
}
void Engine::InitializeCategoriesAndSuggestStrings(CategoriesHolder const & categories)
{
m_pData->m_categories.clear();
m_pData->m_stringsToSuggest.clear();
map<strings::UniString, uint8_t> stringsToSuggest;
for (CategoriesHolder::const_iterator it = categories.begin(); it != categories.end(); ++it)
{
for (size_t i = 0; i < it->m_synonyms.size(); ++i)
{
CategoriesHolder::Category::Name const & name = it->m_synonyms[i];
strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name);
uint8_t & score = stringsToSuggest[uniName];
if (score == 0 || score > name.m_prefixLengthToSuggest)
score = name.m_prefixLengthToSuggest;
vector<strings::UniString> tokens;
SplitUniString(uniName, MakeBackInsertFunctor(tokens), CategoryDelimiters());
for (size_t j = 0; j < tokens.size(); ++j)
for (size_t k = 0; k < it->m_types.size(); ++k)
m_pData->m_categories.insert(make_pair(tokens[j], it->m_types[k]));
}
}
m_pData->m_stringsToSuggest.assign(stringsToSuggest.begin(), stringsToSuggest.end());
}
namespace
{
m2::PointD GetViewportXY(double lat, double lon)

View file

@ -13,7 +13,6 @@
#include "../std/function.hpp"
class CategoriesHolder;
class Index;
namespace search
@ -32,7 +31,7 @@ public:
typedef Index IndexType;
// Doesn't take ownership of @pIndex. Takes ownership of @pCategoriesR.
Engine(IndexType const * pIndex, CategoriesHolder * pCategories,
Engine(IndexType const * pIndex, Reader * pCategoriesR,
ModelReaderPtr polyR, ModelReaderPtr countryR,
string const & lang);
~Engine();
@ -44,8 +43,6 @@ public:
string GetCountryFile(m2::PointD const & pt) const;
private:
void InitializeCategoriesAndSuggestStrings(CategoriesHolder const & categories);
void SetViewportAsync(m2::RectD const & viewport);
void SearchAsync();

View file

@ -10,6 +10,7 @@
#include "../indexer/scales.hpp"
#include "../indexer/search_delimiters.hpp"
#include "../indexer/search_string_utils.hpp"
#include "../indexer/categories_holder.hpp"
#include "../coding/multilang_utf8_string.hpp"
@ -26,14 +27,14 @@ namespace search
{
Query::Query(Index const * pIndex,
CategoriesMapT const * pCategories,
CategoriesHolder const * pCategories,
StringsToSuggestVectorT const * pStringsToSuggest,
storage::CountryInfoGetter const * pInfoGetter)
: m_pIndex(pIndex),
m_pCategories(pCategories),
m_pStringsToSuggest(pStringsToSuggest),
m_pInfoGetter(pInfoGetter),
m_preferredLanguage(StringUtf8Multilang::GetLangIndex("en")),
m_currentLang(StringUtf8Multilang::GetLangIndex("en")),
m_viewport(m2::RectD::GetEmptyRect()), m_viewportExtended(m2::RectD::GetEmptyRect()),
m_position(empty_pos_value, empty_pos_value),
m_bOffsetsCacheIsValid(false)
@ -76,7 +77,7 @@ void Query::SetViewport(m2::RectD const & viewport)
void Query::SetPreferredLanguage(string const & lang)
{
m_preferredLanguage = StringUtf8Multilang::GetLangIndex(lang);
m_currentLang = StringUtf8Multilang::GetLangIndex(lang);
}
void Query::ClearCache()
@ -183,7 +184,7 @@ void Query::Search(string const & query, Results & res, unsigned int resultsNeed
m_tokens.resize(31);
vector<vector<int8_t> > langPriorities(3);
langPriorities[0].push_back(m_preferredLanguage);
langPriorities[0].push_back(m_currentLang);
langPriorities[1].push_back(StringUtf8Multilang::GetLangIndex("int_name"));
langPriorities[1].push_back(StringUtf8Multilang::GetLangIndex("en"));
langPriorities[2].push_back(StringUtf8Multilang::GetLangIndex("default"));
@ -208,7 +209,7 @@ void Query::Search(string const & query, Results & res, unsigned int resultsNeed
{
//double const precision = 5.0 * max(0.0001, min(latPrec, lonPrec)); // Min 55 meters
res.AddResult(impl::PreResult2(m_viewport, m_position, lat, lon).
GenerateFinalResult(m_pInfoGetter, m_pCategories));
GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang));
}
}
@ -465,7 +466,7 @@ void Query::FlushResults(Results & res)
LOG(LDEBUG, (indV[i]));
res.AddResult((*(indV[i])).GenerateFinalResult(m_pInfoGetter, m_pCategories));
res.AddResult((*(indV[i])).GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang));
}
}
@ -565,6 +566,18 @@ public:
void Reset() { m_count = 0; }
};
class DoInsertTypes
{
vector<strings::UniString> & m_tokens;
public:
DoInsertTypes(vector<strings::UniString> & tokens) : m_tokens(tokens) {}
void operator() (uint32_t t)
{
m_tokens.push_back(FeatureTypeToString(t));
}
};
} // namespace search::impl
void Query::SearchFeatures()
@ -582,20 +595,14 @@ void Query::SearchFeatures()
if (m_pCategories)
{
for (size_t i = 0; i < m_tokens.size(); ++i)
{
typedef CategoriesMapT::const_iterator IterT;
pair<IterT, IterT> const range = m_pCategories->equal_range(m_tokens[i]);
for (IterT it = range.first; it != range.second; ++it)
tokens[i].push_back(FeatureTypeToString(it->second));
}
m_pCategories->ForEachTypeByName(m_tokens[i], impl::DoInsertTypes(tokens[i]));
}
vector<MwmInfo> mwmInfo;
m_pIndex->GetMwmInfo(mwmInfo);
unordered_set<int8_t> langs;
langs.insert(m_preferredLanguage);
langs.insert(m_currentLang);
langs.insert(StringUtf8Multilang::GetLangIndex("int_name"));
langs.insert(StringUtf8Multilang::GetLangIndex("en"));
langs.insert(StringUtf8Multilang::GetLangIndex("default"));
@ -733,7 +740,7 @@ void Query::MatchForSuggestions(strings::UniString const & token, Results & res)
strings::UniString const & s = it->first;
if (it->second <= token.size() && StartsWith(s.begin(), s.end(), token.begin(), token.end()))
res.AddResult(impl::PreResult2(strings::ToUtf8(s), it->second).
GenerateFinalResult(m_pInfoGetter, m_pCategories));
GenerateFinalResult(m_pInfoGetter, m_pCategories, m_currentLang));
}
}

View file

@ -19,6 +19,7 @@
class FeatureType;
class Index;
class MwmInfo;
class CategoriesHolder;
namespace storage { class CountryInfoGetter; }
@ -37,13 +38,11 @@ namespace impl
class Query
{
public:
// Map category_token -> category_type.
typedef multimap<strings::UniString, uint32_t> CategoriesMapT;
// Vector of pairs (string_to_suggest, min_prefix_length_to_suggest).
typedef vector<pair<strings::UniString, uint8_t> > StringsToSuggestVectorT;
Query(Index const * pIndex,
CategoriesMapT const * pCategories,
CategoriesHolder const * pCategories,
StringsToSuggestVectorT const * pStringsToSuggest,
storage::CountryInfoGetter const * pInfoGetter);
~Query();
@ -87,10 +86,10 @@ private:
void GetBestMatchName(FeatureType const & f, uint32_t & penalty, string & name);
Index const * m_pIndex;
CategoriesMapT const * m_pCategories;
CategoriesHolder const * m_pCategories;
StringsToSuggestVectorT const * m_pStringsToSuggest;
storage::CountryInfoGetter const * m_pInfoGetter;
int m_preferredLanguage;
int8_t m_currentLang, m_inputLang;
volatile bool m_cancel;