[search] [TODO] Index names by prepending the language code to each name. This increases the search index size a bit, but makes language-specific searches faster. For now, search only in English!

This commit is contained in:
Yury Melnichek 2011-11-22 23:42:01 +01:00 committed by Alex Zolotarev
parent aaaf5bf982
commit bf63d55e5c
5 changed files with 63 additions and 26 deletions

View file

@ -26,8 +26,12 @@ struct FeatureName
strings::UniString m_name;
char m_Value[5];
FeatureName(strings::UniString const & name, uint32_t id, uint8_t rank) : m_name(name)
FeatureName(strings::UniString const & name, signed char lang, uint32_t id, uint8_t rank)
{
m_name.reserve(name.size() + 1);
m_name.push_back(static_cast<uint8_t>(lang));
m_name.append(name.begin(), name.end());
m_Value[0] = rank;
uint32_t const idToWrite = SwapIfBigEndian(id);
memcpy(&m_Value[1], &idToWrite, 4);
@ -77,9 +81,9 @@ struct FeatureNameInserter
AddToken(lang, s, m_rank);
}
void AddToken(signed char, strings::UniString const & s, uint32_t rank) const
void AddToken(signed char lang, strings::UniString const & s, uint32_t rank) const
{
m_names.push_back(FeatureName(s, m_pos, static_cast<uint8_t>(min(rank, 255U))));
m_names.push_back(FeatureName(s, lang, m_pos, static_cast<uint8_t>(min(rank, 255U))));
}
bool operator()(signed char lang, string const & name) const

View file

View file

View file

@ -15,6 +15,8 @@
#include "../indexer/search_delimiters.hpp"
#include "../indexer/search_string_utils.hpp"
#include "../coding/multilang_utf8_string.hpp"
#include "../base/logging.hpp"
#include "../base/string_utils.hpp"
#include "../base/stl_add.hpp"
@ -270,13 +272,42 @@ void Query::SearchFeatures()
if (!m_pIndex)
return;
vector<vector<strings::UniString> > tokens(m_tokens.size());
// Add normal tokens.
for (size_t i = 0; i < m_tokens.size(); ++i)
tokens[i].push_back(m_tokens[i]);
// Add names of categories.
if (m_pCategories)
{
for (size_t i = 0; i < m_tokens.size(); ++i)
{
pair<CategoriesMapT::const_iterator, CategoriesMapT::const_iterator> range
= m_pCategories->equal_range(m_tokens[i]);
for (CategoriesMapT::const_iterator it = range.first; it != range.second; ++it)
tokens[i].push_back(FeatureTypeToString(it->second));
}
}
vector<MwmInfo> mwmInfo;
m_pIndex->GetMwmInfo(mwmInfo);
unordered_set<int8_t> langs;
langs.insert(StringUtf8Multilang::GetLangIndex("en"));
SearchFeatures(tokens, mwmInfo, langs, true);
}
void Query::SearchFeatures(vector<vector<strings::UniString> > const & tokens,
vector<MwmInfo> const & mwmInfo,
unordered_set<int8_t> const & langs,
bool onlyInViewport)
{
for (MwmSet::MwmId mwmId = 0; mwmId < mwmInfo.size(); ++mwmId)
{
// Search only mwms that intersect with viewport (world always does).
if (m_viewportExtended.IsIntersect(mwmInfo[mwmId].m_limitRect))
if (!onlyInViewport ||
m_viewportExtended.IsIntersect(mwmInfo[mwmId].m_limitRect))
{
Index::MwmLock mwmLock(*m_pIndex, mwmId);
if (MwmValue * pMwm = mwmLock.GetValue())
@ -289,33 +320,30 @@ void Query::SearchFeatures()
::search::trie::EdgeValueReader()));
if (pTrieRoot)
{
feature::DataHeader const & h = pMwm->GetHeader();
FeaturesVector featuresVector(pMwm->m_cont, h);
impl::FeatureLoader f(featuresVector, *this,
for (size_t i = 0; i < pTrieRoot->m_edge.size(); ++i)
{
TrieIterator::Edge::EdgeStrT const & edge = pTrieRoot->m_edge[i].m_str;
ASSERT_EQUAL(edge.size(), 1, ());
if (edge.size() == 1 && edge[0] < 128 && langs.count(static_cast<int8_t>(edge[0])))
{
scoped_ptr<TrieIterator> pLangRoot(pTrieRoot->GoToEdge(i));
feature::DataHeader const & h = pMwm->GetHeader();
FeaturesVector featuresVector(pMwm->m_cont, h);
impl::FeatureLoader f(
featuresVector,
*this,
(h.GetType() == feature::DataHeader::world) ? "" : mwmLock.GetCountryName());
vector<vector<strings::UniString> > tokens(m_tokens.size());
MatchFeaturesInTrie(tokens, m_prefix, *pLangRoot,
&m_offsetsInViewport[mwmId], f, m_results.max_size() * 10);
// Add normal tokens.
for (size_t i = 0; i < m_tokens.size(); ++i)
tokens[i].push_back(m_tokens[i]);
// Add names of categories.
if (m_pCategories)
{
for (size_t i = 0; i < m_tokens.size(); ++i)
{
pair<CategoriesMapT::const_iterator, CategoriesMapT::const_iterator> range
= m_pCategories->equal_range(m_tokens[i]);
for (CategoriesMapT::const_iterator it = range.first; it != range.second; ++it)
tokens[i].push_back(FeatureTypeToString(it->second));
LOG(LDEBUG, ("Lang:",
StringUtf8Multilang::GetLangByCode(static_cast<int8_t>(edge[0])),
"Matched: ",
f.m_count));
}
}
MatchFeaturesInTrie(tokens, m_prefix, *pTrieRoot,
&m_offsetsInViewport[mwmId], f, m_results.max_size() * 10);
LOG(LDEBUG, ("Matched: ", f.m_count));
}
}
}

View file

@ -14,6 +14,7 @@
class FeatureType;
class Index;
class MwmInfo;
namespace storage { class CountryInfoGetter; }
@ -57,6 +58,10 @@ private:
void FlushResults(function<void (Result const &)> const & f);
void UpdateViewportOffsets();
void SearchFeatures();
void SearchFeatures(vector<vector<strings::UniString> > const & tokens,
vector<MwmInfo> const & mwmInfo,
unordered_set<int8_t> const & langs,
bool onlyInViewport);
void SuggestStrings();
void GetBestMatchName(FeatureType const & f, uint32_t & penalty, string & name);