Review fixes.

This commit is contained in:
Yuri Gorshenin 2017-01-25 13:47:47 +03:00
parent 8ee21d1f33
commit b053c2de1f
15 changed files with 149 additions and 99 deletions

View file

@ -320,16 +320,13 @@ void MatchPostcodesInTrie(TokenSlice const & slice,
if (slice.IsPrefix(i))
{
vector<PrefixDFAModifier<UniStringDFA>> dfas;
for (auto const & s : slice.Get(i))
dfas.emplace_back(UniStringDFA(s));
slice.Get(i).ForEach([&dfas](UniString const & s) { dfas.emplace_back(UniStringDFA(s)); });
MatchInTrie(dfas, TrieRootPrefix<Value>(*postcodesRoot, edge), intersector);
}
else
{
vector<UniStringDFA> dfas;
for (auto const & s : slice.Get(i))
dfas.emplace_back(s);
slice.Get(i).ForEach([&dfas](UniString const & s) { dfas.emplace_back(s); });
MatchInTrie(dfas, TrieRootPrefix<Value>(*postcodesRoot, edge), intersector);
}

View file

@ -180,10 +180,7 @@ void JoinQueryTokens(QueryParams const & params, size_t curToken, size_t endToke
ASSERT_LESS_OR_EQUAL(curToken, endToken, ());
for (size_t i = curToken; i < endToken; ++i)
{
auto const & syms = params.GetTokens(i);
CHECK(!syms.empty(), ("Empty synonyms for token:", i));
res.append(syms.front());
res.append(params.GetToken(i).m_original);
if (i + 1 != endToken)
res.append(sep);
}
@ -384,12 +381,16 @@ void Geocoder::SetParams(Params const & params)
continue;
}
auto & syms = m_params.GetTokens(i);
my::EraseIf(syms, &IsStopWord);
if (syms.empty())
auto & token = m_params.GetToken(i);
if (IsStopWord(token.m_original))
{
m_params.RemoveToken(i);
}
else
{
my::EraseIf(token.m_synonyms, &IsStopWord);
++i;
}
}
// If all tokens are stop words - give up.
@ -401,10 +402,8 @@ void Geocoder::SetParams(Params const & params)
// individually.
for (size_t i = 0; i < m_params.GetNumTokens(); ++i)
{
auto & synonyms = m_params.GetTokens(i);
ASSERT(!synonyms.empty(), ());
if (IsStreetSynonym(synonyms.front()))
auto & token = m_params.GetToken(i);
if (IsStreetSynonym(token.m_original))
m_params.GetTypeIndices(i).clear();
}
@ -416,13 +415,12 @@ void Geocoder::SetParams(Params const & params)
{
m_tokenRequests.emplace_back();
auto & request = m_tokenRequests.back();
for (auto const & name : m_params.GetTokens(i))
{
m_params.GetToken(i).ForEach([&request](UniString const & s) {
// Here and below, we use LevenshteinDFAs for fuzzy
// matching. But for performance reasons, we assume that the
// first letter is always correct.
request.m_names.emplace_back(name, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(name));
}
request.m_names.emplace_back(s, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(s));
});
for (auto const & index : m_params.GetTypeIndices(i))
request.m_categories.emplace_back(FeatureTypeToString(index));
request.m_langs = m_params.GetLangs();
@ -430,22 +428,17 @@ void Geocoder::SetParams(Params const & params)
else
{
auto & request = m_prefixTokenRequest;
for (auto const & name : m_params.GetTokens(i))
{
LOG(LINFO, ("Adding prefix name:", name));
m_params.GetToken(i).ForEach([&request](UniString const & s) {
request.m_names.emplace_back(
LevenshteinDFA(name, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(name)));
}
LevenshteinDFA(s, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(s)));
});
for (auto const & index : m_params.GetTypeIndices(i))
{
LOG(LINFO, ("Adding category:", FeatureTypeToString(index)));
request.m_categories.emplace_back(FeatureTypeToString(index));
}
request.m_langs = m_params.GetLangs();
}
}
LOG(LDEBUG, (DebugPrint(static_cast<QueryParams const &>(m_params))));
LOG(LDEBUG, (static_cast<QueryParams const &>(m_params)));
}
void Geocoder::GoEverywhere()
@ -623,16 +616,9 @@ void Geocoder::InitBaseContext(BaseContext & ctx)
for (size_t i = 0; i < ctx.m_features.size(); ++i)
{
if (m_params.IsPrefixToken(i))
{
LOG(LINFO, ("Token is prefix."));
ctx.m_features[i] = RetrieveAddressFeatures(*m_context, m_cancellable, m_prefixTokenRequest);
}
else
{
LOG(LINFO, ("Token is full."));
ctx.m_features[i] = RetrieveAddressFeatures(*m_context, m_cancellable, m_tokenRequests[i]);
}
LOG(LINFO, ("For i-th token:", i, ctx.m_features[i].PopCount()));
}
ctx.m_hotelsFilter = m_hotelsFilter.MakeScopedFilter(*m_context, m_params.m_hotelsFilter);
}

View file

@ -184,7 +184,7 @@ private:
template <typename TLocality>
using TLocalitiesCache = map<pair<size_t, size_t>, vector<TLocality>>;
QueryParams::Synonyms const & GetTokens(size_t i) const;
QueryParams::Token const & GetTokens(size_t i) const;
// Creates a cache of posting lists corresponding to features in m_context
// for each token and saves it to m_addressFeatures.

View file

@ -614,7 +614,7 @@ int GetOldTypeFromIndex(size_t index)
void Processor::InitParams(QueryParams & params)
{
if (m_prefix.empty())
params.Init(m_tokens.begin(), m_tokens.end());
params.InitNoPrefix(m_tokens.begin(), m_tokens.end());
else
params.InitWithPrefix(m_tokens.begin(), m_tokens.end(), m_prefix);

View file

@ -8,7 +8,7 @@ namespace search
{
namespace
{
// TODO (@y, @m): reuse this class in search::Processor.
// TODO (@y, @m): reuse this class in Processor.
class DoAddStreetSynonyms
{
public:
@ -22,26 +22,25 @@ public:
// All synonyms should be lowercase!
if (ss == "n")
AddSym(i, "north");
AddSynonym(i, "north");
if (ss == "w")
AddSym(i, "west");
AddSynonym(i, "west");
if (ss == "s")
AddSym(i, "south");
AddSynonym(i, "south");
if (ss == "e")
AddSym(i, "east");
AddSynonym(i, "east");
if (ss == "nw")
AddSym(i, "northwest");
AddSynonym(i, "northwest");
if (ss == "ne")
AddSym(i, "northeast");
AddSynonym(i, "northeast");
if (ss == "sw")
AddSym(i, "southwest");
AddSynonym(i, "southwest");
if (ss == "se")
AddSym(i, "southeast");
AddSynonym(i, "southeast");
}
private:
QueryParams::Synonyms & GetSyms(size_t i) const { return m_params.GetTokens(i); }
void AddSym(size_t i, string const & sym) { GetSyms(i).push_back(strings::MakeUniString(sym)); }
void AddSynonym(size_t i, string const & synonym) { m_params.GetToken(i).AddSynonym(synonym); }
QueryParams & m_params;
};
@ -50,7 +49,8 @@ private:
void QueryParams::Clear()
{
m_tokens.clear();
m_prefixTokens.clear();
m_prefixToken.Clear();
m_hasPrefix = false;
m_typeIndices.clear();
m_langs.clear();
m_scale = scales::GetUpperScale();
@ -76,16 +76,16 @@ bool QueryParams::IsPrefixToken(size_t i) const
return i == m_tokens.size();
}
QueryParams::Synonyms const & QueryParams::GetTokens(size_t i) const
QueryParams::Token const & QueryParams::GetToken(size_t i) const
{
ASSERT_LESS(i, GetNumTokens(), ());
return i < m_tokens.size() ? m_tokens[i] : m_prefixTokens;
return i < m_tokens.size() ? m_tokens[i] : m_prefixToken;
}
QueryParams::Synonyms & QueryParams::GetTokens(size_t i)
QueryParams::Token & QueryParams::GetToken(size_t i)
{
ASSERT_LESS(i, GetNumTokens(), ());
return i < m_tokens.size() ? m_tokens[i] : m_prefixTokens;
return i < m_tokens.size() ? m_tokens[i] : m_prefixToken;
}
bool QueryParams::IsNumberTokens(size_t start, size_t end) const
@ -96,14 +96,15 @@ bool QueryParams::IsNumberTokens(size_t start, size_t end) const
for (; start != end; ++start)
{
bool number = false;
for (auto const & t : GetTokens(start))
{
if (feature::IsNumber(t))
GetToken(start).ForEach([&number](String const & s) {
if (feature::IsNumber(s))
{
number = true;
break;
return false; // breaks ForEach
}
}
return true; // continues ForEach
});
if (!number)
return false;
}
@ -115,19 +116,32 @@ void QueryParams::RemoveToken(size_t i)
{
ASSERT_LESS(i, GetNumTokens(), ());
if (i == m_tokens.size())
m_prefixTokens.clear();
{
m_prefixToken.Clear();
m_hasPrefix = false;
}
else
{
m_tokens.erase(m_tokens.begin() + i);
}
m_typeIndices.erase(m_typeIndices.begin() + i);
}
string DebugPrint(search::QueryParams const & params)
string DebugPrint(QueryParams const & params)
{
ostringstream os;
os << "QueryParams [ m_tokens=" << DebugPrint(params.m_tokens)
<< ", m_prefixTokens=" << DebugPrint(params.m_prefixTokens)
os << "QueryParams [ m_tokens=" << ::DebugPrint(params.m_tokens)
<< ", m_prefixToken=" << DebugPrint(params.m_prefixToken)
<< ", m_typeIndices=" << ::DebugPrint(params.m_typeIndices)
<< ", m_langs=" << ::DebugPrint(params.m_langs) << " ]";
return os.str();
}
// Produces a human-readable representation of a token: the original
// string followed by its list of synonyms.
string DebugPrint(QueryParams::Token const & token)
{
  string result("Token [ m_original=");
  result += DebugPrint(token.m_original);
  result += ", m_synonyms=";
  result += DebugPrint(token.m_synonyms);
  result += " ]";
  return result;
}
} // namespace search

View file

@ -6,7 +6,9 @@
#include "base/string_utils.hpp"
#include "std/cstdint.hpp"
#include "std/type_traits.hpp"
#include "std/unordered_set.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
namespace search
@ -15,18 +17,58 @@ class QueryParams
{
public:
using String = strings::UniString;
using Synonyms = vector<String>;
using TypeIndices = vector<uint32_t>;
using Langs = unordered_set<int8_t>;
// A single query token: the original user-typed string plus any
// synonyms registered for it (e.g. "n" -> "north").
struct Token
{
Token() = default;
// Implicit conversion is intentional: callers construct tokens
// directly from strings (e.g. m_tokens.push_back(*tokenBegin)).
Token(String const & original) : m_original(original) {}
void AddSynonym(String const & s) { m_synonyms.push_back(s); }
void AddSynonym(string const & s) { m_synonyms.push_back(strings::MakeUniString(s)); }
// Calls |fn| on the original token and on synonyms.
// This overload is selected when |fn| returns void: |fn| is invoked
// unconditionally on the original string and on every synonym.
template <typename Fn>
typename enable_if<is_same<typename result_of<Fn(String)>::type, void>::value>::type ForEach(
Fn && fn) const
{
fn(m_original);
for_each(m_synonyms.begin(), m_synonyms.end(), forward<Fn>(fn));
}
// Calls |fn| on the original token and on synonyms until |fn| returns false.
// This overload is selected when |fn| returns bool: returning false
// stops the iteration early (original string is visited first).
template <typename Fn>
typename enable_if<is_same<typename result_of<Fn(String)>::type, bool>::value>::type ForEach(
Fn && fn) const
{
if (!fn(m_original))
return;
for (auto const & synonym : m_synonyms)
{
if (!fn(synonym))
return;
}
}
// Resets the token to the default (empty) state.
void Clear()
{
m_original.clear();
m_synonyms.clear();
}
String m_original;
vector<String> m_synonyms;
};
QueryParams() = default;
template <typename It>
void Init(It tokenBegin, It tokenEnd)
void InitNoPrefix(It tokenBegin, It tokenEnd)
{
Clear();
for (; tokenBegin != tokenEnd; ++tokenBegin)
m_tokens.push_back({*tokenBegin});
m_tokens.emplace_back(*tokenBegin);
m_typeIndices.resize(GetNumTokens());
}
@ -35,17 +77,18 @@ public:
{
Clear();
for (; tokenBegin != tokenEnd; ++tokenBegin)
m_tokens.push_back({*tokenBegin});
m_prefixTokens.push_back(prefix);
m_tokens.push_back(*tokenBegin);
m_prefixToken.m_original = prefix;
m_hasPrefix = true;
m_typeIndices.resize(GetNumTokens());
}
inline size_t GetNumTokens() const
{
return m_prefixTokens.empty() ? m_tokens.size() : m_tokens.size() + 1;
return m_hasPrefix ? m_tokens.size() + 1: m_tokens.size();
}
inline bool LastTokenIsPrefix() const { return !m_prefixTokens.empty(); }
inline bool LastTokenIsPrefix() const { return m_hasPrefix; }
inline bool IsEmpty() const { return GetNumTokens() == 0; }
void Clear();
@ -55,8 +98,8 @@ public:
TypeIndices const & GetTypeIndices(size_t i) const;
bool IsPrefixToken(size_t i) const;
Synonyms const & GetTokens(size_t i) const;
Synonyms & GetTokens(size_t i);
Token const & GetToken(size_t i) const;
Token & GetToken(size_t i);
// Returns true if all tokens in [start, end) range have integral
// synonyms.
@ -66,18 +109,22 @@ public:
inline Langs & GetLangs() { return m_langs; }
inline Langs const & GetLangs() const { return m_langs; }
inline bool IsLangExist(int8_t lang) const { return m_langs.count(lang) != 0; }
inline bool LangExists(int8_t lang) const { return m_langs.count(lang) != 0; }
inline int GetScale() const { return m_scale; }
private:
friend string DebugPrint(search::QueryParams const & params);
friend string DebugPrint(QueryParams const & params);
vector<Token> m_tokens;
Token m_prefixToken;
bool m_hasPrefix = false;
vector<Synonyms> m_tokens;
Synonyms m_prefixTokens;
vector<TypeIndices> m_typeIndices;
Langs m_langs;
int m_scale = scales::GetUpperScale();
};
string DebugPrint(QueryParams::Token const & token);
} // namespace search

View file

@ -8,16 +8,22 @@ namespace search
{
namespace impl
{
bool Match(vector<UniString> const & tokens, UniString const & token)
bool FullMatch(QueryParams::Token const & token, UniString const & text)
{
return find(tokens.begin(), tokens.end(), token) != tokens.end();
if (token.m_original == text)
return true;
auto const & synonyms = token.m_synonyms;
return find(synonyms.begin(), synonyms.end(), text) != synonyms.end();
}
bool PrefixMatch(vector<UniString> const & prefixes, UniString const & token)
bool PrefixMatch(QueryParams::Token const & token, UniString const & text)
{
for (auto const & prefix : prefixes)
if (StartsWith(text, token.m_original))
return true;
for (auto const & synonym : token.m_synonyms)
{
if (StartsWith(token, prefix))
if (StartsWith(text, synonym))
return true;
}
return false;

View file

@ -20,9 +20,9 @@ class QueryParams;
namespace impl
{
bool Match(vector<strings::UniString> const & tokens, strings::UniString const & token);
bool FullMatch(QueryParams::Token const & token, strings::UniString const & text);
bool PrefixMatch(vector<strings::UniString> const & prefixes, strings::UniString const & token);
bool PrefixMatch(QueryParams::Token const & token, strings::UniString const & text);
} // namespace impl
// The order and numeric values are important here. Please, check all
@ -65,11 +65,11 @@ NameScore GetNameScore(vector<strings::UniString> const & tokens, TSlice const &
{
bool match = true;
for (size_t i = 0; i < m - 1 && match; ++i)
match = match && impl::Match(slice.Get(i), tokens[offset + i]);
match = match && impl::FullMatch(slice.Get(i), tokens[offset + i]);
if (!match)
continue;
if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1]))
if (impl::FullMatch(slice.Get(m - 1), tokens[offset + m - 1]))
{
if (m == n)
return NAME_SCORE_FULL_MATCH;

View file

@ -184,10 +184,10 @@ bool MatchFeatureByPostcode(FeatureType const & ft, TokenSlice const & slice)
{
if (slice.IsPrefix(i))
{
if (!StartsWith(tokens[i], slice.Get(i).front()))
if (!StartsWith(tokens[i], slice.Get(i).m_original))
return false;
}
else if (tokens[i] != slice.Get(i).front())
else if (tokens[i] != slice.Get(i).m_original)
{
return false;
}

View file

@ -534,7 +534,7 @@ UNIT_CLASS_TEST(ProcessorTest, TestPostcodes)
QueryParams params;
{
strings::UniString const tokens[] = {strings::MakeUniString("141702")};
params.Init(tokens, tokens + ARRAY_SIZE(tokens));
params.InitNoPrefix(tokens, tokens + ARRAY_SIZE(tokens));
}
auto features = RetrievePostcodeFeatures(context, cancellable,
TokenSlice(params, 0, params.GetNumTokens()));

View file

@ -35,7 +35,7 @@ void InitParams(string const & query, bool lastTokenIsPrefix, QueryParams & para
}
else
{
params.Init(tokens.begin(), tokens.end());
params.InitNoPrefix(tokens.begin(), tokens.end());
}
}
@ -47,7 +47,7 @@ void AddLocality(string const & name, uint32_t featureId, QueryParams & params,
Delimiters delims;
SplitUniString(NormalizeAndSimplifyString(name), MakeInsertFunctor(tokens), delims);
size_t numTokens = params.GetNumTokens();
size_t const numTokens = params.GetNumTokens();
for (size_t startToken = 0; startToken != numTokens; ++startToken)
{
@ -56,7 +56,7 @@ void AddLocality(string const & name, uint32_t featureId, QueryParams & params,
bool matches = true;
for (size_t i = startToken; i != endToken && matches; ++i)
{
UniString const & queryToken = params.GetTokens(i).front();
UniString const & queryToken = params.GetToken(i).m_original;
if (params.IsPrefixToken(i))
{
matches = any_of(tokens.begin(), tokens.end(), [&queryToken](UniString const & token)

View file

@ -33,7 +33,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
}
else
{
params.Init(tokens.begin(), tokens.end());
params.InitNoPrefix(tokens.begin(), tokens.end());
}
return GetNameScore(name, TokenSlice(params, startToken, endToken));

View file

@ -154,7 +154,7 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
for (; curToken < ctx.m_numTokens && !ctx.m_usedTokens[curToken] && !streets.IsEmpty();
++curToken)
{
auto const & token = params.GetTokens(curToken).front();
auto const & token = params.GetToken(curToken).m_original;
bool const isPrefix = params.IsPrefixToken(curToken);
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))

View file

@ -17,10 +17,10 @@ class TokenSlice
public:
TokenSlice(QueryParams const & params, size_t startToken, size_t endToken);
inline QueryParams::Synonyms const & Get(size_t i) const
inline QueryParams::Token const & Get(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_params.GetTokens(m_offset + i);
return m_params.GetToken(m_offset + i);
}
inline size_t Size() const { return m_size; }
@ -42,10 +42,10 @@ class TokenSliceNoCategories
public:
TokenSliceNoCategories(QueryParams const & params, size_t startToken, size_t endToken);
inline QueryParams::Synonyms const & Get(size_t i) const
inline QueryParams::Token const & Get(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_params.GetTokens(m_indexes[i]);
return m_params.GetToken(m_indexes[i]);
}
inline size_t Size() const { return m_indexes.size(); }
@ -64,7 +64,7 @@ public:
QuerySlice(TokenSlice const & slice) : m_slice(slice) {}
// QuerySlice overrides:
QueryParams::String const & Get(size_t i) const override { return m_slice.Get(i).front(); }
QueryParams::String const & Get(size_t i) const override { return m_slice.Get(i).m_original; }
size_t Size() const override { return m_slice.Size(); }
private:

View file

@ -17,7 +17,7 @@ void ForEachCategoryType(StringSliceBase const & slice, TLocales const & locales
{
for (size_t i = 0; i < slice.Size(); ++i)
{
auto token = slice.Get(i);
auto const & token = slice.Get(i);
for (size_t j = 0; j < locales.size(); ++j)
categories.ForEachTypeByName(locales[j], token, bind<void>(todo, i, _1));