[search] Pair (start-token, end-token) is replaced by TokenRange.

This commit is contained in:
Yuri Gorshenin 2017-02-03 15:25:42 +03:00 committed by Sergey Yershov
parent 22d252d110
commit ce7bc3a7f9
24 changed files with 217 additions and 163 deletions

View file

@ -120,6 +120,7 @@ set(
streets_matcher.hpp
string_intersection.hpp
suggest.hpp
token_range.hpp
token_slice.cpp
token_slice.hpp
types_skipper.cpp

View file

@ -12,8 +12,7 @@ void FeaturesLayer::Clear()
{
m_sortedFeatures = nullptr;
m_subQuery.clear();
m_startToken = 0;
m_endToken = 0;
m_tokenRange.Clear();
m_type = SearchModel::SEARCH_TYPE_COUNT;
m_hasDelayedFeatures = false;
m_lastTokenIsPrefix = false;
@ -24,8 +23,9 @@ string DebugPrint(FeaturesLayer const & layer)
ostringstream os;
os << "FeaturesLayer [ size of m_sortedFeatures: "
<< (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0)
<< ", m_subQuery: " << DebugPrint(layer.m_subQuery) << ", m_startToken: " << layer.m_startToken
<< ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type)
<< ", m_subQuery: " << DebugPrint(layer.m_subQuery)
<< ", m_tokenRange: " << DebugPrint(layer.m_tokenRange)
<< ", m_type: " << DebugPrint(layer.m_type)
<< ", m_lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << " ]";
return os.str();
}

View file

@ -1,6 +1,7 @@
#pragma once
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "base/string_utils.hpp"
@ -22,8 +23,7 @@ struct FeaturesLayer
strings::UniString m_subQuery;
size_t m_startToken;
size_t m_endToken;
TokenRange m_tokenRange;
SearchModel::SearchType m_type;
// *NOTE* This field is meaningful only when m_type equals to

View file

@ -79,26 +79,27 @@ UniString const kUniSpace(MakeUniString(" "));
struct ScopedMarkTokens
{
ScopedMarkTokens(vector<bool> & usedTokens, size_t from, size_t to)
: m_usedTokens(usedTokens), m_from(from), m_to(to)
ScopedMarkTokens(vector<bool> & usedTokens, TokenRange const & range)
: m_usedTokens(usedTokens), m_range(range)
{
ASSERT_LESS_OR_EQUAL(m_from, m_to, ());
ASSERT_LESS_OR_EQUAL(m_to, m_usedTokens.size(), ());
ASSERT(m_range.IsValid(), ());
ASSERT_LESS_OR_EQUAL(m_range.m_end, m_usedTokens.size(), ());
#if defined(DEBUG)
for (size_t i = m_from; i != m_to; ++i)
for (size_t i : m_range)
ASSERT(!m_usedTokens[i], (i));
#endif
fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, true /* used */);
fill(m_usedTokens.begin() + m_range.m_begin, m_usedTokens.begin() + m_range.m_end,
true /* used */);
}
~ScopedMarkTokens()
{
fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, false /* used */);
fill(m_usedTokens.begin() + m_range.m_begin, m_usedTokens.begin() + m_range.m_end,
false /* used */);
}
vector<bool> & m_usedTokens;
size_t const m_from;
size_t const m_to;
TokenRange const m_range;
};
class LazyRankTable : public RankTable
@ -174,14 +175,14 @@ private:
LazyRankTable m_ranks;
};
void JoinQueryTokens(QueryParams const & params, size_t curToken, size_t endToken,
UniString const & sep, UniString & res)
void JoinQueryTokens(QueryParams const & params, TokenRange const & range, UniString const & sep,
UniString & res)
{
ASSERT_LESS_OR_EQUAL(curToken, endToken, ());
for (size_t i = curToken; i < endToken; ++i)
ASSERT(range.IsValid(), (range));
for (size_t i : range)
{
res.append(params.GetToken(i).m_original);
if (i + 1 != endToken)
if (i + 1 != range.m_end)
res.append(sep);
}
}
@ -623,18 +624,16 @@ void Geocoder::InitBaseContext(BaseContext & ctx)
ctx.m_hotelsFilter = m_hotelsFilter.MakeScopedFilter(*m_context, m_params.m_hotelsFilter);
}
void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
void Geocoder::InitLayer(SearchModel::SearchType type, TokenRange const & tokenRange,
FeaturesLayer & layer)
{
layer.Clear();
layer.m_type = type;
layer.m_startToken = startToken;
layer.m_endToken = endToken;
layer.m_tokenRange = tokenRange;
JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */,
layer.m_subQuery);
JoinQueryTokens(m_params, layer.m_tokenRange, kUniSpace /* sep */, layer.m_subQuery);
layer.m_lastTokenIsPrefix =
layer.m_startToken < layer.m_endToken && m_params.IsPrefixToken(layer.m_endToken - 1);
!layer.m_tokenRange.Empty() && m_params.IsPrefixToken(layer.m_tokenRange.m_end - 1);
}
void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filter,
@ -685,7 +684,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx)
}
++count;
m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region);
m_regions[type][l.m_tokenRange].push_back(region);
}
};
@ -709,7 +708,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx)
LOG(LINFO, ("City =", city.m_defaultName, "radius =", radius, "prob =", city.m_prob));
#endif
m_cities[{l.m_startToken, l.m_endToken}].push_back(city);
m_cities[city.m_tokenRange].push_back(city);
}
break;
}
@ -758,7 +757,7 @@ void Geocoder::FillVillageLocalities(BaseContext const & ctx)
LOG(LDEBUG, ("Village =", village.m_defaultName, "radius =", radius, "prob =", village.m_prob));
#endif
m_cities[{l.m_startToken, l.m_endToken}].push_back(village);
m_cities[village.m_tokenRange].push_back(village);
if (numVillages >= kMaxNumVillages)
break;
}
@ -812,9 +811,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
{
BailIfCancelled();
size_t const startToken = p.first.first;
size_t const endToken = p.first.second;
if (ctx.HasUsedTokensInRange(startToken, endToken))
auto const & tokenRange = p.first;
if (ctx.HasUsedTokensInRange(tokenRange))
continue;
for (auto const & region : p.second)
@ -838,11 +836,11 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
if (!matches)
continue;
ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange);
if (ctx.AllTokensUsed())
{
// Region matches to search query, we need to emit it as is.
EmitResult(ctx, region, startToken, endToken);
EmitResult(ctx, region, tokenRange);
continue;
}
@ -866,9 +864,8 @@ void Geocoder::MatchCities(BaseContext & ctx)
// Localities are ordered by (m_startToken, m_endToken) pairs.
for (auto const & p : m_cities)
{
size_t const startToken = p.first.first;
size_t const endToken = p.first.second;
if (ctx.HasUsedTokensInRange(startToken, endToken))
auto const & tokenRange = p.first;
if (ctx.HasUsedTokensInRange(tokenRange))
continue;
for (auto const & city : p.second)
@ -881,11 +878,11 @@ void Geocoder::MatchCities(BaseContext & ctx)
continue;
}
ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange);
if (ctx.AllTokensUsed())
{
// City matches to search query, we need to emit it as is.
EmitResult(ctx, city, startToken, endToken);
EmitResult(ctx, city, tokenRange);
continue;
}
@ -947,7 +944,7 @@ void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn)
if (ctx.m_usedTokens[startToken + n - 1])
break;
TokenSlice slice(m_params, startToken, startToken + n);
TokenSlice slice(m_params, TokenRange(startToken, startToken + n));
auto const isPrefix = startToken + n == ctx.m_numTokens;
if (LooksLikePostcode(QuerySlice(slice), isPrefix))
endToken = startToken + n;
@ -955,20 +952,17 @@ void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn)
if (startToken == endToken)
continue;
auto postcodes =
RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, startToken, endToken));
MY_SCOPE_GUARD(cleanup, [&]()
{
m_postcodes.Clear();
});
TokenRange const tokenRange(startToken, endToken);
auto postcodes = RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, tokenRange));
MY_SCOPE_GUARD(cleanup, [&]() { m_postcodes.Clear(); });
if (!postcodes.IsEmpty())
{
ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange);
m_postcodes.Clear();
m_postcodes.m_startToken = startToken;
m_postcodes.m_endToken = endToken;
m_postcodes.m_tokenRange = tokenRange;
m_postcodes.m_features = move(postcodes);
fn();
@ -994,14 +988,14 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
auto & layer = m_layers.back();
InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_startToken, prediction.m_endToken, layer);
InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_tokenRange, layer);
vector<uint32_t> sortedFeatures;
sortedFeatures.reserve(prediction.m_features.PopCount());
prediction.m_features.ForEach(MakeBackInsertFunctor(sortedFeatures));
layer.m_sortedFeatures = &sortedFeatures;
ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_startToken, prediction.m_endToken);
ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_tokenRange);
MatchPOIsAndBuildings(ctx, 0 /* curToken */);
}
@ -1024,15 +1018,11 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
CBV filtered = m_postcodes.m_features;
if (m_filter->NeedToFilter(m_postcodes.m_features))
filtered = m_filter->Filter(m_postcodes.m_features);
filtered.ForEach([&](uint32_t id)
{
SearchModel::SearchType searchType;
if (GetSearchTypeInGeocoding(ctx, id, searchType))
{
EmitResult(ctx, m_context->GetId(), id, searchType,
m_postcodes.m_startToken, m_postcodes.m_endToken);
}
});
filtered.ForEach([&](uint32_t id) {
SearchModel::SearchType searchType;
if (GetSearchTypeInGeocoding(ctx, id, searchType))
EmitResult(ctx, m_context->GetId(), id, searchType, m_postcodes.m_tokenRange);
});
return;
}
@ -1052,7 +1042,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
if (!m_postcodes.m_features.HasBit(id))
continue;
EmitResult(ctx, m_context->GetId(), id, SearchModel::SEARCH_TYPE_STREET,
m_layers.back().m_startToken, m_layers.back().m_endToken);
m_layers.back().m_tokenRange);
}
}
@ -1062,8 +1052,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
auto & layer = m_layers.back();
InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_startToken, m_postcodes.m_endToken,
layer);
InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_tokenRange, layer);
vector<uint32_t> features;
m_postcodes.m_features.ForEach(MakeBackInsertFunctor(features));
@ -1111,7 +1100,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
{
auto & layer = m_layers.back();
InitLayer(layer.m_type, curToken, curToken + n, layer);
InitLayer(layer.m_type, TokenRange(curToken, curToken + n), layer);
}
features = features.Intersect(ctx.m_features[curToken + n - 1]);
@ -1222,11 +1211,8 @@ bool Geocoder::IsLayerSequenceSane() const
{
auto const & buildings = m_layers[buildingIndex];
auto const & streets = m_layers[streetIndex];
if (buildings.m_startToken != streets.m_endToken &&
buildings.m_endToken != streets.m_startToken)
{
if (!buildings.m_tokenRange.AdjacentTo(streets.m_tokenRange))
return false;
}
}
return true;
@ -1257,12 +1243,12 @@ void Geocoder::FindPaths(BaseContext const & ctx)
// TODO(@y, @m, @vng): use rest fields of IntersectionResult for
// better scoring.
EmitResult(ctx, m_context->GetId(), result.InnermostResult(), innermostLayer.m_type,
innermostLayer.m_startToken, innermostLayer.m_endToken);
innermostLayer.m_tokenRange);
});
}
void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId,
SearchModel::SearchType type, size_t startToken, size_t endToken)
SearchModel::SearchType type, TokenRange const & tokenRange)
{
FeatureID id(mwmId, ftId);
@ -1276,14 +1262,13 @@ void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId,
// pivot.
PreRankingInfo info;
info.m_searchType = type;
info.m_startToken = startToken;
info.m_endToken = endToken;
info.m_tokenRange = tokenRange;
m_preRanker.Emplace(id, info);
}
void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, size_t startToken,
size_t endToken)
void Geocoder::EmitResult(BaseContext const & ctx, Region const & region,
TokenRange const & tokenRange)
{
SearchModel::SearchType type;
switch (region.m_type)
@ -1292,13 +1277,12 @@ void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, size_t
case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break;
case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break;
}
EmitResult(ctx, region.m_countryId, region.m_featureId, type, startToken, endToken);
EmitResult(ctx, region.m_countryId, region.m_featureId, type, tokenRange);
}
void Geocoder::EmitResult(BaseContext const & ctx, City const & city, size_t startToken,
size_t endToken)
void Geocoder::EmitResult(BaseContext const & ctx, City const & city, TokenRange const & tokenRange)
{
EmitResult(ctx, city.m_countryId, city.m_featureId, city.m_type, startToken, endToken);
EmitResult(ctx, city.m_countryId, city.m_featureId, city.m_type, tokenRange);
}
void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken)
@ -1334,7 +1318,7 @@ void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken)
if (!GetSearchTypeInGeocoding(ctx, featureId, searchType))
return;
if (searchType == SearchModel::SEARCH_TYPE_UNCLASSIFIED)
EmitResult(ctx, m_context->GetId(), featureId, searchType, startToken, curToken);
EmitResult(ctx, m_context->GetId(), featureId, searchType, TokenRange(startToken, curToken));
};
allFeatures.ForEach(emitUnclassified);
}
@ -1385,7 +1369,7 @@ string DebugPrint(Geocoder::Locality const & locality)
os << "Locality [ ";
os << "m_countryId=" << DebugPrint(locality.m_countryId) << ", ";
os << "m_featureId=" << locality.m_featureId << ", ";
os << "token range=[" << locality.m_startToken << ", " << locality.m_endToken << "), ";
os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", ";
os << "m_prob=" << locality.m_prob;
os << " ]";
return os.str();

View file

@ -17,6 +17,7 @@
#include "search/query_params.hpp"
#include "search/ranking_utils.hpp"
#include "search/streets_matcher.hpp"
#include "search/token_range.hpp"
#include "indexer/index.hpp"
#include "indexer/mwm_set.hpp"
@ -96,20 +97,18 @@ public:
{
Locality() = default;
Locality(MwmSet::MwmId const & countryId, uint32_t featureId, size_t startToken,
size_t endToken, double prob)
Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange,
double prob)
: m_countryId(countryId)
, m_featureId(featureId)
, m_startToken(startToken)
, m_endToken(endToken)
, m_tokenRange(tokenRange)
, m_prob(prob)
{
}
MwmSet::MwmId m_countryId;
uint32_t m_featureId = 0;
size_t m_startToken = 0;
size_t m_endToken = 0;
TokenRange m_tokenRange;
// Measures our belief in the fact that tokens in
// |m_tokenRange| indeed specify a locality. Currently
@ -173,20 +172,18 @@ private:
{
void Clear()
{
m_startToken = 0;
m_endToken = 0;
m_tokenRange.Clear();
m_features.Reset();
}
size_t m_startToken = 0;
size_t m_endToken = 0;
TokenRange m_tokenRange;
CBV m_features;
};
void GoImpl(vector<shared_ptr<MwmInfo>> & infos, bool inViewport);
template <typename TLocality>
using TLocalitiesCache = map<pair<size_t, size_t>, vector<TLocality>>;
template <typename Locality>
using LocalitiesCache = map<TokenRange, vector<Locality>>;
QueryParams::Token const & GetTokens(size_t i) const;
@ -194,7 +191,7 @@ private:
// for each token and saves it to m_addressFeatures.
void InitBaseContext(BaseContext & ctx);
void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
void InitLayer(SearchModel::SearchType type, TokenRange const & tokenRange,
FeaturesLayer & layer);
void FillLocalityCandidates(BaseContext const & ctx,
@ -256,10 +253,9 @@ private:
// Forms result and feeds it to |m_preRanker|.
void EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId,
SearchModel::SearchType type, size_t startToken, size_t endToken);
void EmitResult(BaseContext const & ctx, Region const & region, size_t startToken,
size_t endToken);
void EmitResult(BaseContext const & ctx, City const & city, size_t startToken, size_t endToken);
SearchModel::SearchType type, TokenRange const & tokenRange);
void EmitResult(BaseContext const & ctx, Region const & region, TokenRange const & tokenRange);
void EmitResult(BaseContext const & ctx, City const & city, TokenRange const & tokenRange);
// Tries to match unclassified objects from lower layers, like
// parks, forests, lakes, rivers, etc. This method finds all
@ -305,8 +301,8 @@ private:
// m_cities stores both big cities that are visible at World.mwm
// and small villages and hamlets that are not.
TLocalitiesCache<City> m_cities;
TLocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];
LocalitiesCache<City> m_cities;
LocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];
// Caches of features in rects. These caches are separated from
// TLocalitiesCache because the latter are quite lightweight and not

View file

@ -1,5 +1,8 @@
#include "search/geocoder_context.hpp"
#include "search/token_range.hpp"
#include "base/assert.hpp"
#include "base/stl_add.hpp"
#include "std/algorithm.hpp"
@ -18,9 +21,11 @@ bool BaseContext::AllTokensUsed() const
return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
}
bool BaseContext::HasUsedTokensInRange(size_t from, size_t to) const
bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const
{
return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
ASSERT(range.IsValid(), (range));
return any_of(m_usedTokens.begin() + range.m_begin, m_usedTokens.begin() + range.m_end,
IdFunctor());
}
size_t BaseContext::NumUnusedTokenGroups() const

View file

@ -9,6 +9,7 @@
namespace search
{
class FeaturesFilter;
struct TokenRange;
struct BaseContext
{
@ -19,9 +20,8 @@ struct BaseContext
// Returns true iff all tokens are used.
bool AllTokensUsed() const;
// Returns true if there exists at least one used token in [from,
// to).
bool HasUsedTokensInRange(size_t from, size_t to) const;
// Returns true if there exists at least one used token in |range|.
bool HasUsedTokensInRange(TokenRange const & range) const;
// Counts number of groups of consecutive unused tokens.
size_t NumUnusedTokenGroups() const;

View file

@ -33,5 +33,4 @@ struct IntersectionResult
};
string DebugPrint(IntersectionResult const & result);
} // namespace search

View file

@ -28,7 +28,7 @@ LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScor
LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality)
: m_locality(locality)
, m_numTokens(locality.m_endToken - locality.m_startToken)
, m_numTokens(locality.m_tokenRange.Size())
, m_rank(0)
, m_nameScore(NAME_SCORE_ZERO)
{
@ -58,13 +58,14 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte
for (size_t endToken = startToken + 1; endToken <= ctx.m_numTokens; ++endToken)
{
TokenRange const tokenRange(startToken, endToken);
// Skip locality candidates that match only numbers.
if (!m_params.IsNumberTokens(startToken, endToken))
if (!m_params.IsNumberTokens(tokenRange))
{
intersection.ForEach([&](uint32_t featureId) {
double const prob = static_cast<double>(intersection.PopCount()) /
static_cast<double>(unfilteredIntersection.PopCount());
localities.emplace_back(countryId, featureId, startToken, endToken, prob);
localities.emplace_back(countryId, featureId, tokenRange, prob);
});
}
@ -144,10 +145,7 @@ void LocalityScorer::SortByNameAndProb(std::vector<ExLocality> & ls) const
auto score = NAME_SCORE_ZERO;
for (auto const & name : names)
{
score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_startToken,
l.m_locality.m_endToken)));
}
score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_tokenRange)));
l.m_nameScore = score;
std::sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs) {

View file

@ -42,7 +42,7 @@ struct ComparePreResult1
auto const & rinfo = rhs.GetInfo();
if (linfo.GetNumTokens() != rinfo.GetNumTokens())
return linfo.GetNumTokens() > rinfo.GetNumTokens();
return linfo.m_startToken < rinfo.m_startToken;
return linfo.m_tokenRange.m_begin < rinfo.m_tokenRange.m_begin;
}
};

View file

@ -9,8 +9,7 @@ string DebugPrint(PreRankingInfo const & info)
ostringstream os;
os << "PreRankingInfo [";
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
os << "m_startToken:" << info.m_startToken << ",";
os << "m_endToken:" << info.m_endToken << ",";
os << "m_tokenRange:" << DebugPrint(info.m_tokenRange) << ",";
os << "m_rank:" << info.m_rank << ",";
os << "m_searchType:" << info.m_searchType;
os << "]";

View file

@ -1,6 +1,7 @@
#pragma once
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "geometry/point2d.hpp"
@ -10,7 +11,7 @@ namespace search
{
struct PreRankingInfo
{
inline size_t GetNumTokens() const { return m_endToken - m_startToken; }
inline size_t GetNumTokens() const { return m_tokenRange.Size(); }
// An abstract distance from the feature to the pivot. Measurement
// units do not matter here.
@ -19,10 +20,8 @@ struct PreRankingInfo
m2::PointD m_center = m2::PointD::Zero();
bool m_centerLoaded = false;
// Tokens [m_startToken, m_endToken) match to the feature name or
// house number.
size_t m_startToken = 0;
size_t m_endToken = 0;
// Tokens match to the feature name or house number.
TokenRange m_tokenRange;
// Rank of the feature.
uint8_t m_rank = 0;

View file

@ -1,5 +1,7 @@
#include "search/query_params.hpp"
#include "search/token_range.hpp"
#include "indexer/feature_impl.hpp"
#include "std/algorithm.hpp"
@ -88,15 +90,15 @@ QueryParams::Token & QueryParams::GetToken(size_t i)
return i < m_tokens.size() ? m_tokens[i] : m_prefixToken;
}
bool QueryParams::IsNumberTokens(size_t start, size_t end) const
bool QueryParams::IsNumberTokens(TokenRange const & range) const
{
ASSERT_LESS(start, end, ());
ASSERT_LESS_OR_EQUAL(end, GetNumTokens(), ());
ASSERT(range.IsValid(), (range));
ASSERT_LESS_OR_EQUAL(range.m_end, GetNumTokens(), ());
for (; start != end; ++start)
for (size_t i : range)
{
bool number = false;
GetToken(start).ForEach([&number](String const & s) {
GetToken(i).ForEach([&number](String const & s) {
if (feature::IsNumber(s))
{
number = true;

View file

@ -13,6 +13,8 @@
namespace search
{
struct TokenRange;
class QueryParams
{
public:
@ -101,9 +103,8 @@ public:
Token const & GetToken(size_t i) const;
Token & GetToken(size_t i);
// Returns true if all tokens in [start, end) range have integral
// synonyms.
bool IsNumberTokens(size_t start, size_t end) const;
// Returns true if all tokens in |range| have integral synonyms.
bool IsNumberTokens(TokenRange const & range) const;
void RemoveToken(size_t i);

View file

@ -179,8 +179,8 @@ class PreResult2Maker
info.m_searchType = preInfo.m_searchType;
info.m_nameScore = NAME_SCORE_ZERO;
TokenSlice slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_startToken, preInfo.m_endToken);
TokenSlice slice(m_params, preInfo.m_tokenRange);
TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_tokenRange);
for (auto const & lang : m_params.GetLangs())
{

View file

@ -75,6 +75,7 @@ HEADERS += \
streets_matcher.hpp \
string_intersection.hpp \
suggest.hpp \
token_range.hpp \
token_slice.hpp \
types_skipper.hpp \
utils.hpp \

View file

@ -133,8 +133,7 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke)
FeatureID id(mwmId, index);
PreRankingInfo info;
info.m_startToken = 0;
info.m_endToken = 1;
info.m_tokenRange = TokenRange(0, 1);
info.m_searchType = SearchModel::SEARCH_TYPE_POI;
preRanker.Emplace(id, info);

View file

@ -4,6 +4,7 @@
#include "search/search_integration_tests/helpers.hpp"
#include "search/search_tests_support/test_results_matching.hpp"
#include "search/search_tests_support/test_search_request.hpp"
#include "search/token_range.hpp"
#include "search/token_slice.hpp"
#include "generator/feature_builder.hpp"
@ -537,8 +538,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestPostcodes)
strings::UniString const tokens[] = {strings::MakeUniString("141702")};
params.InitNoPrefix(tokens, tokens + ARRAY_SIZE(tokens));
}
auto features = RetrievePostcodeFeatures(context, cancellable,
TokenSlice(params, 0, params.GetNumTokens()));
auto features = RetrievePostcodeFeatures(
context, cancellable, TokenSlice(params, TokenRange(0, params.GetNumTokens())));
TEST_EQUAL(1, features->PopCount(), ());
uint64_t index = 0;

View file

@ -2,6 +2,7 @@
#include "search/query_params.hpp"
#include "search/ranking_utils.hpp"
#include "search/token_range.hpp"
#include "search/token_slice.hpp"
#include "indexer/search_delimiters.hpp"
@ -18,7 +19,7 @@ using namespace strings;
namespace
{
NameScore GetScore(string const & name, string const & query, size_t startToken, size_t endToken)
NameScore GetScore(string const & name, string const & query, TokenRange const & tokenRange)
{
search::Delimiters delims;
QueryParams params;
@ -36,17 +37,20 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
params.InitNoPrefix(tokens.begin(), tokens.end());
}
return GetNameScore(name, TokenSlice(params, startToken, endToken));
return GetNameScore(name, TokenSlice(params, tokenRange));
}
UNIT_TEST(NameTest_Smoke)
{
TEST_EQUAL(GetScore("New York", "Central Park, New York, US", 2, 4), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("New York", "York", 0, 1), NAME_SCORE_SUBSTRING, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", 2, 3), NAME_SCORE_FULL_MATCH_PREFIX, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", 2, 3), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("San Francisco", "Fran", 0, 1), NAME_SCORE_SUBSTRING_PREFIX, ());
TEST_EQUAL(GetScore("San Francisco", "Fran ", 0, 1), NAME_SCORE_ZERO, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", 0, 1), NAME_SCORE_FULL_MATCH_PREFIX, ());
TEST_EQUAL(GetScore("New York", "Central Park, New York, US", TokenRange(2, 4)),
NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("New York", "York", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH_PREFIX,
());
TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("San Francisco", "Fran", TokenRange(0, 1)), NAME_SCORE_SUBSTRING_PREFIX, ());
TEST_EQUAL(GetScore("San Francisco", "Fran ", TokenRange(0, 1)), NAME_SCORE_ZERO, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH_PREFIX,
());
}
} // namespace

View file

@ -23,7 +23,7 @@ bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Predicti
if (lhs.GetNumTokens() != rhs.GetNumTokens())
return lhs.GetNumTokens() > rhs.GetNumTokens();
return lhs.m_startToken < rhs.m_startToken;
return lhs.m_tokenRange.m_begin < rhs.m_tokenRange.m_begin;
}
} // namespace
@ -109,8 +109,7 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
predictions.emplace_back();
auto & prediction = predictions.back();
prediction.m_startToken = startToken;
prediction.m_endToken = curToken;
prediction.m_tokenRange = TokenRange(startToken, curToken);
ASSERT_NOT_EQUAL(fs.PopCount(), 0, ());
ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ());

View file

@ -2,6 +2,7 @@
#include "search/cbv.hpp"
#include "search/geocoder_context.hpp"
#include "search/token_range.hpp"
#include "std/vector.hpp"
@ -15,15 +16,11 @@ class StreetsMatcher
public:
struct Prediction
{
inline size_t GetNumTokens() const { return m_endToken - m_startToken; }
inline size_t GetNumTokens() const { return m_tokenRange.Size(); }
CBV m_features;
size_t m_startToken = 0;
size_t m_endToken = 0;
TokenRange m_tokenRange;
double m_prob = 0.0;
uint64_t m_hash = 0;
};

71
search/token_range.hpp Normal file
View file

@ -0,0 +1,71 @@
#pragma once
#include "base/assert.hpp"
#include "base/range_iterator.hpp"

#include <cstddef>
#include <cstdint>
#include <sstream>
#include <string>
namespace search
{
// Half-open range [m_begin, m_end) of token indices in a search query.
struct TokenRange final
{
  TokenRange() = default;

  TokenRange(size_t begin, size_t end) : m_begin(begin), m_end(end) { ASSERT(IsValid(), (*this)); }

  // Returns true when |rhs| immediately precedes or follows this range.
  inline bool AdjacentTo(TokenRange const & rhs) const
  {
    ASSERT(IsValid(), (*this));
    ASSERT(rhs.IsValid(), (rhs));
    return m_end == rhs.m_begin || rhs.m_end == m_begin;
  }

  // Number of token indices covered by the range.
  inline size_t Size() const
  {
    ASSERT(IsValid(), (*this));
    return m_end - m_begin;
  }

  inline bool Empty() const { return Size() == 0; }

  // Resets the range to the empty range [0, 0).
  inline void Clear() { m_begin = m_end = 0; }

  // A range is valid iff its bounds are ordered; all other methods
  // assert validity before use.
  inline bool IsValid() const { return m_begin <= m_end; }

  // Lexicographic order on (m_begin, m_end) — used as a map key.
  inline bool operator<(TokenRange const & rhs) const
  {
    return m_begin != rhs.m_begin ? m_begin < rhs.m_begin : m_end < rhs.m_end;
  }

  inline bool operator==(TokenRange const & rhs) const
  {
    return m_begin == rhs.m_begin && m_end == rhs.m_end;
  }

  // Iteration over the token indices contained in the range.
  inline my::RangeIterator<size_t> begin() const { return my::RangeIterator<size_t>(m_begin); }
  inline my::RangeIterator<size_t> end() const { return my::RangeIterator<size_t>(m_end); }
  inline my::RangeIterator<size_t> cbegin() const { return my::RangeIterator<size_t>(m_begin); }
  inline my::RangeIterator<size_t> cend() const { return my::RangeIterator<size_t>(m_end); }

  size_t m_begin = 0;
  size_t m_end = 0;
};
// Renders the range in half-open interval notation, e.g. "TokenRange [2, 4)".
inline std::string DebugPrint(TokenRange const & tokenRange)
{
  std::string result("TokenRange [");
  result.append(std::to_string(tokenRange.m_begin));
  result.append(", ");
  result.append(std::to_string(tokenRange.m_end));
  result.append(")");
  return result;
}
} // namespace search

View file

@ -23,10 +23,10 @@ string SliceToString(string const & name, TSlice const & slice)
} // namespace
// TokenSlice --------------------------------------------------------------------------------------
TokenSlice::TokenSlice(QueryParams const & params, size_t startToken, size_t endToken)
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
TokenSlice::TokenSlice(QueryParams const & params, TokenRange const & range)
: m_params(params), m_offset(range.m_begin), m_size(range.Size())
{
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
ASSERT(range.IsValid(), (range));
}
bool TokenSlice::IsPrefix(size_t i) const
@ -36,14 +36,11 @@ bool TokenSlice::IsPrefix(size_t i) const
}
// TokenSliceNoCategories --------------------------------------------------------------------------
TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, size_t startToken,
size_t endToken)
TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, TokenRange const & range)
: m_params(params)
{
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
m_indexes.reserve(endToken - startToken);
for (size_t i = startToken; i < endToken; ++i)
m_indexes.reserve(range.Size());
for (size_t i : range)
{
if (!m_params.IsCategorySynonym(i))
m_indexes.push_back(i);

View file

@ -1,6 +1,7 @@
#pragma once
#include "search/query_params.hpp"
#include "search/token_range.hpp"
#include "indexer/string_slice.hpp"
@ -15,7 +16,7 @@ namespace search
class TokenSlice
{
public:
TokenSlice(QueryParams const & params, size_t startToken, size_t endToken);
TokenSlice(QueryParams const & params, TokenRange const & range);
inline QueryParams::Token const & Get(size_t i) const
{
@ -40,7 +41,7 @@ private:
class TokenSliceNoCategories
{
public:
TokenSliceNoCategories(QueryParams const & params, size_t startToken, size_t endToken);
TokenSliceNoCategories(QueryParams const & params, TokenRange const & range);
inline QueryParams::Token const & Get(size_t i) const
{