forked from organicmaps/organicmaps
[search] Minor refactoring.
This commit is contained in:
parent
c37d7fc084
commit
954050088d
1 changed file with 112 additions and 111 deletions
|
@ -67,6 +67,112 @@ bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs,
|
|||
{
|
||||
return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash;
|
||||
}
|
||||
|
||||
void FindStreets(BaseContext const & ctx, FeaturesFilter const & filter, QueryParams const & params,
|
||||
size_t startToken, bool withMisprints,
|
||||
vector<StreetsMatcher::Prediction> & predictions)
|
||||
{
|
||||
// Here we try to match as many tokens as possible while
|
||||
// intersection is a non-empty bit vector of streets. Single
|
||||
// tokens that are synonyms to streets are ignored. Moreover,
|
||||
// each time a token that looks like a beginning of a house number
|
||||
// is met, we try to use current intersection of tokens as a
|
||||
// street layer and try to match BUILDINGs or POIs.
|
||||
CBV streets(ctx.m_streets);
|
||||
|
||||
CBV all;
|
||||
all.SetFull();
|
||||
|
||||
size_t curToken = startToken;
|
||||
|
||||
// This variable is used for prevention of duplicate calls to
|
||||
// CreateStreetsLayerAndMatchLowerLayers() with the same
|
||||
// arguments.
|
||||
size_t lastToken = startToken;
|
||||
|
||||
// When true, no bit vectors were intersected with |streets| at all.
|
||||
bool emptyIntersection = true;
|
||||
|
||||
// When true, |streets| is in the incomplete state and can't be
|
||||
// used for creation of street layers.
|
||||
bool incomplete = false;
|
||||
|
||||
auto emit = [&]() {
|
||||
if (streets.IsEmpty() || emptyIntersection || incomplete || lastToken == curToken)
|
||||
return;
|
||||
|
||||
CBV fs(streets);
|
||||
CBV fa(all);
|
||||
|
||||
ASSERT(!fs.IsFull(), ());
|
||||
ASSERT(!fa.IsFull(), ());
|
||||
|
||||
if (filter.NeedToFilter(fs))
|
||||
fs = filter.Filter(fs);
|
||||
|
||||
if (fs.IsEmpty())
|
||||
return;
|
||||
|
||||
if (filter.NeedToFilter(fa))
|
||||
fa = filter.Filter(fa).Union(fs);
|
||||
|
||||
predictions.emplace_back();
|
||||
auto & prediction = predictions.back();
|
||||
|
||||
prediction.m_tokenRange = TokenRange(startToken, curToken);
|
||||
|
||||
ASSERT_NOT_EQUAL(fs.PopCount(), 0, ());
|
||||
ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ());
|
||||
prediction.m_prob = static_cast<double>(fs.PopCount()) / static_cast<double>(fa.PopCount());
|
||||
|
||||
prediction.m_features = move(fs);
|
||||
prediction.m_hash = prediction.m_features.Hash();
|
||||
prediction.m_withMisprints = withMisprints;
|
||||
};
|
||||
|
||||
StreetTokensFilter streetsFilter(
|
||||
[&](strings::UniString const & /* token */, size_t tag) {
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
if (tag < curToken)
|
||||
{
|
||||
// This is the case for delayed
|
||||
// street synonym. Therefore,
|
||||
// |streets| is temporarily in the
|
||||
// incomplete state.
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit();
|
||||
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
},
|
||||
withMisprints);
|
||||
|
||||
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); ++curToken)
|
||||
{
|
||||
auto const & token = params.GetToken(curToken).GetOriginal();
|
||||
bool const isPrefix = params.IsPrefixToken(curToken);
|
||||
|
||||
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
|
||||
emit();
|
||||
|
||||
streetsFilter.Put(token, isPrefix, curToken);
|
||||
}
|
||||
emit();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
|
@ -87,9 +193,10 @@ void StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter,
|
|||
|
||||
// Leave the most probable and longest prediction for predictions with the same m_hash (features)
|
||||
// and m_withMisprints.
|
||||
// We will still distinguish parses with the same m_hash (features) but different range and m_withMisprints.
|
||||
// For example, for "Paramount dive" we will have two parses:
|
||||
// STREET UNUSED (can be matched to poi later)
|
||||
// We will still distinguish parses with the same m_hash (features) but different range and
|
||||
// m_withMisprints. For example, for "Paramount dive" we will have two parses:
|
||||
//
|
||||
// STREET UNUSED (can be matched to poi later)
|
||||
// Paramount dive
|
||||
//
|
||||
// STREET STREET ("drive" with misprints)
|
||||
|
@ -115,114 +222,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
|
|||
if (ctx.IsTokenUsed(startToken))
|
||||
continue;
|
||||
|
||||
auto emit = [&](CBV const & streets, CBV const & all, size_t curToken, size_t lastToken,
|
||||
bool emptyIntersection, bool incomplete, bool withMisprints) {
|
||||
if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
|
||||
{
|
||||
CBV fs(streets);
|
||||
CBV fa(all);
|
||||
|
||||
ASSERT(!fs.IsFull(), ());
|
||||
ASSERT(!fa.IsFull(), ());
|
||||
|
||||
if (filter.NeedToFilter(fs))
|
||||
fs = filter.Filter(fs);
|
||||
|
||||
if (fs.IsEmpty())
|
||||
return;
|
||||
|
||||
if (filter.NeedToFilter(fa))
|
||||
fa = filter.Filter(fa).Union(fs);
|
||||
|
||||
predictions.emplace_back();
|
||||
auto & prediction = predictions.back();
|
||||
|
||||
prediction.m_tokenRange = TokenRange(startToken, curToken);
|
||||
|
||||
ASSERT_NOT_EQUAL(fs.PopCount(), 0, ());
|
||||
ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ());
|
||||
prediction.m_prob = static_cast<double>(fs.PopCount()) / static_cast<double>(fa.PopCount());
|
||||
|
||||
prediction.m_features = move(fs);
|
||||
prediction.m_hash = prediction.m_features.Hash();
|
||||
prediction.m_withMisprints = withMisprints;
|
||||
}
|
||||
};
|
||||
|
||||
auto findStreets = [&](bool withMisprints) {
|
||||
// Here we try to match as many tokens as possible while
|
||||
// intersection is a non-empty bit vector of streets. Single
|
||||
// tokens that are synonyms to streets are ignored. Moreover,
|
||||
// each time a token that looks like a beginning of a house number
|
||||
// is met, we try to use current intersection of tokens as a
|
||||
// street layer and try to match BUILDINGs or POIs.
|
||||
CBV streets(ctx.m_streets);
|
||||
|
||||
CBV all;
|
||||
all.SetFull();
|
||||
|
||||
size_t curToken = startToken;
|
||||
|
||||
// This variable is used for prevention of duplicate calls to
|
||||
// CreateStreetsLayerAndMatchLowerLayers() with the same
|
||||
// arguments.
|
||||
size_t lastToken = startToken;
|
||||
|
||||
// When true, no bit vectors were intersected with |streets| at all.
|
||||
bool emptyIntersection = true;
|
||||
|
||||
// When true, |streets| is in the incomplete state and can't be
|
||||
// used for creation of street layers.
|
||||
bool incomplete = false;
|
||||
|
||||
StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
|
||||
{
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
if (tag < curToken)
|
||||
{
|
||||
// This is the case for delayed
|
||||
// street synonym. Therefore,
|
||||
// |streets| is temporarily in the
|
||||
// incomplete state.
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit(streets, all, curToken, lastToken, emptyIntersection,
|
||||
incomplete, withMisprints);
|
||||
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
},
|
||||
withMisprints);
|
||||
|
||||
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty();
|
||||
++curToken)
|
||||
{
|
||||
auto const & token = params.GetToken(curToken).GetOriginal();
|
||||
bool const isPrefix = params.IsPrefixToken(curToken);
|
||||
|
||||
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
|
||||
emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints);
|
||||
|
||||
filter.Put(token, isPrefix, curToken);
|
||||
}
|
||||
emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints);
|
||||
};
|
||||
|
||||
findStreets(false /* withMisprints */);
|
||||
findStreets(true /* withMisprints */);
|
||||
::search::FindStreets(ctx, filter, params, startToken, false /* withMisprints */, predictions);
|
||||
::search::FindStreets(ctx, filter, params, startToken, true /* withMisprints */, predictions);
|
||||
}
|
||||
}
|
||||
} // namespace search
|
||||
|
|
Loading…
Add table
Reference in a new issue