forked from organicmaps/organicmaps
[search] Fix street synonyms with misprints matching.
parent b71fb6a256
commit 12583814f0

3 changed files with 196 additions and 40 deletions
@@ -560,9 +560,9 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
  checkErrors("трактир лермонтов", ErrorsMade(2));
  checkErrors("кафе", ErrorsMade());

  checkErrors("Yesenina cafe", ErrorsMade(0));
  checkErrors("Esenina cafe", ErrorsMade(1));
  checkErrors("Jesenina cafe", ErrorsMade(1));
  checkErrors("Cafe Yesenina", ErrorsMade(0));
  checkErrors("Cafe Esenina", ErrorsMade(1));
  checkErrors("Cafe Jesenina", ErrorsMade(1));

  checkErrors("Островского кафе", ErrorsMade(0));
  checkErrors("Астровского кафе", ErrorsMade(1));
@@ -2052,5 +2052,106 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints)
  }
}

UNIT_CLASS_TEST(ProcessorTest, HouseOnStreetSynonymsWithMisprints)
{
  string const countryName = "Wonderland";

  TestStreet tverskoi(vector<m2::PointD>{m2::PointD(1.0, -1.0), m2::PointD(1.0, 1.0)},
                      "Tverskoi Boulevard", "en");
  TestStreet leninsky(vector<m2::PointD>{m2::PointD(0.0, -1.0), m2::PointD(0.0, 1.0)},
                      "Leninsky Avenue", "en");
  TestStreet mira(vector<m2::PointD>{m2::PointD(-1.0, -1.0), m2::PointD(-1.0, 1.0)},
                  "Проспект Мира", "ru");

  TestPOI houseTverskoi(m2::PointD(1.0, 0.0), "", "en");
  houseTverskoi.SetHouseNumber("3");
  houseTverskoi.SetStreetName(tverskoi.GetName("en"));

  TestPOI houseLeninsky(m2::PointD(0.0, 0.0), "", "en");
  houseLeninsky.SetHouseNumber("5");
  houseLeninsky.SetStreetName(leninsky.GetName("en"));

  TestPOI houseMira(m2::PointD(-1.0, 0.0), "", "en");
  houseMira.SetHouseNumber("7");
  houseMira.SetStreetName(mira.GetName("ru"));

  auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) {
    builder.Add(tverskoi);
    builder.Add(leninsky);
    builder.Add(mira);
    builder.Add(houseTverskoi);
    builder.Add(houseLeninsky);
    builder.Add(houseMira);
  });

  auto alternativeMatch = [this](string const & query, Rules const & rules1, Rules const & rules2) {
    TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport);
    request.Run();
    return MatchResults(m_dataSource, rules1, request.Results()) ||
           MatchResults(m_dataSource, rules2, request.Results());
  };

  SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
  {
    Rules rules = {ExactMatch(countryId, houseTverskoi)};
    Rules rulesWithStreet = {ExactMatch(countryId, houseTverskoi), ExactMatch(countryId, tverskoi)};
    TEST(alternativeMatch("tverskoi 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi boulevard 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi bulevard 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi blvd 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi blvrd 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi boulevrd 3", rules, rulesWithStreet), ());
    TEST(alternativeMatch("tverskoi bolevard 3", rules, rulesWithStreet), ());
  }
  {
    Rules rules = {ExactMatch(countryId, houseLeninsky)};
    Rules rulesWithStreet = {ExactMatch(countryId, houseLeninsky), ExactMatch(countryId, leninsky)};
    TEST(alternativeMatch("leninsky 5", rules, rulesWithStreet), ());
    TEST(alternativeMatch("leninsky avenue 5", rules, rulesWithStreet), ());
    TEST(alternativeMatch("leninsky avenu 5", rules, rulesWithStreet), ());
    TEST(alternativeMatch("leninsky avneue 5", rules, rulesWithStreet), ());
    TEST(alternativeMatch("leninsky av 5", rules, rulesWithStreet), ());
  }
  {
    Rules rules = {ExactMatch(countryId, houseMira)};
    Rules rulesWithStreet = {ExactMatch(countryId, houseMira), ExactMatch(countryId, mira)};
    TEST(alternativeMatch("мира 7", rules, rulesWithStreet), ());
    TEST(alternativeMatch("проспект мира 7", rules, rulesWithStreet), ());
    TEST(alternativeMatch("пропект мира 7", rules, rulesWithStreet), ());
    TEST(alternativeMatch("прсопект мира 7", rules, rulesWithStreet), ());
    TEST(alternativeMatch("пр-т мира 7", rules, rulesWithStreet), ());
  }
}

UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefixMatch)
{
  string const countryName = "Wonderland";

  TestStreet yesenina(
      vector<m2::PointD>{m2::PointD(0.5, -0.5), m2::PointD(0, 0), m2::PointD(-0.5, 0.5)},
      "Yesenina street", "en");

  TestPOI cafe(m2::PointD(0, 0), "", "en");
  cafe.SetTypes({{"amenity", "cafe"}});

  auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) {
    builder.Add(yesenina);
    builder.Add(cafe);
  });

  SetViewport(m2::RectD(-1, -1, 1, 1));
  {
    Rules rules = {ExactMatch(countryId, cafe)};
    TEST(ResultsMatch("Yesenina cafe ", rules), ());
    TEST(ResultsMatch("Cafe Yesenina ", rules), ());
    TEST(ResultsMatch("Cafe Yesenina", rules), ());
  }
  {
    Rules rules = {ExactMatch(countryId, cafe), ExactMatch(countryId, yesenina)};
    // A prefix match with misprints against the street synonym returns the street as an
    // additional result, but we can still find the cafe.
    TEST(ResultsMatch("Yesenina cafe", rules), ());
  }
}
} // namespace
} // namespace search
@@ -16,11 +16,43 @@ namespace search
{
namespace
{
bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Prediction const & rhs)
bool LessByHashAndRange(StreetsMatcher::Prediction const & lhs,
                        StreetsMatcher::Prediction const & rhs)
{
  if (lhs.m_hash != rhs.m_hash)
    return lhs.m_hash < rhs.m_hash;

  if (lhs.GetNumTokens() != rhs.GetNumTokens())
    return lhs.GetNumTokens() > rhs.GetNumTokens();

  if (lhs.m_tokenRange.Begin() != rhs.m_tokenRange.Begin())
    return lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin();

  if (lhs.m_prob != rhs.m_prob)
    return lhs.m_prob > rhs.m_prob;

  if (lhs.m_withMisprints != rhs.m_withMisprints)
    return rhs.m_withMisprints;

  return false;
}

bool EqualsByHashAndRange(StreetsMatcher::Prediction const & lhs,
                          StreetsMatcher::Prediction const & rhs)
{
  return lhs.GetNumTokens() == rhs.GetNumTokens() &&
         lhs.m_tokenRange.Begin() == rhs.m_tokenRange.Begin() && lhs.m_hash == rhs.m_hash;
}

bool LessByHashAndMisprints(StreetsMatcher::Prediction const & lhs,
                            StreetsMatcher::Prediction const & rhs)
{
  if (lhs.m_hash != rhs.m_hash)
    return lhs.m_hash < rhs.m_hash;

  if (lhs.m_withMisprints != rhs.m_withMisprints)
    return rhs.m_withMisprints;

  if (lhs.m_prob != rhs.m_prob)
    return lhs.m_prob > rhs.m_prob;

@@ -29,6 +61,12 @@ bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Predicti

  return lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin();
}

bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs,
                              StreetsMatcher::Prediction const & rhs)
{
  return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash;
}
} // namespace

// static
@@ -44,10 +82,25 @@ void StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter,
  if (predictions.empty())
    return;

  sort(predictions.begin(), predictions.end(), &LessByHash);
  predictions.erase(
      unique(predictions.begin(), predictions.end(), base::EqualsBy(&Prediction::m_hash)),
      predictions.end());
  // Remove predictions with the same m_hash (features) and token range.
  sort(predictions.begin(), predictions.end(), &LessByHashAndRange);
  predictions.erase(unique(predictions.begin(), predictions.end(), &EqualsByHashAndRange),
                    predictions.end());

  // Leave the most probable and longest prediction among predictions with the same m_hash
  // (features) and m_withMisprints.
  // We still distinguish parses with the same m_hash (features) but a different token range or
  // m_withMisprints. For example, for "Paramount dive" we will have two parses:
  //
  // STREET UNUSED (can be matched to a POI later)
  // Paramount dive
  //
  // STREET STREET ("drive" with misprints)
  // Paramount dive
  //
  // The parses will have the same features and hash, but we need both of them.
  sort(predictions.begin(), predictions.end(), &LessByHashAndMisprints);
  predictions.erase(unique(predictions.begin(), predictions.end(), &EqualsByHashAndMisprints),
                    predictions.end());

  sort(predictions.rbegin(), predictions.rend(), base::LessBy(&Prediction::m_prob));
  while (predictions.size() > kMaxNumOfImprobablePredictions &&
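Editor's note: the two sort/unique passes above are easy to misread, so here is a minimal, self-contained sketch of the pattern. It uses a simplified stand-in for Prediction (plain m_numTokens and m_rangeBegin members replace GetNumTokens() and m_tokenRange.Begin(); the toy values and the "duplicate from another parse branch" are assumptions), not the real search code. It shows why the two "Paramount dive" parses both survive while an exact duplicate is dropped.

// Sketch only: simplified stand-in for search::StreetsMatcher::Prediction.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct Prediction
{
  std::uint64_t m_hash;      // hash of the matched street features
  std::size_t m_numTokens;   // stand-in for GetNumTokens()
  std::size_t m_rangeBegin;  // stand-in for m_tokenRange.Begin()
  double m_prob;
  bool m_withMisprints;
};

// First pass: group by (hash, token range); ties prefer higher m_prob and misprint-free parses.
bool LessByHashAndRange(Prediction const & lhs, Prediction const & rhs)
{
  if (lhs.m_hash != rhs.m_hash) return lhs.m_hash < rhs.m_hash;
  if (lhs.m_numTokens != rhs.m_numTokens) return lhs.m_numTokens > rhs.m_numTokens;
  if (lhs.m_rangeBegin != rhs.m_rangeBegin) return lhs.m_rangeBegin < rhs.m_rangeBegin;
  if (lhs.m_prob != rhs.m_prob) return lhs.m_prob > rhs.m_prob;
  if (lhs.m_withMisprints != rhs.m_withMisprints) return rhs.m_withMisprints;
  return false;
}

bool EqualsByHashAndRange(Prediction const & lhs, Prediction const & rhs)
{
  return lhs.m_numTokens == rhs.m_numTokens && lhs.m_rangeBegin == rhs.m_rangeBegin &&
         lhs.m_hash == rhs.m_hash;
}

// Second pass: keep one prediction per (hash, m_withMisprints) pair.
bool LessByHashAndMisprints(Prediction const & lhs, Prediction const & rhs)
{
  if (lhs.m_hash != rhs.m_hash) return lhs.m_hash < rhs.m_hash;
  if (lhs.m_withMisprints != rhs.m_withMisprints) return rhs.m_withMisprints;
  if (lhs.m_prob != rhs.m_prob) return lhs.m_prob > rhs.m_prob;
  return lhs.m_rangeBegin < rhs.m_rangeBegin;
}

bool EqualsByHashAndMisprints(Prediction const & lhs, Prediction const & rhs)
{
  return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash;
}

int main()
{
  // "Paramount dive": one parse matches only "Paramount" (no misprints), another matches
  // "Paramount dive" as "Paramount drive" (with misprints). Both share the same feature hash.
  std::vector<Prediction> predictions = {
      {42, 1, 0, 0.5, false},  // STREET UNUSED
      {42, 1, 0, 0.5, false},  // exact duplicate of the first parse
      {42, 2, 0, 0.7, true},   // STREET STREET ("drive" with misprints)
  };

  // Sorting puts the preferred element first within each (hash, range) group,
  // so unique() keeps it and drops the rest of the group.
  std::sort(predictions.begin(), predictions.end(), &LessByHashAndRange);
  predictions.erase(std::unique(predictions.begin(), predictions.end(), &EqualsByHashAndRange),
                    predictions.end());

  // Parses that differ only in m_withMisprints are not equal here, so both survive.
  std::sort(predictions.begin(), predictions.end(), &LessByHashAndMisprints);
  predictions.erase(std::unique(predictions.begin(), predictions.end(), &EqualsByHashAndMisprints),
                    predictions.end());

  for (auto const & p : predictions)
    std::cout << p.m_numTokens << " token(s), withMisprints = " << std::boolalpha
              << p.m_withMisprints << "\n";
  // Prints two parses: 1 token without misprints and 2 tokens with misprints.
}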
@@ -66,33 +119,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
    if (ctx.IsTokenUsed(startToken))
      continue;

    // Here we try to match as many tokens as possible while
    // intersection is a non-empty bit vector of streets. Single
    // tokens that are synonyms to streets are ignored. Moreover,
    // each time a token that looks like a beginning of a house number
    // is met, we try to use current intersection of tokens as a
    // street layer and try to match BUILDINGs or POIs.
    CBV streets(ctx.m_streets);

    CBV all;
    all.SetFull();

    size_t curToken = startToken;

    // This variable is used for prevention of duplicate calls to
    // CreateStreetsLayerAndMatchLowerLayers() with the same
    // arguments.
    size_t lastToken = startToken;

    // When true, no bit vectors were intersected with |streets| at all.
    bool emptyIntersection = true;

    // When true, |streets| is in the incomplete state and can't be
    // used for creation of street layers.
    bool incomplete = false;

    auto emit = [&]()
    {
    auto emit = [&](CBV const & streets, CBV const & all, size_t curToken, size_t lastToken,
                    bool emptyIntersection, bool incomplete, bool withMisprints) {
      if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
      {
        CBV fs(streets);
@@ -121,11 +149,36 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &

        prediction.m_features = move(fs);
        prediction.m_hash = prediction.m_features.Hash();
        prediction.m_withMisprints = withMisprints;
      }
    };

    auto findStreets = [&](bool withMisprints)
    {
    auto findStreets = [&](bool withMisprints) {
      // Here we try to match as many tokens as possible while
      // intersection is a non-empty bit vector of streets. Single
      // tokens that are synonyms to streets are ignored. Moreover,
      // each time a token that looks like a beginning of a house number
      // is met, we try to use current intersection of tokens as a
      // street layer and try to match BUILDINGs or POIs.
      CBV streets(ctx.m_streets);

      CBV all;
      all.SetFull();

      size_t curToken = startToken;

      // This variable is used for prevention of duplicate calls to
      // CreateStreetsLayerAndMatchLowerLayers() with the same
      // arguments.
      size_t lastToken = startToken;

      // When true, no bit vectors were intersected with |streets| at all.
      bool emptyIntersection = true;

      // When true, |streets| is in the incomplete state and can't be
      // used for creation of street layers.
      bool incomplete = false;

      StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
      {
        auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
@@ -148,7 +201,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
        // the intersection. Therefore we need
        // to create streets layer right now.
        if (buffer.IsEmpty())
          emit();
          emit(streets, all, curToken, lastToken, emptyIntersection,
               incomplete, withMisprints);

        streets = buffer;
        all = all.Intersect(ctx.m_features[tag].m_features);
@@ -164,11 +218,11 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
        bool const isPrefix = params.IsPrefixToken(curToken);

        if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
          emit();
          emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints);

        filter.Put(token, isPrefix, curToken);
      }
      emit();
      emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints);
    };

    findStreets(false /* withMisprints */);
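Editor's note on the refactoring above: previously the token-scanning state (streets, all, curToken, lastToken, ...) lived directly in the enclosing loop and emit() captured it; now that state lives inside findStreets and emit receives everything as explicit parameters, so the same scan can be run per start token without misprints and, presumably, again with misprints (the second call is outside this hunk). The following is a structural sketch only, under those assumptions: the names, the toy tokens, and the naive digit check stand in for the real CBV intersections and house_numbers::LooksLikeHouseNumber(); it is not the real search code.

// Structural sketch: two passes over the same tokens, emit() taking explicit arguments.
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> tokens = {"tverskoi", "blvd", "3"};

  // emit() takes its inputs explicitly instead of capturing per-pass state,
  // mirroring the emit(streets, all, curToken, ...) signature above.
  auto emit = [](std::string const & street, bool withMisprints) {
    if (!street.empty())
      std::cout << "street candidate: \"" << street << "\""
                << (withMisprints ? " (with misprints)" : "") << "\n";
  };

  auto findStreets = [&](bool withMisprints) {
    std::string street;  // per-pass state, rebuilt on every call
    for (auto const & token : tokens)
    {
      // A token that looks like a house number flushes the current candidate,
      // like the LooksLikeHouseNumber() branch in FindStreets().
      if (std::isdigit(static_cast<unsigned char>(token.front())))
      {
        emit(street, withMisprints);
        street.clear();  // crude stand-in for the lastToken != curToken guard
      }
      else
      {
        street += (street.empty() ? "" : " ") + token;
      }
    }
    emit(street, withMisprints);
  };

  findStreets(false /* withMisprints */);
  findStreets(true /* withMisprints */);
}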
@@ -22,6 +22,7 @@ public:

    CBV m_features;
    TokenRange m_tokenRange;
    bool m_withMisprints = false;
    double m_prob = 0.0;
    uint64_t m_hash = 0;
  };