[search] Leave street predictions with the same m_hash and m_withMisprints but different token ranges.

This commit is contained in:
tatiana-yan 2020-04-27 14:21:29 +03:00 committed by mpimenov
parent 938d47fe25
commit 02e457f8e0
2 changed files with 15 additions and 42 deletions

View file

@ -256,7 +256,7 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
TEST(ResultsMatch(" ", Rules()), ());
}
{
Rules rules = {ExactMatch(wonderlandId, quantumTeleport2)};
Rules rules = {ExactMatch(wonderlandId, quantumTeleport2), ExactMatch(wonderlandId, feynmanStreet)};
TEST(ResultsMatch("teleport feynman street", rules), ());
}
{
@ -1508,7 +1508,7 @@ UNIT_CLASS_TEST(ProcessorTest, PathsThroughLayers)
auto const rulePoi = ExactMatch(countryId, reinforcementCafe);
// POI-BUILDING-STREET
TEST(ResultsMatch("computing street statistical learning cafe ", {rulePoi}), ());
TEST(ResultsMatch("computing street statistical learning cafe ", {rulePoi, ruleStreet}), ());
TEST(ResultsMatch("computing street 0 cafe ", {rulePoi}), ());
// POI-BUILDING is not supported
@ -1516,10 +1516,10 @@ UNIT_CLASS_TEST(ProcessorTest, PathsThroughLayers)
TEST(ResultsMatch("0 cafe ", {}), ());
// POI-STREET
TEST(ResultsMatch("computing street cafe ", {rulePoi}), ());
TEST(ResultsMatch("computing street cafe ", {rulePoi, ruleStreet}), ());
// BUILDING-STREET
TEST(ResultsMatch("computing street statistical learning ", {ruleBuilding}), ());
TEST(ResultsMatch("computing street statistical learning ", {ruleBuilding, ruleStreet}), ());
TEST(ResultsMatch("computing street 0 ", {ruleBuilding}), ());
// POI
@ -1769,8 +1769,8 @@ UNIT_CLASS_TEST(ProcessorTest, SynonymsTest)
vector<m2::PointD>{m2::PointD(-0.5, -0.5), m2::PointD(0.0, 0.0), m2::PointD(0.5, 0.5)},
"большая свято-покровская улица", "ru");
TestPOI stPeterEn(m2::PointD(0.0, 0.0), "saint peter basilica", "en");
TestPOI stPeterRu(m2::PointD(0.5, 0.5), "собор святого петра", "ru");
TestPOI stPeterEn(m2::PointD(2.0, 2.0), "saint peter basilica", "en");
TestPOI stPeterRu(m2::PointD(-2.0, -2.0), "собор святого петра", "ru");
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) {
builder.Add(streetEn);
@ -1779,7 +1779,7 @@ UNIT_CLASS_TEST(ProcessorTest, SynonymsTest)
builder.Add(stPeterRu);
});
SetViewport(m2::RectD(-1, -1, 1, 1));
SetViewport(m2::RectD(-2.0, -2.0, 2.0, 2.0));
{
Rules rules = {ExactMatch(wonderlandId, streetEn)};
TEST(ResultsMatch("southwest street ", rules), ());
@ -2013,7 +2013,7 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefix)
SetViewport(m2::RectD(m2::PointD(0.0, 0.0), m2::PointD(1.0, 2.0)));
{
Rules rules = {ExactMatch(countryId, house)};
Rules rules = {ExactMatch(countryId, house), ExactMatch(countryId, street)};
TEST(ResultsMatch("3 Boulevard Maloney Est", rules), ());
}
}

View file

@ -44,30 +44,6 @@ bool EqualsByHashAndRange(StreetsMatcher::Prediction const & lhs,
lhs.m_tokenRange.Begin() == rhs.m_tokenRange.Begin() && lhs.m_hash == rhs.m_hash;
}
// Ordering predicate for predictions. Keys are compared in sequence and the
// first one that differs decides the result:
//   1. m_hash, ascending;
//   2. m_withMisprints, predictions without misprints first;
//   3. m_prob, higher probability first;
//   4. GetNumTokens(), more tokens first;
//   5. m_tokenRange.Begin(), ascending.
// Returns true when |lhs| must be placed before |rhs|.
bool LessByHashAndMisprints(StreetsMatcher::Prediction const & lhs,
                            StreetsMatcher::Prediction const & rhs)
{
  // Each later key is evaluated only if all the previous keys are tied.
  return lhs.m_hash != rhs.m_hash
             ? lhs.m_hash < rhs.m_hash
         : lhs.m_withMisprints != rhs.m_withMisprints
             ? rhs.m_withMisprints
         : lhs.m_prob != rhs.m_prob
             ? lhs.m_prob > rhs.m_prob
         : lhs.GetNumTokens() != rhs.GetNumTokens()
             ? lhs.GetNumTokens() > rhs.GetNumTokens()
             : lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin();
}
// Equality predicate for predictions: two predictions are considered equal
// when both their m_withMisprints flags and their m_hash values coincide.
// Token ranges and probabilities are not taken into account.
bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs,
                              StreetsMatcher::Prediction const & rhs)
{
  if (lhs.m_withMisprints != rhs.m_withMisprints)
    return false;
  return lhs.m_hash == rhs.m_hash;
}
void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter const & filter,
QueryParams const & params, size_t startToken, bool withMisprints,
vector<StreetsMatcher::Prediction> & predictions)
@ -192,19 +168,16 @@ void StreetsMatcher::Go(BaseContext const & ctx, CBV const & candidates,
// Remove predictions with the same m_hash (features) and token range.
base::SortUnique(predictions, &LessByHashAndRange, &EqualsByHashAndRange);
// Leave the most probable and longest prediction for predictions with the same m_hash (features)
// and m_withMisprints.
// We will still distinguish parses with the same m_hash (features) but different range and
// m_withMisprints. For example, for "Paramount dive" we will have two parses:
// We need all predictions here, even those with the same m_hash (features) and
// m_withMisprints. For example:
//
// STREET UNUSED (can be matched to poi later)
// Paramount dive
// STREET STREET STREET STREET
// 8 March street, 8
//
// STREET STREET ("drive" with misprints)
// Paramount dive
// STREET STREET STREET UNUSED (can be matched to house number later)
// 8 March street, 8
//
// The parses will have the same features and hash but we need both of them.
base::SortUnique(predictions, &LessByHashAndMisprints, &EqualsByHashAndMisprints);
// These predictions have the same m_hash (features) and m_withMisprints but lead to different parses.
sort(predictions.rbegin(), predictions.rend(), base::LessBy(&Prediction::m_prob));
while (predictions.size() > kMaxNumOfImprobablePredictions &&