From 02e457f8e0e4d6e3940989984fc6f36cb8b184ff Mon Sep 17 00:00:00 2001 From: tatiana-yan Date: Mon, 27 Apr 2020 14:21:29 +0300 Subject: [PATCH] [search] Leave street predictions with the same m_hash and m_withMisprints but different token ranges. --- .../processor_test.cpp | 16 ++++---- search/streets_matcher.cpp | 41 ++++--------------- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index e8a854a153..5751fde43f 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -256,7 +256,7 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke) TEST(ResultsMatch(" ", Rules()), ()); } { - Rules rules = {ExactMatch(wonderlandId, quantumTeleport2)}; + Rules rules = {ExactMatch(wonderlandId, quantumTeleport2), ExactMatch(wonderlandId, feynmanStreet)}; TEST(ResultsMatch("teleport feynman street", rules), ()); } { @@ -1508,7 +1508,7 @@ UNIT_CLASS_TEST(ProcessorTest, PathsThroughLayers) auto const rulePoi = ExactMatch(countryId, reinforcementCafe); // POI-BUILDING-STREET - TEST(ResultsMatch("computing street statistical learning cafe ", {rulePoi}), ()); + TEST(ResultsMatch("computing street statistical learning cafe ", {rulePoi, ruleStreet}), ()); TEST(ResultsMatch("computing street 0 cafe ", {rulePoi}), ()); // POI-BUILDING is not supported @@ -1516,10 +1516,10 @@ UNIT_CLASS_TEST(ProcessorTest, PathsThroughLayers) TEST(ResultsMatch("0 cafe ", {}), ()); // POI-STREET - TEST(ResultsMatch("computing street cafe ", {rulePoi}), ()); + TEST(ResultsMatch("computing street cafe ", {rulePoi, ruleStreet}), ()); // BUILDING-STREET - TEST(ResultsMatch("computing street statistical learning ", {ruleBuilding}), ()); + TEST(ResultsMatch("computing street statistical learning ", {ruleBuilding, ruleStreet}), ()); TEST(ResultsMatch("computing street 0 ", {ruleBuilding}), ()); // POI @@ -1769,8 +1769,8 @@ UNIT_CLASS_TEST(ProcessorTest, SynonymsTest) vector{m2::PointD(-0.5, -0.5), m2::PointD(0.0, 0.0), m2::PointD(0.5, 0.5)}, "большая свято-покровская улица", "ru"); - TestPOI stPeterEn(m2::PointD(0.0, 0.0), "saint peter basilica", "en"); - TestPOI stPeterRu(m2::PointD(0.5, 0.5), "собор святого петра", "ru"); + TestPOI stPeterEn(m2::PointD(2.0, 2.0), "saint peter basilica", "en"); + TestPOI stPeterRu(m2::PointD(-2.0, -2.0), "собор святого петра", "ru"); auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) { builder.Add(streetEn); @@ -1779,7 +1779,7 @@ UNIT_CLASS_TEST(ProcessorTest, SynonymsTest) builder.Add(stPeterRu); }); - SetViewport(m2::RectD(-1, -1, 1, 1)); + SetViewport(m2::RectD(-2.0, -2.0, 2.0, 2.0)); { Rules rules = {ExactMatch(wonderlandId, streetEn)}; TEST(ResultsMatch("southwest street ", rules), ()); @@ -2013,7 +2013,7 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefix) SetViewport(m2::RectD(m2::PointD(0.0, 0.0), m2::PointD(1.0, 2.0))); { - Rules rules = {ExactMatch(countryId, house)}; + Rules rules = {ExactMatch(countryId, house), ExactMatch(countryId, street)}; TEST(ResultsMatch("3 Boulevard Maloney Est", rules), ()); } } diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp index c51aba6bb1..c903c23672 100644 --- a/search/streets_matcher.cpp +++ b/search/streets_matcher.cpp @@ -44,30 +44,6 @@ bool EqualsByHashAndRange(StreetsMatcher::Prediction const & lhs, lhs.m_tokenRange.Begin() == rhs.m_tokenRange.Begin() && lhs.m_hash == rhs.m_hash; } -bool LessByHashAndMisprints(StreetsMatcher::Prediction const & lhs, - StreetsMatcher::Prediction const & rhs) -{ - if (lhs.m_hash != rhs.m_hash) - return lhs.m_hash < rhs.m_hash; - - if (lhs.m_withMisprints != rhs.m_withMisprints) - return rhs.m_withMisprints; - - if (lhs.m_prob != rhs.m_prob) - return lhs.m_prob > rhs.m_prob; - - if (lhs.GetNumTokens() != rhs.GetNumTokens()) - return lhs.GetNumTokens() > rhs.GetNumTokens(); - - return lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin(); -} - -bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs, - StreetsMatcher::Prediction const & rhs) -{ - return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash; -} - void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter const & filter, QueryParams const & params, size_t startToken, bool withMisprints, vector & predictions) @@ -192,19 +168,16 @@ void StreetsMatcher::Go(BaseContext const & ctx, CBV const & candidates, // Remove predictions with the same m_hash (features) and token range. base::SortUnique(predictions, &LessByHashAndRange, &EqualsByHashAndRange); - // Leave the most probable and longest prediction for predictions with the same m_hash (features) - // and m_withMisprints. - // We will still distinguish parses with the same m_hash (features) but different range and - // m_withMisprints. For example, for "Paramount dive" we will have two parses: + // We need all predictions here. Even predictions with the same m_hash (features) and + // m_withMisprints. For example: // - // STREET UNUSED (can be matched to poi later) - // Paramount dive + // STREET STREET STREET STREET + // 8 March street, 8 // - // STREET STREET ("drive" with misprints) - // Paramount dive + // STREET STREET STREET UNUSED (can be matched to house number later) + // 8 March street, 8 // - // The parses will have the same features and hash but we need both of them. - base::SortUnique(predictions, &LessByHashAndMisprints, &EqualsByHashAndMisprints); + // Predictions have the same m_hash (features) and m_withMisprints but lead to different parses. sort(predictions.rbegin(), predictions.rend(), base::LessBy(&Prediction::m_prob)); while (predictions.size() > kMaxNumOfImprobablePredictions &&