From c4176532d66c3ff8842da1b5b5983f87c10025f9 Mon Sep 17 00:00:00 2001 From: Viktor Govako Date: Thu, 18 Jan 2024 17:01:06 -0300 Subject: [PATCH] [search] Increased 'errors' and distance ranking penalty. Signed-off-by: Viktor Govako --- .../generator_tests_support/test_feature.cpp | 4 +- search/ranker.cpp | 11 ++- search/ranking_info.cpp | 4 +- .../processor_test.cpp | 17 ++++ .../search_integration_tests/ranker_test.cpp | 4 +- .../search_quality_tests/real_mwm_tests.cpp | 90 +++++++++++++------ search/search_tests/ranking_tests.cpp | 1 + 7 files changed, 100 insertions(+), 31 deletions(-) diff --git a/generator/generator_tests_support/test_feature.cpp b/generator/generator_tests_support/test_feature.cpp index ace665c539..7ecfb93973 100644 --- a/generator/generator_tests_support/test_feature.cpp +++ b/generator/generator_tests_support/test_feature.cpp @@ -167,6 +167,7 @@ void TestPlace::Serialize(FeatureBuilder & fb) const { TestFeature::Serialize(fb); fb.AddType(m_type); + fb.GetParams().rank = m_rank; } string TestPlace::ToDebugString() const @@ -178,8 +179,9 @@ string TestPlace::ToDebugString() const } TestCountry::TestCountry(m2::PointD const & center, std::string const & name, std::string const & lang) - : TestPlace(center, name, lang, classif().GetTypeByPath({"place", "country"})) + : TestPlace(center, name, lang, classif().GetTypeByPath({"place", "country"}), 170 /* rank */) { + // Default rank for Country with population ~ 1.0E7 } TestState::TestState(m2::PointD const & center, string const & name, string const & lang) diff --git a/search/ranker.cpp b/search/ranker.cpp index db4e915339..a2ec62a523 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -557,6 +557,7 @@ private: if (scores.m_isAltOrOldName) isAltOrOldName = true; matchedLength += scores.m_matchedLength; + return scores.m_nameScore; }; auto const updateDependScore = [&](Model::Type type, uint32_t dependID) @@ -584,7 +585,15 @@ private: ASSERT(preInfo.m_tokenRanges[Model::TYPE_VILLAGE].Empty(), ()); } - updateScoreForFeature(*city, type); + auto const cityNameScore = updateScoreForFeature(*city, type); + + // Update distance with matched city pivot if we have a _good_ city name score. + // A bit controversial distance score reset, but lets try. + // Other option is to combine old pivot distance and city distance. + // See 'Barcelona_Carrers' test. + // "San Francisco" query should not update rank for "Francisco XXX" street in "San YYY" village. + if (cityNameScore == NameScore::FULL_MATCH) + info.m_distanceToPivot = mercator::DistanceOnEarth(center, city->GetCenter()); } } diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp index ce8efd3bfe..86b6d96d87 100644 --- a/search/ranking_info.cpp +++ b/search/ranking_info.cpp @@ -26,7 +26,7 @@ double constexpr kCategoriesDistanceToPivot = -0.6874177; double constexpr kCategoriesRank = 1.0000000; double constexpr kCategoriesFalseCats = -1.0000000; -double constexpr kDistanceToPivot = -0.2123693; +double constexpr kDistanceToPivot = -0.48; double constexpr kWalkingDistanceM = 5000.0; // This constant is very important and checked in Famous_Cities_Rank test. @@ -38,7 +38,7 @@ double constexpr kPopularity = 1.0000000; // - On the other hand, when search for "subway", do we usually prefer famous fast food or metro? double constexpr kFalseCats = -0.01; -double constexpr kErrorsMade = -0.15; +double constexpr kErrorsMade = -0.4; double constexpr kMatchedFraction = 0.1876736; double constexpr kAllTokensUsed = 0.0478513; double constexpr kCommonTokens = -0.05; diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index ec238066ee..97a35302f9 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -2724,6 +2724,23 @@ UNIT_CLASS_TEST(ProcessorTest, Suburbs) } } +UNIT_CLASS_TEST(ProcessorTest, Suburbs1) +{ + // The same with neighborhood + TestSuburb suburb({0, 0}, "les Planes", "default"); + + auto countryId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder) + { + builder.Add(suburb); + }); + + SetViewport(m2::RectD(-1.0, -1.0, 1.0, 1.0)); + + /// @todo Should debug this case. Actually, bad matching because of "carrer". + Rules rules = {ExactMatch(countryId, suburb)}; + TEST(ResultsMatch("carrer de les planes", rules), ()); +} + UNIT_CLASS_TEST(ProcessorTest, ViewportFilter) { TestStreet street23({{0.5, -1.0}, {0.5, 1.0}}, "23rd February street", "en"); diff --git a/search/search_integration_tests/ranker_test.cpp b/search/search_integration_tests/ranker_test.cpp index c67de27548..03287e40b2 100644 --- a/search/search_integration_tests/ranker_test.cpp +++ b/search/search_integration_tests/ranker_test.cpp @@ -6,7 +6,6 @@ #include "generator/generator_tests_support/test_feature.hpp" #include "generator/generator_tests_support/test_mwm_builder.hpp" -#include #include namespace ranker_test @@ -98,8 +97,9 @@ UNIT_CLASS_TEST(RankerTest, UniteSameResults) UNIT_CLASS_TEST(RankerTest, PreferCountry) { std::string const name = "Wonderland"; - TestCountry wonderland(m2::PointD(10.0, 10.0), name, "en"); + TestCountry wonderland(m2::PointD(9.0, 9.0), name, "en"); // ~1400 km from (0, 0) TestPOI cafe(m2::PointD(0.0, 0.0), name, "en"); + cafe.SetTypes({{"amenity", "cafe"}}); auto const worldID = BuildWorld([&](TestMwmBuilder & builder) { diff --git a/search/search_quality/search_quality_tests/real_mwm_tests.cpp b/search/search_quality/search_quality_tests/real_mwm_tests.cpp index 0925d4df12..85550dec7a 100644 --- a/search/search_quality/search_quality_tests/real_mwm_tests.cpp +++ b/search/search_quality/search_quality_tests/real_mwm_tests.cpp @@ -185,8 +185,7 @@ UNIT_CLASS_TEST(MwmTestsFixture, TopPOIs_Smoke) auto const & results = request->Results(); TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); - /// @todo results[0] - 'Carrefour' city in Haiti. - Range const range(results, 1, kPopularPoiResultsCount); + Range const range(results, 0, kPopularPoiResultsCount); EqualClassifType(range, GetClassifTypes({{"shop"}})); TEST_LESS(SortedByDistance(range, center).first, 200, ()); } @@ -201,9 +200,12 @@ UNIT_CLASS_TEST(MwmTestsFixture, TopPOIs_Smoke) auto const & results = request->Results(); TEST_GREATER(results.size(), kTopPoiResultsCount, ()); - Range const range(results); - EqualClassifType(range, GetClassifTypes({{"shop"}, {"amenity", "parking"}})); + Range const range(results, 0, 4); + EqualClassifType(range, GetClassifTypes({{"shop"}})); TEST_LESS(SortedByDistance(range, center).first, 5000, ()); + + // parking (< 6km) should be on top. + EqualClassifType(Range(results, 4, 6), GetClassifTypes({{"leisure", "playground"}, {"amenity", "parking"}})); } // https://github.com/organicmaps/organicmaps/issues/2470 @@ -310,6 +312,7 @@ UNIT_CLASS_TEST(MwmTestsFixture, Hamburg_Park) {"tourism", "theme_park"}, {"amenity", "fast_food"}, {"shop", "gift"}, + /// @todo Add _near street_ penalty // {"amenity", "pharmacy"}, // "Heide-Apotheke" near the "Parkstraße" street })); @@ -342,6 +345,7 @@ UNIT_CLASS_TEST(MwmTestsFixture, Barcelona_Carrers) CenterInRect(range, {2.1651583, 41.3899995, 2.1863021, 41.4060494}); } + // In case of a city, distance rank should be from it's origin. { auto request = MakeRequest("carrer de les planes sabadell"); auto const & results = request->Results(); @@ -351,6 +355,22 @@ UNIT_CLASS_TEST(MwmTestsFixture, Barcelona_Carrers) EqualClassifType(range, GetClassifTypes({{"highway"}})); CenterInRect(range, {2.1078314, 41.5437515, 2.1106129, 41.5438819}); } + + { + // Прилукская Слобода, Минск + ms::LatLon const center(53.8197647, 27.4701662); + SetViewportAndLoadMaps(center); + + auto request = MakeRequest("минск малая ул", "ru"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), kTopPoiResultsCount, ()); + + EqualClassifType(Range(results, 0, 1), GetClassifTypes({{"highway"}})); + CenterInRect(Range(results, 0, 1), {27.5134786, 53.8717921, 27.5210173, 53.875768}); + + /// @todo Second result is expected to be the nearby street in "Прилукская Слобода", but not always .. + //TEST_GREATER(GetDistanceM(results[0], center), GetDistanceM(results[1], center), ()); + } } // https://github.com/organicmaps/organicmaps/issues/3307 @@ -519,12 +539,15 @@ UNIT_CLASS_TEST(MwmTestsFixture, Street_BusStop) auto const & results = request->Results(); TEST_GREATER(results.size(), kTopPoiResultsCount, ()); - // First result is a "Juncal" supermarket near the train station, 24km :) - // Second result is a train station in other MWM, >200km away. - Range const range(results, 0, 2); - EqualClassifType(range, GetClassifTypes({{"shop", "supermarket"}, {"railway", "station"}})); - TEST_LESS(!SortedByDistance(range, center).first, 2.0E5, ()); - TEST_LESS(SortedByDistance(range, center).first, 3.0E5, ()); + // -1 "Juncal" supermarket near the train station, 24km :) + // -2 Train station building in "Juncal" village, other MWM, >200km + // -3 Train station building near "Juncal" street, 28km + // -4 Railway station, same as (3) + // -5 Railway station, same as (2) + Range const range(results); + EqualClassifType(range, GetClassifTypes({{"shop", "supermarket"}, + {"railway", "station"}, {"building", "train_station"}})); + TEST_LESS(SortedByDistance(Range(results, 0, 2), center).first, 23.0E3, ()); } } @@ -650,14 +673,7 @@ UNIT_CLASS_TEST(MwmTestsFixture, AddrInterpolation_Rank) TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); // Top first address results. - Range const range(results, 0, 6 /* count */); - EqualClassifType(range, GetClassifTypes({{"building", "address"}})); - - // Results are not sorted because one match is not exact (address near street). - //TEST_LESS(SortedByDistance(range, center), 300000.0, ()); - - // Interesting results goes after, streets on distance ~250km in Pergamino. - // Seems like because of matching 2700 postcode. + EqualClassifType(Range(results, 0, 4), GetClassifTypes({{"building", "address"}})); } } @@ -869,22 +885,24 @@ UNIT_CLASS_TEST(MwmTestsFixture, Full_Address) ms::LatLon const center(49.0195332, 12.0974856); SetViewportAndLoadMaps(center); + /// @todo There is a tricky neighborhood here so ranking gets dumb (and we treat 'A' as a stopword). + /// Anyway, needed addresses are in top 3 among: + /// -1 "Gewerbepark A", "A 1" + /// -2 "Gewerbepark B", "1" + /// -3 "Gewerbepark C", "1" + { auto request = MakeRequest("Wörth an der Donau Gewerbepark A 1 93086 Germany"); auto const & results = request->Results(); TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); - HasAddress(Range(results, 0, 1), "Gewerbepark A", "A 1", {"shop", "car"}); + HasAddress(Range(results, 0, 3), "Gewerbepark A", "A 1", {"shop", "car"}); } { auto request = MakeRequest("Wörth an der Donau Gewerbepark C 1 93086 Germany"); auto const & results = request->Results(); TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); - /// @todo There is a tricky neighborhood here, so ranking gets dumb :) - /// 1: "Gewerbepark A", "A 1" near "Gewerbepark C" st - /// 2: "Gewerbepark B", "1" near "Gewerbepark C" st - /// 3: "Gewerbepark C", "1" HasAddress(Range(results, 0, 3), "Gewerbepark C", "1"); } } @@ -1052,9 +1070,8 @@ UNIT_CLASS_TEST(MwmTestsFixture, Pois_Rank) size_t constexpr kResultsCount = 20; TEST_GREATER(results.size(), kResultsCount, ()); - /// @todo result[0] - 'Rimini' city in Italy // First 2 results - nearest supermarkets. - Range range(results, 1, 3); + Range range(results, 0, 2); EqualClassifType(range, GetClassifTypes({{"shop", "supermarket"}})); TEST_LESS(SortedByDistance(range, center).second, 1500, ()); } @@ -1072,4 +1089,27 @@ UNIT_CLASS_TEST(MwmTestsFixture, Pois_Rank) } } +// "San Francisco" is always an interesting query, because in Latin America there are: +// - hundreds of cities with similar names, +// - thousands of streets/POIs +UNIT_CLASS_TEST(MwmTestsFixture, San_Francisco) +{ + auto const & cl = classif(); + { + // New York + ms::LatLon const center(40.71253, -74.00628); + SetViewportAndLoadMaps(center); + + auto request = MakeRequest("San Francisco"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), kTopPoiResultsCount, ()); + + TEST_EQUAL(results[0].GetFeatureType(), cl.GetTypeByPath({"shop", "laundry"}), ()); + TEST_LESS(GetDistanceM(results[0], center), 1.0E4, ()); + + TEST_EQUAL(results[1].GetFeatureType(), cl.GetTypeByPath({"place", "city"}), ()); + TEST_LESS(GetDistanceM(results[1], center), 4.2E6, ()); + } +} + } // namespace real_mwm_tests diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index 9f16b32e18..772d0043f4 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -215,6 +215,7 @@ UNIT_TEST(RankingInfo_PreferCountry) country.m_distanceToPivot = 1e6; country.m_tokenRanges[Model::TYPE_COUNTRY] = TokenRange(0, 1); country.m_type = Model::TYPE_COUNTRY; + country.m_rank = 100; // This is rather small rank for a country. // Country should be preferred even if cafe is much closer to viewport center. TEST_LESS(cafe.GetLinearModelRank(), country.GetLinearModelRank(), (cafe, country));