From e9ed05623d5654de8980d544d9b67c68b7a6c876 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Wed, 20 Dec 2017 16:35:09 +0300 Subject: [PATCH] [search] Count errors in the city part. --- search/geocoder.cpp | 6 ++- search/model.hpp | 5 +++ search/pre_ranking_info.hpp | 5 +++ search/ranker.cpp | 37 ++++++++++++++++++- search/ranking_info.cpp | 34 ++++++++--------- search/ranking_utils.hpp | 5 +++ .../processor_test.cpp | 19 +++++++--- 7 files changed, 85 insertions(+), 26 deletions(-) diff --git a/search/geocoder.cpp b/search/geocoder.cpp index a4656e9060..fb56b47606 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -1361,7 +1361,11 @@ void Geocoder::EmitResult(BaseContext & ctx, MwmSet::MwmId const & mwmId, uint32 } if (ctx.m_city) - info.m_tokenRange[Model::TYPE_CITY] = ctx.m_city->m_tokenRange; + { + auto const & city = *ctx.m_city; + info.m_tokenRange[Model::TYPE_CITY] = city.m_tokenRange; + info.m_cityId = FeatureID(city.m_countryId, city.m_featureId); + } if (geoParts) info.m_geoParts = *geoParts; diff --git a/search/model.hpp b/search/model.hpp index 39cafc3e0d..f6535f54ba 100644 --- a/search/model.hpp +++ b/search/model.hpp @@ -47,6 +47,11 @@ public: TYPE_COUNT }; + static bool IsLocalityType(Type const type) + { + return type >= TYPE_VILLAGE && type <= TYPE_COUNTRY; + } + Type GetType(FeatureType const & feature) const; void SetCianEnabled(bool enabled) { m_cianEnabled = enabled; } diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp index 8408931c34..36cd702a94 100644 --- a/search/pre_ranking_info.hpp +++ b/search/pre_ranking_info.hpp @@ -4,6 +4,8 @@ #include "search/model.hpp" #include "search/token_range.hpp" +#include "indexer/feature_decl.hpp" + #include "geometry/point2d.hpp" #include "base/assert.hpp" @@ -44,6 +46,9 @@ struct PreRankingInfo // building and street ids are in |m_geoParts|. IntersectionResult m_geoParts; + // Id of the matched city, if any. + FeatureID m_cityId; + // True iff all tokens that are not stop-words // were used when retrieving the feature. bool m_allTokensUsed = true; diff --git a/search/ranker.cpp b/search/ranker.cpp index fde2673363..65a41d9c99 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -5,6 +5,7 @@ #include "search/highlighting.hpp" #include "search/model.hpp" #include "search/pre_ranking_info.hpp" +#include "search/ranking_utils.hpp" #include "search/token_slice.hpp" #include "search/utils.hpp" @@ -73,6 +74,25 @@ NameScores GetNameScores(FeatureType const & ft, Geocoder::Params const & params return bestScores; } +ErrorsMade GetErrorsMade(FeatureType const & ft, Geocoder::Params const & params, + TokenRange const & range, Model::Type type) +{ + auto errorsMade = GetNameScores(ft, params, range, type).m_errorsMade; + if (errorsMade.IsValid()) + return errorsMade; + + for (auto const token : range) + { + ErrorsMade tokenErrors; + params.GetToken(token).ForEach([&](strings::UniString const & s) { + tokenErrors = ErrorsMade::Max(tokenErrors, ErrorsMade{GetMaxErrorsForToken(s)}); + }); + errorsMade += tokenErrors; + } + + return errorsMade; +} + void RemoveDuplicatingLinear(vector & results) { double constexpr kDistSameStreetMeters = 5000.0; @@ -247,13 +267,26 @@ class RankerResultMaker FeatureType street; if (LoadFeature(FeatureID(mwmId, preInfo.m_geoParts.m_street), street)) { - auto const nameScores = GetNameScores( - street, m_params, preInfo.m_tokenRange[Model::TYPE_STREET], Model::TYPE_STREET); + auto const type = Model::TYPE_STREET; + auto const & range = preInfo.m_tokenRange[type]; + auto const nameScores = GetNameScores(street, m_params, range, type); + nameScore = min(nameScore, nameScores.m_nameScore); errorsMade += nameScores.m_errorsMade; } } + if (!Model::IsLocalityType(info.m_type) && preInfo.m_cityId.IsValid()) + { + FeatureType city; + if (LoadFeature(preInfo.m_cityId, city)) + { + auto const type = Model::TYPE_CITY; + auto const & range = preInfo.m_tokenRange[type]; + errorsMade += GetErrorsMade(city, m_params, range, type); + } + } + info.m_nameScore = nameScore; info.m_errorsMade = errorsMade; diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp index 744663d1d4..fa884e0579 100644 --- a/search/ranking_info.cpp +++ b/search/ranking_info.cpp @@ -12,26 +12,26 @@ namespace { // See search/search_quality/scoring_model.py for details. In short, // these coeffs correspond to coeffs in a linear model. -double const kDistanceToPivot = -0.1940753; -double const kRank = 0.6904166; -double const kFalseCats = -0.0944214; -double const kErrorsMade = -0.0167366; -double const kAllTokensUsed = 1.0000000; +double const kDistanceToPivot = -0.2837370; +double const kRank = 1.0000000; +double const kFalseCats = 0.0000000; +double const kErrorsMade = -0.0118797; +double const kAllTokensUsed = 0.0000000; double const kNameScore[NameScore::NAME_SCORE_COUNT] = { - -0.0704004 /* Zero */, - 0.0178957 /* Substring */, - 0.0274588 /* Prefix */, - 0.0250459 /* Full Match */ + -0.0995842 /* Zero */, + 0.0265404 /* Substring */, + 0.0238720 /* Prefix */, + 0.0491718 /* Full Match */ }; double const kType[Model::TYPE_COUNT] = { - -0.0164093 /* POI */, - -0.0164093 /* Building */, - 0.0067338 /* Street */, - 0.0388209 /* Unclassified */, - -0.0599702 /* Village */, - -0.0996292 /* City */, - 0.0929370 /* State */, - 0.0375170 /* Country */ + -0.0059073 /* POI */, + -0.0059073 /* Building */, + 0.0293600 /* Street */, + 0.0254288 /* Unclassified */, + -0.1130063 /* Village */, + -0.1549069 /* City */, + 0.1656289 /* State */, + 0.0534028 /* Country */ }; double TransformDistance(double distance) diff --git a/search/ranking_utils.hpp b/search/ranking_utils.hpp index 7b22b68e59..b81aeae15a 100644 --- a/search/ranking_utils.hpp +++ b/search/ranking_utils.hpp @@ -72,6 +72,11 @@ struct ErrorsMade return Combine(lhs, rhs, [](size_t u, size_t v) { return std::min(u, v); }); } + static ErrorsMade Max(ErrorsMade const & lhs, ErrorsMade const & rhs) + { + return Combine(lhs, rhs, [](size_t u, size_t v) { return std::max(u, v); }); + } + friend ErrorsMade operator+(ErrorsMade const & lhs, ErrorsMade const & rhs) { return Combine(lhs, rhs, [](size_t u, size_t v) { return u + v; }); diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index ba5d71cd51..d6cc9d7dfa 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -428,7 +428,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade) { string const countryName = "Wonderland"; - TestCity chekhov(m2::PointD(0, 0), "Чехов", "ru", 100 /* rank */); + TestCity chekhov(m2::PointD(0, 0), "Чеховъ Антонъ Павловичъ", "ru", 100 /* rank */); + TestStreet pushkinskaya( vector{m2::PointD(-0.5, -0.5), m2::PointD(0, 0), m2::PointD(0.5, 0.5)}, "Улица Пушкинская", "ru"); @@ -442,17 +443,16 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade) builder.Add(lermontov); }); - SetViewport(m2::RectD(m2::PointD(-1, -1), m2::PointD(1, 1))); + SetViewport(m2::RectD(-1, -1, 1, 1)); auto checkErrors = [&](string const & query, ErrorsMade const & errorsMade) { auto request = MakeRequest(query, "ru"); auto const & results = request->Results(); TRules rules{ExactMatch(wonderlandId, lermontov)}; - TEST(ResultsMatch(results, rules), ()); - TEST_EQUAL(results.size(), 1, ()); - - TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, ()); + TEST(ResultsMatch(results, rules), (query)); + TEST_EQUAL(results.size(), 1, (query)); + TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, (query)); }; checkErrors("кафе лермонтов", ErrorsMade(1)); @@ -461,6 +461,13 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade) checkErrors("пушкенская трактир лермонтов", ErrorsMade(3)); checkErrors("пушкенская кафе", ErrorsMade(1)); checkErrors("пушкинская трактиръ лермонтовъ", ErrorsMade(0)); + + checkErrors("лермонтовъ чехов", ErrorsMade(1)); + checkErrors("лермонтовъ чеховъ", ErrorsMade(0)); + checkErrors("лермонтов чехов", ErrorsMade(2)); + checkErrors("лермонтов чеховъ", ErrorsMade(1)); + + checkErrors("лермонтов чеховъ антон павлович", ErrorsMade(3)); } UNIT_CLASS_TEST(ProcessorTest, TestHouseNumbers)