diff --git a/search/ranker.cpp b/search/ranker.cpp
index 3cd82c4d00..d11065638d 100644
--- a/search/ranker.cpp
+++ b/search/ranker.cpp
@@ -433,6 +433,24 @@ class RankerResultMaker
       info.m_matchedFraction =
           totalLength == 0 ? 1.0
                            : static_cast(matchedLength) / static_cast(totalLength);
+
+      auto const isCountryOrCapital = [](FeatureType & ft) {  // True iff ft has one of the two types below.
+        auto static const countryType = classif().GetTypeByPath({"place", "country"});  // Static: looked up on first call only.
+        auto static const capitalType = classif().GetTypeByPath({"place", "city", "capital", "2"});
+
+        bool hasType = false;
+        ft.ForEachType([&hasType](uint32_t type) {
+          if (hasType)  // A match was already found: remaining invocations are no-ops.
+            return;
+          if (type == countryType || type == capitalType)
+            hasType = true;
+        });
+
+        return hasType;
+      };
+      info.m_exactCountryOrCapital = info.m_errorsMade == ErrorsMade(0) && info.m_allTokensUsed &&  // No misprints, every token used,
+                                     info.m_nameScore == NAME_SCORE_FULL_MATCH &&  // full name match,
+                                     isCountryOrCapital(ft);  // and the feature is a country or capital.
     }
 
     CategoriesInfo const categoriesInfo(feature::TypesHolder(ft),
diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp
index d740241e06..d41502aee8 100644
--- a/search/ranking_info.cpp
+++ b/search/ranking_info.cpp
@@ -33,6 +33,7 @@ double constexpr kFalseCats = -0.4172461;
 double constexpr kErrorsMade = -0.0391331;
 double constexpr kMatchedFraction = 0.1876736;
 double constexpr kAllTokensUsed = 0.0478513;
+double constexpr kExactCountryOrCapital = 0.1247733;  // Added to the linear-model rank when m_exactCountryOrCapital is set.
 double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
   0.0085962 /* Zero */,
   -0.0099698 /* Substring */,
@@ -57,6 +58,7 @@ static_assert(kDistanceToPivot <= 0, "");
 static_assert(kRank >= 0, "");
 static_assert(kPopularity >= 0, "");
 static_assert(kErrorsMade <= 0, "");
+static_assert(kExactCountryOrCapital >= 0, "");  // The flag must never lower the rank.
 
 double TransformDistance(double distance)
 {
@@ -119,6 +121,7 @@ void RankingInfo::PrintCSVHeader(ostream & os)
      << ",PureCats"
      << ",FalseCats"
      << ",AllTokensUsed"
+     << ",ExactCountryOrCapital"  // Same column position as the value written by ToCSV.
      << ",IsCategorialRequest"
      << ",HasName";
 }
@@ -142,6 +145,7 @@ string DebugPrint(RankingInfo const & info)
   os << ", m_pureCats:" << info.m_pureCats;
   os << ", m_falseCats:" << info.m_falseCats;
   os << ", m_allTokensUsed:" << info.m_allTokensUsed;
+  os << ", m_exactCountryOrCapital:" << info.m_exactCountryOrCapital;
   os << ", m_categorialRequest:" << info.m_categorialRequest;
   os << ", m_hasName:" << info.m_hasName;
   os << "]";
@@ -162,6 +166,7 @@ void RankingInfo::ToCSV(ostream & os) const
   os << m_pureCats << ",";
   os << m_falseCats << ",";
   os << (m_allTokensUsed ? 1 : 0) << ",";
+  os << (m_exactCountryOrCapital ? 1 : 0) << ",";  // Written as 0/1, between AllTokensUsed and IsCategorialRequest as in the CSV header.
   os << (m_categorialRequest ? 1 : 0) << ",";
   os << (m_hasName ? 1 : 0);
 }
@@ -202,6 +207,7 @@ double RankingInfo::GetLinearModelRank() const
     result += kErrorsMade * GetErrorsMadePerToken();
     result += kMatchedFraction * m_matchedFraction;
     result += (m_allTokensUsed ? 1 : 0) * kAllTokensUsed;
+    result += (m_exactCountryOrCapital ? 1 : 0) * kExactCountryOrCapital;  // Fixed bonus; contributes 0 when the flag is false.
   }
   else
   {
diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp
index 59c0132bfe..c3afa637e3 100644
--- a/search/ranking_info.hpp
+++ b/search/ranking_info.hpp
@@ -65,6 +65,10 @@ struct RankingInfo
   // True iff all tokens retrieved from search index were matched without misprints.
   bool m_exactMatch = true;
 
+  // True iff feature has country or capital type and matches request: full match with all tokens
+  // used and without misprints.
+  bool m_exactCountryOrCapital = true;  // NOTE(review): defaults to true despite the "iff" above - confirm this is intended.
+
   // Search type for the feature.
   Model::Type m_type = Model::TYPE_COUNT;
 
diff --git a/search/search_quality/scoring_model.py b/search/search_quality/scoring_model.py
index 2d62aa18f8..f74242fb7a 100755
--- a/search/search_quality/scoring_model.py
+++ b/search/search_quality/scoring_model.py
@@ -21,7 +21,7 @@ RELEVANCES = {'Harmful': -3, 'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
 NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
 SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
 FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'Rating', 'FalseCats', 'ErrorsMade', 'MatchedFraction',
-            'AllTokensUsed'] + NAME_SCORES + SEARCH_TYPES
+            'AllTokensUsed', 'ExactCountryOrCapital'] + NAME_SCORES + SEARCH_TYPES
 
 BOOTSTRAP_ITERATIONS = 10000
 