[search] Count errors in the city part.

This commit is contained in:
Yuri Gorshenin 2017-12-20 16:35:09 +03:00 committed by Tatiana Yan
parent e1989a445e
commit e9ed05623d
7 changed files with 85 additions and 26 deletions

View file

@ -1361,7 +1361,11 @@ void Geocoder::EmitResult(BaseContext & ctx, MwmSet::MwmId const & mwmId, uint32
}
if (ctx.m_city)
info.m_tokenRange[Model::TYPE_CITY] = ctx.m_city->m_tokenRange;
{
auto const & city = *ctx.m_city;
info.m_tokenRange[Model::TYPE_CITY] = city.m_tokenRange;
info.m_cityId = FeatureID(city.m_countryId, city.m_featureId);
}
if (geoParts)
info.m_geoParts = *geoParts;

View file

@ -47,6 +47,11 @@ public:
TYPE_COUNT
};
// Returns true iff |type| lies within the inclusive enum range
// [TYPE_VILLAGE, TYPE_COUNTRY].
static bool IsLocalityType(Type const type)
{
  if (type < TYPE_VILLAGE)
    return false;
  return type <= TYPE_COUNTRY;
}
Type GetType(FeatureType const & feature) const;
void SetCianEnabled(bool enabled) { m_cianEnabled = enabled; }

View file

@ -4,6 +4,8 @@
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "indexer/feature_decl.hpp"
#include "geometry/point2d.hpp"
#include "base/assert.hpp"
@ -44,6 +46,9 @@ struct PreRankingInfo
// building and street ids are in |m_geoParts|.
IntersectionResult m_geoParts;
// Id of the matched city, if any.
FeatureID m_cityId;
// True iff all tokens that are not stop-words
// were used when retrieving the feature.
bool m_allTokensUsed = true;

View file

@ -5,6 +5,7 @@
#include "search/highlighting.hpp"
#include "search/model.hpp"
#include "search/pre_ranking_info.hpp"
#include "search/ranking_utils.hpp"
#include "search/token_slice.hpp"
#include "search/utils.hpp"
@ -73,6 +74,25 @@ NameScores GetNameScores(FeatureType const & ft, Geocoder::Params const & params
return bestScores;
}
// Returns the errors-made estimate for matching |ft| against the query
// tokens in |range|.
//
// The value reported by GetNameScores() is used when it is valid.
// Otherwise the result is accumulated token by token: for every token in
// |range| the maximum of GetMaxErrorsForToken() over all strings visited by
// the token's ForEach() is taken, and these per-token maxima are summed.
ErrorsMade GetErrorsMade(FeatureType const & ft, Geocoder::Params const & params,
                         TokenRange const & range, Model::Type type)
{
  auto total = GetNameScores(ft, params, range, type).m_errorsMade;
  if (!total.IsValid())
  {
    for (auto const token : range)
    {
      // Largest error allowance among all strings of the current token.
      ErrorsMade worst;
      params.GetToken(token).ForEach([&worst](strings::UniString const & s) {
        worst = ErrorsMade::Max(worst, ErrorsMade{GetMaxErrorsForToken(s)});
      });
      total += worst;
    }
  }
  return total;
}
void RemoveDuplicatingLinear(vector<RankerResult> & results)
{
double constexpr kDistSameStreetMeters = 5000.0;
@ -247,13 +267,26 @@ class RankerResultMaker
FeatureType street;
if (LoadFeature(FeatureID(mwmId, preInfo.m_geoParts.m_street), street))
{
auto const nameScores = GetNameScores(
street, m_params, preInfo.m_tokenRange[Model::TYPE_STREET], Model::TYPE_STREET);
auto const type = Model::TYPE_STREET;
auto const & range = preInfo.m_tokenRange[type];
auto const nameScores = GetNameScores(street, m_params, range, type);
nameScore = min(nameScore, nameScores.m_nameScore);
errorsMade += nameScores.m_errorsMade;
}
}
if (!Model::IsLocalityType(info.m_type) && preInfo.m_cityId.IsValid())
{
FeatureType city;
if (LoadFeature(preInfo.m_cityId, city))
{
auto const type = Model::TYPE_CITY;
auto const & range = preInfo.m_tokenRange[type];
errorsMade += GetErrorsMade(city, m_params, range, type);
}
}
info.m_nameScore = nameScore;
info.m_errorsMade = errorsMade;

View file

@ -12,26 +12,26 @@ namespace
{
// See search/search_quality/scoring_model.py for details. In short,
// these coeffs correspond to coeffs in a linear model.
double const kDistanceToPivot = -0.1940753;
double const kRank = 0.6904166;
double const kFalseCats = -0.0944214;
double const kErrorsMade = -0.0167366;
double const kAllTokensUsed = 1.0000000;
double const kDistanceToPivot = -0.2837370;
double const kRank = 1.0000000;
double const kFalseCats = 0.0000000;
double const kErrorsMade = -0.0118797;
double const kAllTokensUsed = 0.0000000;
double const kNameScore[NameScore::NAME_SCORE_COUNT] = {
-0.0704004 /* Zero */,
0.0178957 /* Substring */,
0.0274588 /* Prefix */,
0.0250459 /* Full Match */
-0.0995842 /* Zero */,
0.0265404 /* Substring */,
0.0238720 /* Prefix */,
0.0491718 /* Full Match */
};
double const kType[Model::TYPE_COUNT] = {
-0.0164093 /* POI */,
-0.0164093 /* Building */,
0.0067338 /* Street */,
0.0388209 /* Unclassified */,
-0.0599702 /* Village */,
-0.0996292 /* City */,
0.0929370 /* State */,
0.0375170 /* Country */
-0.0059073 /* POI */,
-0.0059073 /* Building */,
0.0293600 /* Street */,
0.0254288 /* Unclassified */,
-0.1130063 /* Village */,
-0.1549069 /* City */,
0.1656289 /* State */,
0.0534028 /* Country */
};
double TransformDistance(double distance)

View file

@ -72,6 +72,11 @@ struct ErrorsMade
return Combine(lhs, rhs, [](size_t u, size_t v) { return std::min(u, v); });
}
// Merges |lhs| and |rhs| through Combine(), using std::max to join the two
// underlying counts.
static ErrorsMade Max(ErrorsMade const & lhs, ErrorsMade const & rhs)
{
  auto pickLarger = [](size_t u, size_t v) { return std::max(u, v); };
  return Combine(lhs, rhs, pickLarger);
}
friend ErrorsMade operator+(ErrorsMade const & lhs, ErrorsMade const & rhs)
{
return Combine(lhs, rhs, [](size_t u, size_t v) { return u + v; });

View file

@ -428,7 +428,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
{
string const countryName = "Wonderland";
TestCity chekhov(m2::PointD(0, 0), "Чехов", "ru", 100 /* rank */);
TestCity chekhov(m2::PointD(0, 0), "Чеховъ Антонъ Павловичъ", "ru", 100 /* rank */);
TestStreet pushkinskaya(
vector<m2::PointD>{m2::PointD(-0.5, -0.5), m2::PointD(0, 0), m2::PointD(0.5, 0.5)},
"Улица Пушкинская", "ru");
@ -442,17 +443,16 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
builder.Add(lermontov);
});
SetViewport(m2::RectD(m2::PointD(-1, -1), m2::PointD(1, 1)));
SetViewport(m2::RectD(-1, -1, 1, 1));
auto checkErrors = [&](string const & query, ErrorsMade const & errorsMade) {
auto request = MakeRequest(query, "ru");
auto const & results = request->Results();
TRules rules{ExactMatch(wonderlandId, lermontov)};
TEST(ResultsMatch(results, rules), ());
TEST_EQUAL(results.size(), 1, ());
TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, ());
TEST(ResultsMatch(results, rules), (query));
TEST_EQUAL(results.size(), 1, (query));
TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, (query));
};
checkErrors("кафе лермонтов", ErrorsMade(1));
@ -461,6 +461,13 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
checkErrors("пушкенская трактир лермонтов", ErrorsMade(3));
checkErrors("пушкенская кафе", ErrorsMade(1));
checkErrors("пушкинская трактиръ лермонтовъ", ErrorsMade(0));
checkErrors("лермонтовъ чехов", ErrorsMade(1));
checkErrors("лермонтовъ чеховъ", ErrorsMade(0));
checkErrors("лермонтов чехов", ErrorsMade(2));
checkErrors("лермонтов чеховъ", ErrorsMade(1));
checkErrors("лермонтов чеховъ антон павлович", ErrorsMade(3));
}
UNIT_CLASS_TEST(ProcessorTest, TestHouseNumbers)