diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 978824389c..67075a5cfd 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -560,9 +560,9 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade) checkErrors("трактир лермонтов", ErrorsMade(2)); checkErrors("кафе", ErrorsMade()); - checkErrors("Yesenina cafe", ErrorsMade(0)); - checkErrors("Esenina cafe", ErrorsMade(1)); - checkErrors("Jesenina cafe", ErrorsMade(1)); + checkErrors("Cafe Yesenina", ErrorsMade(0)); + checkErrors("Cafe Esenina", ErrorsMade(1)); + checkErrors("Cafe Jesenina", ErrorsMade(1)); checkErrors("Островского кафе", ErrorsMade(0)); checkErrors("Астровского кафе", ErrorsMade(1)); @@ -2052,5 +2052,106 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints) } } +UNIT_CLASS_TEST(ProcessorTest, HouseOnStreetSynonymsWithMisprints) +{ + string const countryName = "Wonderland"; + + TestStreet tverskoi(vector{m2::PointD(1.0, -1.0), m2::PointD(1.0, 1.0)}, + "Tverskoi Boulevard", "en"); + TestStreet leninsky(vector{m2::PointD(0.0, -1.0), m2::PointD(0.0, 1.0)}, + "Leninsky Avenue", "en"); + TestStreet mira(vector{m2::PointD(-1.0, -1.0), m2::PointD(-1.0, 1.0)}, + "Проспект Мира", "ru"); + + TestPOI houseTverskoi(m2::PointD(1.0, 0.0), "", "en"); + houseTverskoi.SetHouseNumber("3"); + houseTverskoi.SetStreetName(tverskoi.GetName("en")); + + TestPOI houseLeninsky(m2::PointD(0.0, 0.0), "", "en"); + houseLeninsky.SetHouseNumber("5"); + houseLeninsky.SetStreetName(leninsky.GetName("en")); + + TestPOI houseMira(m2::PointD(-1.0, 0.0), "", "en"); + houseMira.SetHouseNumber("7"); + houseMira.SetStreetName(mira.GetName("ru")); + + auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) { + builder.Add(tverskoi); + builder.Add(leninsky); + builder.Add(mira); + builder.Add(houseTverskoi); + builder.Add(houseLeninsky); + builder.Add(houseMira); + }); + 
+ auto alternativeMatch = [this](string const & query, Rules const & rules1, Rules const & rules2) { + TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport); + request.Run(); + return MatchResults(m_dataSource, rules1, request.Results()) || + MatchResults(m_dataSource, rules2, request.Results()); + }; + + SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); + { + Rules rules = {ExactMatch(countryId, houseTverskoi)}; + Rules rulesWithStreet = {ExactMatch(countryId, houseTverskoi), ExactMatch(countryId, tverskoi)}; + TEST(alternativeMatch("tverskoi 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi boulevard 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi bulevard 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi blvd 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi blvrd 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi boulevrd 3", rules, rulesWithStreet), ()); + TEST(alternativeMatch("tverskoi bolevard 3", rules, rulesWithStreet), ()); + } + { + Rules rules = {ExactMatch(countryId, houseLeninsky)}; + Rules rulesWithStreet = {ExactMatch(countryId, houseLeninsky), ExactMatch(countryId, leninsky)}; + TEST(alternativeMatch("leninsky 5", rules, rulesWithStreet), ()); + TEST(alternativeMatch("leninsky avenue 5", rules, rulesWithStreet), ()); + TEST(alternativeMatch("leninsky avenu 5", rules, rulesWithStreet), ()); + TEST(alternativeMatch("leninsky avneue 5", rules, rulesWithStreet), ()); + TEST(alternativeMatch("leninsky av 5", rules, rulesWithStreet), ()); + } + { + Rules rules = {ExactMatch(countryId, houseMira)}; + Rules rulesWithStreet = {ExactMatch(countryId, houseMira), ExactMatch(countryId, mira)}; + TEST(alternativeMatch("мира 7", rules, rulesWithStreet), ()); + TEST(alternativeMatch("проспект мира 7", rules, rulesWithStreet), ()); + TEST(alternativeMatch("пропект мира 7", rules, rulesWithStreet), ()); + 
TEST(alternativeMatch("прсопект мира 7", rules, rulesWithStreet), ()); + TEST(alternativeMatch("пр-т мира 7", rules, rulesWithStreet), ()); + } +} + +UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefixMatch) +{ + string const countryName = "Wonderland"; + + TestStreet yesenina( + vector{m2::PointD(0.5, -0.5), m2::PointD(0, 0), m2::PointD(-0.5, 0.5)}, + "Yesenina street", "en"); + + TestPOI cafe(m2::PointD(0, 0), "", "en"); + cafe.SetTypes({{"amenity", "cafe"}}); + + auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) { + builder.Add(yesenina); + builder.Add(cafe); + }); + + SetViewport(m2::RectD(-1, -1, 1, 1)); + { + Rules rules = {ExactMatch(countryId, cafe)}; + TEST(ResultsMatch("Yesenina cafe ", rules), ()); + TEST(ResultsMatch("Cafe Yesenina ", rules), ()); + TEST(ResultsMatch("Cafe Yesenina", rules), ()); + } + { + Rules rules = {ExactMatch(countryId, cafe), ExactMatch(countryId, yesenina)}; + // A prefix match with misprints to a street synonym gives the street as an additional + // result, but we can still find the cafe. 
+ TEST(ResultsMatch("Yesenina cafe", rules), ()); + } +} } // namespace } // namespace search diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp index e7cd4b4720..d0ff133b2b 100644 --- a/search/streets_matcher.cpp +++ b/search/streets_matcher.cpp @@ -16,11 +16,43 @@ namespace search { namespace { -bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Prediction const & rhs) +bool LessByHashAndRange(StreetsMatcher::Prediction const & lhs, + StreetsMatcher::Prediction const & rhs) { if (lhs.m_hash != rhs.m_hash) return lhs.m_hash < rhs.m_hash; + if (lhs.GetNumTokens() != rhs.GetNumTokens()) + return lhs.GetNumTokens() > rhs.GetNumTokens(); + + if (lhs.m_tokenRange.Begin() != rhs.m_tokenRange.Begin()) + return lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin(); + + if (lhs.m_prob != rhs.m_prob) + return lhs.m_prob > rhs.m_prob; + + if (lhs.m_withMisprints != rhs.m_withMisprints) + return rhs.m_withMisprints; + + return false; +} + +bool EqualsByHashAndRange(StreetsMatcher::Prediction const & lhs, + StreetsMatcher::Prediction const & rhs) +{ + return lhs.GetNumTokens() == rhs.GetNumTokens() && + lhs.m_tokenRange.Begin() == rhs.m_tokenRange.Begin() && lhs.m_hash == rhs.m_hash; +} + +bool LessByHashAndMisprints(StreetsMatcher::Prediction const & lhs, + StreetsMatcher::Prediction const & rhs) +{ + if (lhs.m_hash != rhs.m_hash) + return lhs.m_hash < rhs.m_hash; + + if (lhs.m_withMisprints != rhs.m_withMisprints) + return rhs.m_withMisprints; + if (lhs.m_prob != rhs.m_prob) return lhs.m_prob > rhs.m_prob; @@ -29,6 +61,12 @@ bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Predicti return lhs.m_tokenRange.Begin() < rhs.m_tokenRange.Begin(); } + +bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs, + StreetsMatcher::Prediction const & rhs) +{ + return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash; +} } // namespace // static @@ -44,10 +82,25 @@ void 
StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter, if (predictions.empty()) return; - sort(predictions.begin(), predictions.end(), &LessByHash); - predictions.erase( - unique(predictions.begin(), predictions.end(), base::EqualsBy(&Prediction::m_hash)), - predictions.end()); + // Remove predictions with the same m_hash (features) and token range. + sort(predictions.begin(), predictions.end(), &LessByHashAndRange); + predictions.erase(unique(predictions.begin(), predictions.end(), &EqualsByHashAndRange), + predictions.end()); + + // Leave the most probable and longest prediction for predictions with the same m_hash (features) + // and m_withMisprints. + // We will still distinguish parses with the same m_hash (features) but different range and m_withMisprints. + // For example, for "Paramount dive" we will have two parses: + // STREET UNUSED (can be matched to poi later) + // Paramount dive + // + // STREET STREET ("drive" with misprints) + // Paramount dive + // + // The parses will have the same features and hash but we need both of them. + sort(predictions.begin(), predictions.end(), &LessByHashAndMisprints); + predictions.erase(unique(predictions.begin(), predictions.end(), &EqualsByHashAndMisprints), + predictions.end()); sort(predictions.rbegin(), predictions.rend(), base::LessBy(&Prediction::m_prob)); while (predictions.size() > kMaxNumOfImprobablePredictions && @@ -66,33 +119,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & if (ctx.IsTokenUsed(startToken)) continue; - // Here we try to match as many tokens as possible while - // intersection is a non-empty bit vector of streets. Single - // tokens that are synonyms to streets are ignored. Moreover, - // each time a token that looks like a beginning of a house number - // is met, we try to use current intersection of tokens as a - // street layer and try to match BUILDINGs or POIs. 
- CBV streets(ctx.m_streets); - - CBV all; - all.SetFull(); - - size_t curToken = startToken; - - // This variable is used for prevention of duplicate calls to - // CreateStreetsLayerAndMatchLowerLayers() with the same - // arguments. - size_t lastToken = startToken; - - // When true, no bit vectors were intersected with |streets| at all. - bool emptyIntersection = true; - - // When true, |streets| is in the incomplete state and can't be - // used for creation of street layers. - bool incomplete = false; - - auto emit = [&]() - { + auto emit = [&](CBV const & streets, CBV const & all, size_t curToken, size_t lastToken, + bool emptyIntersection, bool incomplete, bool withMisprints) { if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken) { CBV fs(streets); @@ -121,11 +149,36 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & prediction.m_features = move(fs); prediction.m_hash = prediction.m_features.Hash(); + prediction.m_withMisprints = withMisprints; } }; - auto findStreets = [&](bool withMisprints) - { + auto findStreets = [&](bool withMisprints) { + // Here we try to match as many tokens as possible while + // intersection is a non-empty bit vector of streets. Single + // tokens that are synonyms to streets are ignored. Moreover, + // each time a token that looks like a beginning of a house number + // is met, we try to use current intersection of tokens as a + // street layer and try to match BUILDINGs or POIs. + CBV streets(ctx.m_streets); + + CBV all; + all.SetFull(); + + size_t curToken = startToken; + + // This variable is used for prevention of duplicate calls to + // CreateStreetsLayerAndMatchLowerLayers() with the same + // arguments. + size_t lastToken = startToken; + + // When true, no bit vectors were intersected with |streets| at all. + bool emptyIntersection = true; + + // When true, |streets| is in the incomplete state and can't be + // used for creation of street layers. 
+ bool incomplete = false; + StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag) { auto buffer = streets.Intersect(ctx.m_features[tag].m_features); @@ -148,7 +201,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & // the intersection. Therefore we need // to create streets layer right now. if (buffer.IsEmpty()) - emit(); + emit(streets, all, curToken, lastToken, emptyIntersection, + incomplete, withMisprints); streets = buffer; all = all.Intersect(ctx.m_features[tag].m_features); @@ -164,11 +218,11 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & bool const isPrefix = params.IsPrefixToken(curToken); if (house_numbers::LooksLikeHouseNumber(token, isPrefix)) - emit(); + emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints); filter.Put(token, isPrefix, curToken); } - emit(); + emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints); }; findStreets(false /* withMisprints */); diff --git a/search/streets_matcher.hpp b/search/streets_matcher.hpp index a22a8aebc3..6f695eb4e7 100644 --- a/search/streets_matcher.hpp +++ b/search/streets_matcher.hpp @@ -22,6 +22,7 @@ public: CBV m_features; TokenRange m_tokenRange; + bool m_withMisprints = false; double m_prob = 0.0; uint64_t m_hash = 0; };