Search ranking: take prefix matches into account when counting errors made,
and replace NameScores::BestScores with NameScores::UpdateIfBetter.

Summary of what this patch changes:
  * search/ranker.cpp: renames GetMaxNumberOfErros to GetMaxNumberOfErrors;
    both UpdateNameScores overloads now call NameScores::UpdateIfBetter.
  * search/ranking_info.cpp: RankingInfo::GetErrorsMade checks that
    m_maxErrorsMade >= m_errorsMade.m_errorsMade before dividing.
  * search/ranking_utils.cpp: removes NameScores::BestScores, makes
    GetPrefixErrorsMade use PrefixErrorsMade() of the DFA iterator, and adds
    DebugPrint(NameScores).
  * search/ranking_utils.hpp: adds NameScores constructors, UpdateIfBetter and
    operator==; GetNameScores computes prefix errors only when the last token
    is a prefix and merges candidates through UpdateIfBetter.
  * Tests: expectations now compare the name score together with the number
    of errors made.

Review notes (differences from the patch as originally posted):
  * Removed a stray ';' inside the UpdateIfBetter(...) call in the
    string overload of UpdateNameScores (it was a syntax error).
  * NameScores::operator== is now a const member function.
  * Template argument lists (<typename Slice>, <double>,
    <strings::UniString>) and line breaks were reconstructed from a
    whitespace-mangled copy; context-line whitespace is presumed to follow the
    tree's formatting -- confirm against the target revision before applying.
    The "index" hashes are those of the originally posted patch.

diff --git a/search/ranker.cpp b/search/ranker.cpp
index 914dff4620..73892bbc26 100644
--- a/search/ranker.cpp
+++ b/search/ranker.cpp
@@ -33,7 +33,7 @@ namespace search
 {
 namespace
 {
-size_t GetMaxNumberOfErros(Geocoder::Params const & params)
+size_t GetMaxNumberOfErrors(Geocoder::Params const & params)
 {
   size_t result = 0;
   for (size_t i = 0; i < params.GetNumTokens(); ++i)
@@ -50,16 +50,14 @@ struct NameScoresEx : public NameScores
 template <typename Slice>
 void UpdateNameScores(string const & name, Slice const & slice, NameScores & bestScores)
 {
-  auto const newScores = GetNameScores(name, slice);
-  bestScores = NameScores::BestScores(newScores, bestScores);
+  bestScores.UpdateIfBetter(GetNameScores(name, slice));
 }
 
 template <typename Slice>
 void UpdateNameScores(vector<strings::UniString> const & tokens, Slice const & slice,
                       NameScores & bestScores)
 {
-  auto const newScores = GetNameScores(tokens, slice);
-  bestScores = NameScores::BestScores(newScores, bestScores);
+  bestScores.UpdateIfBetter(GetNameScores(tokens, slice));
 }
 
 // This function supports only street names like "abcdstrasse"/"abcd strasse".
@@ -427,7 +425,7 @@ class RankerResultMaker
 
     info.m_nameScore = nameScore;
     info.m_errorsMade = errorsMade;
-    info.m_maxErrorsMade = GetMaxNumberOfErros(m_params);
+    info.m_maxErrorsMade = GetMaxNumberOfErrors(m_params);
     info.m_matchedFraction =
         totalLength == 0 ? 1.0
                          : static_cast<double>(matchedLength) / static_cast<double>(totalLength);
diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp
index c4cf9838ef..17c347342d 100644
--- a/search/ranking_info.cpp
+++ b/search/ranking_info.cpp
@@ -184,6 +184,7 @@ double RankingInfo::GetErrorsMade() const
   if (m_maxErrorsMade == 0)
     return 0.0;
 
+  CHECK_GREATER_OR_EQUAL(m_maxErrorsMade, m_errorsMade.m_errorsMade, ());
   return static_cast<double>(m_errorsMade.m_errorsMade) / static_cast<double>(m_maxErrorsMade);
 }
 }  // namespace search
diff --git a/search/ranking_utils.cpp b/search/ranking_utils.cpp
index 90f77c493d..182dcccc3d 100644
--- a/search/ranking_utils.cpp
+++ b/search/ranking_utils.cpp
@@ -27,18 +27,6 @@ struct TokenInfo
 };
 }  // namespace
 
-// static
-NameScores NameScores::BestScores(NameScores const & lhs, NameScores const & rhs)
-{
-  if (lhs.m_nameScore != rhs.m_nameScore)
-    return lhs.m_nameScore > rhs.m_nameScore ? lhs : rhs;
-
-  NameScores result = lhs;
-  result.m_errorsMade = ErrorsMade::Min(lhs.m_errorsMade, rhs.m_errorsMade);
-
-  return result;
-}
-
 // CategoriesInfo ----------------------------------------------------------------------------------
 CategoriesInfo::CategoriesInfo(feature::TypesHolder const & holder, TokenSlice const & tokens,
                                Locales const & locales, CategoriesHolder const & categories)
@@ -105,7 +93,7 @@ ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniStr
     auto it = dfa.Begin();
     strings::DFAMove(it, s.begin(), s.end());
     if (!it.Rejects())
-      errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.ErrorsMade()));
+      errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.PrefixErrorsMade()));
   });
 
   return errorsMade;
@@ -147,4 +135,11 @@ string DebugPrint(NameScore score)
   }
   return "Unknown";
 }
+
+string DebugPrint(NameScores scores)
+{
+  ostringstream os;
+  os << "[ " << DebugPrint(scores.m_nameScore) << ", " << DebugPrint(scores.m_errorsMade) << " ]";
+  return os.str();
+}
 }  // namespace search
diff --git a/search/ranking_utils.hpp b/search/ranking_utils.hpp
index 2df47eb731..5d1be824c8 100644
--- a/search/ranking_utils.hpp
+++ b/search/ranking_utils.hpp
@@ -97,9 +97,8 @@ std::string DebugPrint(ErrorsMade const & errorsMade);
 
 namespace impl
 {
-// Returns the minimum number of errors needed to match |text| with
-// any of the |tokens|. If it's not possible in accordance with
-// GetMaxErrorsForToken(|text|), returns kInfiniteErrors.
+// Returns the minimum number of errors needed to match |text| with |token|.
+// If it's not possible in accordance with GetMaxErrorsForToken(|text|), returns kInfiniteErrors.
 ErrorsMade GetErrorsMade(QueryParams::Token const & token, strings::UniString const & text);
 ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniString const & text);
 }  // namespace impl
@@ -118,7 +117,28 @@ enum NameScore
 
 struct NameScores
 {
-  static NameScores BestScores(NameScores const & lhs, NameScores const & rhs);
+  NameScores() = default;
+  NameScores(NameScore nameScore, ErrorsMade const & errorsMade)
+    : m_nameScore(nameScore), m_errorsMade(errorsMade)
+  {
+  }
+
+  void UpdateIfBetter(NameScores const & rhs)
+  {
+    if (rhs.m_nameScore > m_nameScore)
+    {
+      m_nameScore = rhs.m_nameScore;
+      m_errorsMade = rhs.m_errorsMade;
+      return;
+    }
+    if (rhs.m_nameScore == m_nameScore)
+      m_errorsMade = ErrorsMade::Min(m_errorsMade, rhs.m_errorsMade);
+  }
+
+  bool operator==(NameScores const & rhs) const
+  {
+    return m_nameScore == rhs.m_nameScore && m_errorsMade == rhs.m_errorsMade;
+  }
 
   NameScore m_nameScore = NAME_SCORE_ZERO;
   ErrorsMade m_errorsMade;
@@ -157,7 +177,8 @@ NameScores GetNameScores(std::vector<strings::UniString> const & tokens, Slice c
       continue;
 
     auto const prefixErrorsMade =
-        impl::GetPrefixErrorsMade(slice.Get(m - 1), tokens[offset + m - 1]);
+        lastTokenIsPrefix ? impl::GetPrefixErrorsMade(slice.Get(m - 1), tokens[offset + m - 1])
+                          : ErrorsMade{};
     auto const fullErrorsMade = impl::GetErrorsMade(slice.Get(m - 1), tokens[offset + m - 1]);
     if (!fullErrorsMade.IsValid() && !(prefixErrorsMade.IsValid() && lastTokenIsPrefix))
       continue;
@@ -169,16 +190,13 @@ NameScores GetNameScores(std::vector<strings::UniString> const & tokens, Slice c
       return scores;
     }
 
+    auto const newErrors =
+        lastTokenIsPrefix ? ErrorsMade::Min(fullErrorsMade, prefixErrorsMade) : fullErrorsMade;
+
     if (offset == 0)
-    {
-      scores.m_nameScore = std::max(scores.m_nameScore, NAME_SCORE_PREFIX);
-      scores.m_errorsMade = totalErrorsMade + prefixErrorsMade;
-    }
-    else
-    {
-      scores.m_nameScore = std::max(scores.m_nameScore, NAME_SCORE_SUBSTRING);
-      scores.m_errorsMade = totalErrorsMade + prefixErrorsMade;
-    }
+      scores.UpdateIfBetter(NameScores(NAME_SCORE_PREFIX, totalErrorsMade + newErrors));
+
+    scores.UpdateIfBetter(NameScores(NAME_SCORE_SUBSTRING, totalErrorsMade + newErrors));
   }
   return scores;
 }
@@ -193,4 +211,5 @@ NameScores GetNameScores(std::string const & name, Slice const & slice)
 }
 
 std::string DebugPrint(NameScore score);
+std::string DebugPrint(NameScores scores);
 }  // namespace search
diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp
index 0f780347cc..059843405c 100644
--- a/search/search_integration_tests/processor_test.cpp
+++ b/search/search_integration_tests/processor_test.cpp
@@ -556,7 +556,10 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
       TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, (query));
     };
 
-    checkErrors("кафе лермонтов", ErrorsMade(1));
+    // Prefix match "лермонтов" -> "Лермонтовъ" without errors.
+    checkErrors("кафе лермонтов", ErrorsMade(0));
+    checkErrors("кафе лермнтовъ", ErrorsMade(1));
+    // Full match.
     checkErrors("трактир лермонтов", ErrorsMade(2));
     checkErrors("кафе", ErrorsMade());
 
@@ -572,9 +575,14 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
     checkErrors("пушкенская кафе", ErrorsMade(1));
     checkErrors("пушкинская трактиръ лермонтовъ", ErrorsMade(0));
 
-    checkErrors("лермонтовъ чехов", ErrorsMade(1));
+    // Prefix match "чехов" -> "Чеховъ" without errors.
+    checkErrors("лермонтовъ чехов", ErrorsMade(0));
+    checkErrors("лермонтовъ чехов ", ErrorsMade(1));
     checkErrors("лермонтовъ чеховъ", ErrorsMade(0));
-    checkErrors("лермонтов чехов", ErrorsMade(2));
+
+    // Prefix match "чехов" -> "Чеховъ" without errors.
+    checkErrors("лермонтов чехов", ErrorsMade(1));
+    checkErrors("лермонтов чехов ", ErrorsMade(2));
     checkErrors("лермонтов чеховъ", ErrorsMade(1));
     checkErrors("лермонтов чеховъ антон павлович", ErrorsMade(3));
 
diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp
index 7ee3b6c7b7..dbeff63c94 100644
--- a/search/search_tests/ranking_tests.cpp
+++ b/search/search_tests/ranking_tests.cpp
@@ -21,7 +21,7 @@ using namespace strings;
 
 namespace
 {
-NameScore GetScore(string const & name, string const & query, TokenRange const & tokenRange)
+NameScores GetScore(string const & name, string const & query, TokenRange const & tokenRange)
 {
   search::Delimiters delims;
   QueryParams params;
@@ -39,26 +39,33 @@ NameScore GetScore(string const & name, string const & query, TokenRange const &
     params.InitNoPrefix(tokens.begin(), tokens.end());
   }
 
-  return GetNameScores(name, TokenSlice(params, tokenRange)).m_nameScore;
+  return GetNameScores(name, TokenSlice(params, tokenRange));
 }
 
 UNIT_TEST(NameTest_Smoke)
 {
-  TEST_EQUAL(GetScore("New York", "Central Park, New York, US", TokenRange(2, 4)),
-             NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("New York", "York", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ());
-  TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", TokenRange(2, 3)), NAME_SCORE_PREFIX, ());
-  TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("Moscow", "Red Square Moscw", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("San Francisco", "Fran", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ());
-  TEST_EQUAL(GetScore("San Francisco", "Fran ", TokenRange(0, 1)), NAME_SCORE_ZERO, ());
-  TEST_EQUAL(GetScore("San Francisco", "Sa", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
-  TEST_EQUAL(GetScore("San Francisco", "San ", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
-  TEST_EQUAL(GetScore("Лермонтовъ", "Лермон", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
-  TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтово", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
-  TEST_EQUAL(GetScore("фото на документы", "фото", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
-  TEST_EQUAL(GetScore("фотоателье", "фото", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
+  auto const test = [](string const & name, string const & query, TokenRange const & tokenRange,
+                       NameScore nameScore, size_t errorsMade) {
+    TEST_EQUAL(
+        GetScore(name, query, tokenRange),
+        NameScores(nameScore, nameScore == NAME_SCORE_ZERO ? ErrorsMade() : ErrorsMade(errorsMade)),
+        (name, query, tokenRange));
+  };
+
+  test("New York", "Central Park, New York, US", TokenRange(2, 4), NAME_SCORE_FULL_MATCH, 0);
+  test("New York", "York", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
+  test("Moscow", "Red Square Mosc", TokenRange(2, 3), NAME_SCORE_PREFIX, 0);
+  test("Moscow", "Red Square Moscow", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 0);
+  test("Moscow", "Red Square Moscw", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 1);
+  test("San Francisco", "Fran", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
+  test("San Francisco", "Fran ", TokenRange(0, 1), NAME_SCORE_ZERO, 0);
+  test("San Francisco", "Sa", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
+  test("San Francisco", "San ", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
+  test("Лермонтовъ", "Лермон", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
+  test("Лермонтовъ", "Лермонтов", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
+  test("Лермонтовъ", "Лермонтово", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
+  test("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
+  test("фото на документы", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
+  test("фотоателье", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
 }
 }  // namespace