[search] Review fixes.

This commit is contained in:
tatiana-yan 2019-07-10 13:38:38 +03:00 committed by mpimenov
parent a04d3e210e
commit 0c77f4ff92
6 changed files with 82 additions and 54 deletions

View file

@ -33,7 +33,7 @@ namespace search
{
namespace
{
size_t GetMaxNumberOfErros(Geocoder::Params const & params)
size_t GetMaxNumberOfErrors(Geocoder::Params const & params)
{
size_t result = 0;
for (size_t i = 0; i < params.GetNumTokens(); ++i)
@ -50,16 +50,14 @@ struct NameScoresEx : public NameScores
// Scores |name| against |slice| via GetNameScores and folds the result into |bestScores|.
template <typename Slice>
void UpdateNameScores(string const & name, Slice const & slice, NameScores & bestScores)
{
auto const newScores = GetNameScores(name, slice);
bestScores = NameScores::BestScores(newScores, bestScores);
// The stray ';' inside the call parentheses was a syntax error.
bestScores.UpdateIfBetter(GetNameScores(name, slice));
}
// Same as the string overload, but scores an already tokenized name: |tokens| is matched
// against |slice| via GetNameScores and the result is folded into |bestScores|.
template <typename Slice>
void UpdateNameScores(vector<strings::UniString> const & tokens, Slice const & slice,
NameScores & bestScores)
{
auto const newScores = GetNameScores(tokens, slice);
bestScores = NameScores::BestScores(newScores, bestScores);
bestScores.UpdateIfBetter(GetNameScores(tokens, slice));
}
// This function supports only street names like "abcdstrasse"/"abcd strasse".
@ -427,7 +425,7 @@ class RankerResultMaker
info.m_nameScore = nameScore;
info.m_errorsMade = errorsMade;
info.m_maxErrorsMade = GetMaxNumberOfErros(m_params);
info.m_maxErrorsMade = GetMaxNumberOfErrors(m_params);
info.m_matchedFraction =
totalLength == 0 ? 1.0
: static_cast<double>(matchedLength) / static_cast<double>(totalLength);

View file

@ -184,6 +184,7 @@ double RankingInfo::GetErrorsMade() const
if (m_maxErrorsMade == 0)
return 0.0;
CHECK_GREATER_OR_EQUAL(m_maxErrorsMade, m_errorsMade.m_errorsMade, ());
return static_cast<double>(m_errorsMade.m_errorsMade) / static_cast<double>(m_maxErrorsMade);
}
} // namespace search

View file

@ -27,18 +27,6 @@ struct TokenInfo
};
} // namespace
// static
// Returns whichever argument has the higher name score. On a tie returns a copy of |lhs|
// whose errors are ErrorsMade::Min of both arguments' errors.
NameScores NameScores::BestScores(NameScores const & lhs, NameScores const & rhs)
{
  if (lhs.m_nameScore > rhs.m_nameScore)
    return lhs;
  if (rhs.m_nameScore > lhs.m_nameScore)
    return rhs;

  NameScores merged = lhs;
  merged.m_errorsMade = ErrorsMade::Min(lhs.m_errorsMade, rhs.m_errorsMade);
  return merged;
}
// CategoriesInfo ----------------------------------------------------------------------------------
CategoriesInfo::CategoriesInfo(feature::TypesHolder const & holder, TokenSlice const & tokens,
Locales const & locales, CategoriesHolder const & categories)
@ -105,7 +93,7 @@ ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniStr
auto it = dfa.Begin();
strings::DFAMove(it, s.begin(), s.end());
if (!it.Rejects())
errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.ErrorsMade()));
errorsMade = ErrorsMade::Min(errorsMade, ErrorsMade(it.PrefixErrorsMade()));
});
return errorsMade;
@ -147,4 +135,11 @@ string DebugPrint(NameScore score)
}
return "Unknown";
}
// Renders |scores| as "[ <name score>, <errors made> ]" using the DebugPrint overloads
// of the two members.
string DebugPrint(NameScores scores)
{
  string result = "[ ";
  result += DebugPrint(scores.m_nameScore);
  result += ", ";
  result += DebugPrint(scores.m_errorsMade);
  result += " ]";
  return result;
}
} // namespace search

View file

@ -97,9 +97,8 @@ std::string DebugPrint(ErrorsMade const & errorsMade);
namespace impl
{
// Returns the minimum number of errors needed to match |text| with
// any of the |tokens|. If it's not possible in accordance with
// GetMaxErrorsForToken(|text|), returns kInfiniteErrors.
// Returns the minimum number of errors needed to match |text| with |token|.
// If it's not possible in accordance with GetMaxErrorsForToken(|text|), returns kInfiniteErrors.
ErrorsMade GetErrorsMade(QueryParams::Token const & token, strings::UniString const & text);
ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniString const & text);
} // namespace impl
@ -118,7 +117,28 @@ enum NameScore
struct NameScores
{
static NameScores BestScores(NameScores const & lhs, NameScores const & rhs);
// Default construction relies on the in-class member initializers.
NameScores() = default;
// Builds scores from an explicit name score and the errors made to achieve it.
NameScores(NameScore nameScore, ErrorsMade const & errorsMade)
: m_nameScore(nameScore), m_errorsMade(errorsMade)
{
}
// Merges |rhs| into this object:
// - a strictly higher name score in |rhs| replaces both fields;
// - an equal name score keeps ErrorsMade::Min of the two error values;
// - a lower name score leaves this object untouched.
void UpdateIfBetter(NameScores const & rhs)
{
  if (rhs.m_nameScore < m_nameScore)
    return;

  if (rhs.m_nameScore == m_nameScore)
  {
    m_errorsMade = ErrorsMade::Min(m_errorsMade, rhs.m_errorsMade);
    return;
  }

  m_nameScore = rhs.m_nameScore;
  m_errorsMade = rhs.m_errorsMade;
}
// Two NameScores are equal when both the name score and the errors made are equal.
// Const-qualified so it can be used on const objects and temporaries (e.g. the
// return value of a function compared inside a test macro).
bool operator==(NameScores const & rhs) const
{
  return m_nameScore == rhs.m_nameScore && m_errorsMade == rhs.m_errorsMade;
}
NameScore m_nameScore = NAME_SCORE_ZERO;
ErrorsMade m_errorsMade;
@ -157,7 +177,8 @@ NameScores GetNameScores(std::vector<strings::UniString> const & tokens, Slice c
continue;
auto const prefixErrorsMade =
impl::GetPrefixErrorsMade(slice.Get(m - 1), tokens[offset + m - 1]);
lastTokenIsPrefix ? impl::GetPrefixErrorsMade(slice.Get(m - 1), tokens[offset + m - 1])
: ErrorsMade{};
auto const fullErrorsMade = impl::GetErrorsMade(slice.Get(m - 1), tokens[offset + m - 1]);
if (!fullErrorsMade.IsValid() && !(prefixErrorsMade.IsValid() && lastTokenIsPrefix))
continue;
@ -169,16 +190,13 @@ NameScores GetNameScores(std::vector<strings::UniString> const & tokens, Slice c
return scores;
}
auto const newErrors =
lastTokenIsPrefix ? ErrorsMade::Min(fullErrorsMade, prefixErrorsMade) : fullErrorsMade;
if (offset == 0)
{
scores.m_nameScore = std::max(scores.m_nameScore, NAME_SCORE_PREFIX);
scores.m_errorsMade = totalErrorsMade + prefixErrorsMade;
}
else
{
scores.m_nameScore = std::max(scores.m_nameScore, NAME_SCORE_SUBSTRING);
scores.m_errorsMade = totalErrorsMade + prefixErrorsMade;
}
scores.UpdateIfBetter(NameScores(NAME_SCORE_PREFIX, totalErrorsMade + newErrors));
scores.UpdateIfBetter(NameScores(NAME_SCORE_SUBSTRING, totalErrorsMade + newErrors));
}
return scores;
}
@ -193,4 +211,5 @@ NameScores GetNameScores(std::string const & name, Slice const & slice)
}
std::string DebugPrint(NameScore score);
std::string DebugPrint(NameScores scores);
} // namespace search

View file

@ -556,7 +556,10 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, (query));
};
checkErrors("кафе лермонтов", ErrorsMade(1));
// Prefix match "лермонтов" -> "Лермонтовъ" without errors.
checkErrors("кафе лермонтов", ErrorsMade(0));
checkErrors("кафе лермнтовъ", ErrorsMade(1));
// Full match.
checkErrors("трактир лермонтов", ErrorsMade(2));
checkErrors("кафе", ErrorsMade());
@ -572,9 +575,14 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
checkErrors("пушкенская кафе", ErrorsMade(1));
checkErrors("пушкинская трактиръ лермонтовъ", ErrorsMade(0));
checkErrors("лермонтовъ чехов", ErrorsMade(1));
// Prefix match "чехов" -> "Чеховъ" without errors.
checkErrors("лермонтовъ чехов", ErrorsMade(0));
checkErrors("лермонтовъ чехов ", ErrorsMade(1));
checkErrors("лермонтовъ чеховъ", ErrorsMade(0));
checkErrors("лермонтов чехов", ErrorsMade(2));
// Prefix match "чехов" -> "Чеховъ" without errors.
checkErrors("лермонтов чехов", ErrorsMade(1));
checkErrors("лермонтов чехов ", ErrorsMade(2));
checkErrors("лермонтов чеховъ", ErrorsMade(1));
checkErrors("лермонтов чеховъ антон павлович", ErrorsMade(3));

View file

@ -21,7 +21,7 @@ using namespace strings;
namespace
{
NameScore GetScore(string const & name, string const & query, TokenRange const & tokenRange)
NameScores GetScore(string const & name, string const & query, TokenRange const & tokenRange)
{
search::Delimiters delims;
QueryParams params;
@ -39,26 +39,33 @@ NameScore GetScore(string const & name, string const & query, TokenRange const &
params.InitNoPrefix(tokens.begin(), tokens.end());
}
return GetNameScores(name, TokenSlice(params, tokenRange)).m_nameScore;
return GetNameScores(name, TokenSlice(params, tokenRange));
}
// Smoke test: checks what GetScore returns for a handful of name/query/token-range
// combinations.
UNIT_TEST(NameTest_Smoke)
{
TEST_EQUAL(GetScore("New York", "Central Park, New York, US", TokenRange(2, 4)),
NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("New York", "York", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Mosc", TokenRange(2, 3)), NAME_SCORE_PREFIX, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Moscow", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("Moscow", "Red Square Moscw", TokenRange(2, 3)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("San Francisco", "Fran", TokenRange(0, 1)), NAME_SCORE_SUBSTRING, ());
TEST_EQUAL(GetScore("San Francisco", "Fran ", TokenRange(0, 1)), NAME_SCORE_ZERO, ());
TEST_EQUAL(GetScore("San Francisco", "Sa", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
TEST_EQUAL(GetScore("San Francisco", "San ", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермон", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтов", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермонтово", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1)), NAME_SCORE_FULL_MATCH, ());
TEST_EQUAL(GetScore("фото на документы", "фото", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
TEST_EQUAL(GetScore("фотоателье", "фото", TokenRange(0, 1)), NAME_SCORE_PREFIX, ());
// Helper: asserts that GetScore(name, query, tokenRange) equals
// NameScores(nameScore, errors), where errors is a default-constructed ErrorsMade when
// |nameScore| is NAME_SCORE_ZERO and ErrorsMade(errorsMade) otherwise.
auto const test = [](string const & name, string const & query, TokenRange const & tokenRange,
NameScore nameScore, size_t errorsMade) {
TEST_EQUAL(
GetScore(name, query, tokenRange),
NameScores(nameScore, nameScore == NAME_SCORE_ZERO ? ErrorsMade() : ErrorsMade(errorsMade)),
(name, query, tokenRange));
};
test("New York", "Central Park, New York, US", TokenRange(2, 4), NAME_SCORE_FULL_MATCH, 0);
test("New York", "York", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
test("Moscow", "Red Square Mosc", TokenRange(2, 3), NAME_SCORE_PREFIX, 0);
test("Moscow", "Red Square Moscow", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 0);
test("Moscow", "Red Square Moscw", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 1);
test("San Francisco", "Fran", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
test("San Francisco", "Fran ", TokenRange(0, 1), NAME_SCORE_ZERO, 0);
test("San Francisco", "Sa", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
test("San Francisco", "San ", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
test("Лермонтовъ", "Лермон", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
test("Лермонтовъ", "Лермонтов", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
test("Лермонтовъ", "Лермонтово", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
test("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
test("фото на документы", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
test("фотоателье", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
}
} // namespace