forked from organicmaps/organicmaps
[search] Improve search ranking behavior
This PR fixes some bugs in search ranking and should make search more usable, especially for partial matches and for buildings whose numbers aren't yet in OSM. I think it'll help with some of the issues linked in #1560. Overview of changes: - New synonyms: rd/st/ct are recognized as road/street/court. - Synonyms are supported for the final token in a search: previously, "S Fred St" wouldn't use synonym matching, but "S Fred St " (with a trailing space) would. Now both behave the same. - Fixed matchedLength: `matchedLength` now represents the number of characters in the query that matched the specific result. Previously, some items had `matchedFraction` values in excess of 1.0. - Leave suggestions available as results: a result used to create a suggestion is now shown both as a suggestion and as a regular result, instead of being removed from the results. - New `GetNameScores`: it now computes `matchedLength` and uses more descriptive variable names and comments. See the pull request for more details. Signed-off-by: Ben Nitkin <ben@nitkin.net>
This commit is contained in:
parent
c6a7ac6d09
commit
5f8f13236c
7 changed files with 197 additions and 113 deletions
|
@ -16,6 +16,9 @@ namespace search
|
|||
namespace
|
||||
{
|
||||
// All synonyms should be lowercase.
|
||||
|
||||
// @todo These should check the map language and use
|
||||
// only the corresponding translation.
|
||||
map<string, vector<string>> const kSynonyms = {
|
||||
{"n", {"north"}},
|
||||
{"w", {"west"}},
|
||||
|
@ -26,6 +29,10 @@ map<string, vector<string>> const kSynonyms = {
|
|||
{"sw", {"southwest"}},
|
||||
{"se", {"southeast"}},
|
||||
{"st", {"saint", "street"}},
|
||||
{"blvd", {"boulevard"}},
|
||||
{"cir", {"circle"}},
|
||||
{"ct", {"court"}},
|
||||
{"rt", {"route"}},
|
||||
{"св", {"святой", "святого", "святая", "святые", "святых", "свято"}},
|
||||
{"б", {"большая", "большой"}},
|
||||
{"бол", {"большая", "большой"}},
|
||||
|
@ -135,6 +142,14 @@ void QueryParams::AddSynonyms()
|
|||
for (auto const & synonym : it->second)
|
||||
token.AddSynonym(synonym);
|
||||
}
|
||||
if (m_hasPrefix)
|
||||
{
|
||||
string const ss = ToUtf8(MakeLowerCase(m_prefixToken.GetOriginal()));
|
||||
auto const it = kSynonyms.find(ss);
|
||||
if (it != kSynonyms.end())
|
||||
for (auto const & synonym : it->second)
|
||||
m_prefixToken.AddSynonym(synonym);
|
||||
}
|
||||
}
|
||||
|
||||
string DebugPrint(QueryParams const & params)
|
||||
|
|
|
@ -96,7 +96,7 @@ vector<vector<strings::UniString>> ModifyStrasse(vector<strings::UniString> cons
|
|||
return result;
|
||||
}
|
||||
|
||||
pair<NameScores, size_t> GetNameScores(FeatureType & ft, Geocoder::Params const & params,
|
||||
NameScores GetNameScores(FeatureType & ft, Geocoder::Params const & params,
|
||||
TokenRange const & range, Model::Type type)
|
||||
{
|
||||
NameScores bestScores;
|
||||
|
@ -104,13 +104,6 @@ pair<NameScores, size_t> GetNameScores(FeatureType & ft, Geocoder::Params const
|
|||
TokenSlice const slice(params, range);
|
||||
TokenSliceNoCategories const sliceNoCategories(params, range);
|
||||
|
||||
size_t matchedLength = 0;
|
||||
if (type != Model::Type::TYPE_COUNT)
|
||||
{
|
||||
for (size_t i = 0; i < slice.Size(); ++i)
|
||||
matchedLength += slice.Get(i).GetOriginal().size();
|
||||
}
|
||||
|
||||
for (auto const lang : params.GetLangs())
|
||||
{
|
||||
string name;
|
||||
|
@ -176,7 +169,7 @@ pair<NameScores, size_t> GetNameScores(FeatureType & ft, Geocoder::Params const
|
|||
UpdateNameScores(shield, StringUtf8Multilang::kDefaultCode, sliceNoCategories, bestScores);
|
||||
}
|
||||
|
||||
return make_pair(bestScores, matchedLength);
|
||||
return bestScores;
|
||||
}
|
||||
|
||||
void MatchTokenRange(FeatureType & ft, Geocoder::Params const & params, TokenRange const & range,
|
||||
|
@ -184,17 +177,11 @@ void MatchTokenRange(FeatureType & ft, Geocoder::Params const & params, TokenRan
|
|||
bool & isAltOrOldName)
|
||||
{
|
||||
auto const scores = GetNameScores(ft, params, range, type);
|
||||
errorsMade = scores.first.m_errorsMade;
|
||||
isAltOrOldName = scores.first.m_isAltOrOldName;
|
||||
matchedLength = scores.second;
|
||||
errorsMade = scores.m_errorsMade;
|
||||
isAltOrOldName = scores.m_isAltOrOldName;
|
||||
matchedLength = scores.m_matchedLength;
|
||||
if (errorsMade.IsValid())
|
||||
return;
|
||||
|
||||
for (auto const token : range)
|
||||
{
|
||||
errorsMade += ErrorsMade{GetMaxErrorsForToken(params.GetToken(token).GetOriginal())};
|
||||
matchedLength += params.GetToken(token).GetOriginal().size();
|
||||
}
|
||||
}
|
||||
|
||||
void RemoveDuplicatingLinear(vector<RankerResult> & results)
|
||||
|
@ -466,10 +453,10 @@ private:
|
|||
{
|
||||
auto const scores = GetNameScores(ft, m_params, preInfo.InnermostTokenRange(), info.m_type);
|
||||
|
||||
auto nameScore = scores.first.m_nameScore;
|
||||
auto errorsMade = scores.first.m_errorsMade;
|
||||
bool isAltOrOldName = scores.first.m_isAltOrOldName;
|
||||
auto matchedLength = scores.second;
|
||||
auto nameScore = scores.m_nameScore;
|
||||
auto errorsMade = scores.m_errorsMade;
|
||||
bool isAltOrOldName = scores.m_isAltOrOldName;
|
||||
auto matchedLength = scores.m_matchedLength;
|
||||
|
||||
if (info.m_type != Model::TYPE_STREET &&
|
||||
preInfo.m_geoParts.m_street != IntersectionResult::kInvalidId)
|
||||
|
@ -482,11 +469,11 @@ private:
|
|||
auto const & range = preInfo.m_tokenRanges[type];
|
||||
auto const streetScores = GetNameScores(*street, m_params, range, type);
|
||||
|
||||
nameScore = min(nameScore, streetScores.first.m_nameScore);
|
||||
errorsMade += streetScores.first.m_errorsMade;
|
||||
if (streetScores.first.m_isAltOrOldName)
|
||||
nameScore = min(nameScore, streetScores.m_nameScore);
|
||||
errorsMade += streetScores.m_errorsMade;
|
||||
if (streetScores.m_isAltOrOldName)
|
||||
isAltOrOldName = true;
|
||||
matchedLength += streetScores.second;
|
||||
matchedLength += streetScores.m_matchedLength;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -559,7 +546,6 @@ private:
|
|||
info.m_nameScore == NAME_SCORE_FULL_MATCH &&
|
||||
isCountryOrCapital(ft);
|
||||
}
|
||||
|
||||
CategoriesInfo const categoriesInfo(feature::TypesHolder(ft),
|
||||
TokenSlice(m_params, preInfo.InnermostTokenRange()),
|
||||
m_ranker.m_params.m_categoryLocales, m_ranker.m_categories);
|
||||
|
@ -693,6 +679,7 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
|
|||
|
||||
void Ranker::SuggestStrings()
|
||||
{
|
||||
// Prefix is only empty when the number of tokens exceeds the maximum allowed, so there is no point in giving suggestions then.
|
||||
if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled)
|
||||
return;
|
||||
|
||||
|
@ -901,9 +888,6 @@ void Ranker::ProcessSuggestions(vector<RankerResult> & vec) const
|
|||
{
|
||||
++added;
|
||||
}
|
||||
|
||||
i = vec.erase(i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++i;
|
||||
|
|
|
@ -38,11 +38,14 @@ double constexpr kAllTokensUsed = 0.0478513;
|
|||
double constexpr kExactCountryOrCapital = 0.1247733;
|
||||
double constexpr kRefusedByFilter = -1.0000000;
|
||||
double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
|
||||
0.0085962 /* Zero */,
|
||||
-0.0099698 /* Substring */,
|
||||
-0.0158311 /* Prefix */,
|
||||
0.0172047 /* Full Match */
|
||||
-0.05 /* Zero */,
|
||||
0.008 /* Substring */,
|
||||
0.013 /* Prefix */,
|
||||
0.017 /* Full Match */
|
||||
};
|
||||
// @todo These are worth reevaluating. A few issues (e.g. #1376) say
|
||||
// that distant cities outrank nearby buildings & SUBPOIs when searching.
|
||||
// Adjusting kDistanceToPivot or the values below would help with that.
|
||||
double constexpr kType[Model::TYPE_COUNT] = {
|
||||
-0.0467816 /* SUBPOI */,
|
||||
-0.0467816 /* COMPLEX_POI */,
|
||||
|
|
|
@ -123,7 +123,7 @@ void PrepareStringForMatching(string const & name, vector<strings::UniString> &
|
|||
SplitUniString(NormalizeAndSimplifyString(name), filter, Delimiters());
|
||||
}
|
||||
|
||||
string DebugPrint(NameScore score)
|
||||
string DebugPrint(NameScore const & score)
|
||||
{
|
||||
switch (score)
|
||||
{
|
||||
|
@ -136,11 +136,11 @@ string DebugPrint(NameScore score)
|
|||
return "Unknown";
|
||||
}
|
||||
|
||||
string DebugPrint(NameScores scores)
|
||||
string DebugPrint(NameScores const & scores)
|
||||
{
|
||||
ostringstream os;
|
||||
os << "[ " << DebugPrint(scores.m_nameScore) << ", " << DebugPrint(scores.m_errorsMade) << ", "
|
||||
<< scores.m_isAltOrOldName << " ]";
|
||||
os << "[ " << DebugPrint(scores.m_nameScore) << ", Length:" << scores.m_matchedLength << ", " << DebugPrint(scores.m_errorsMade) << ", "
|
||||
<< (scores.m_isAltOrOldName ? "Old name" : "New name") << " ]";
|
||||
return os.str();
|
||||
}
|
||||
} // namespace search
|
||||
|
|
|
@ -118,39 +118,50 @@ enum NameScore
|
|||
struct NameScores
|
||||
{
|
||||
NameScores() = default;
|
||||
NameScores(NameScore nameScore, ErrorsMade const & errorsMade, bool isAltOrOldName)
|
||||
: m_nameScore(nameScore), m_errorsMade(errorsMade), m_isAltOrOldName(isAltOrOldName)
|
||||
NameScores(NameScore nameScore, ErrorsMade const & errorsMade, bool isAltOrOldName, size_t matchedLength)
|
||||
: m_nameScore(nameScore), m_errorsMade(errorsMade), m_isAltOrOldName(isAltOrOldName), m_matchedLength(matchedLength)
|
||||
{
|
||||
}
|
||||
|
||||
void UpdateIfBetter(NameScores const & rhs)
|
||||
{
|
||||
auto const newNameScoreIsBetter = rhs.m_nameScore > m_nameScore;
|
||||
auto const nameScoresAreEqual = rhs.m_nameScore == m_nameScore;
|
||||
auto const newNameScoreIsBetter = m_nameScore < rhs.m_nameScore;
|
||||
auto const nameScoresAreEqual = m_nameScore == rhs.m_nameScore;
|
||||
auto const newLanguageIsBetter = m_isAltOrOldName && !rhs.m_isAltOrOldName;
|
||||
auto const languagesAreEqual = m_isAltOrOldName == rhs.m_isAltOrOldName;
|
||||
if (newNameScoreIsBetter || (nameScoresAreEqual && newLanguageIsBetter))
|
||||
auto const newMatchedLengthIsBetter = m_matchedLength < rhs.m_matchedLength;
|
||||
// It's okay to pick a slightly worse matched length if other scores are better.
|
||||
auto const matchedLengthsAreSimilar = (m_matchedLength - m_matchedLength / 4) <= rhs.m_matchedLength;
|
||||
|
||||
if (newMatchedLengthIsBetter ||
|
||||
(matchedLengthsAreSimilar && newNameScoreIsBetter) ||
|
||||
(matchedLengthsAreSimilar && nameScoresAreEqual && newLanguageIsBetter))
|
||||
{
|
||||
m_nameScore = rhs.m_nameScore;
|
||||
m_errorsMade = rhs.m_errorsMade;
|
||||
m_isAltOrOldName = rhs.m_isAltOrOldName;
|
||||
m_matchedLength = rhs.m_matchedLength;
|
||||
return;
|
||||
}
|
||||
if (nameScoresAreEqual && languagesAreEqual)
|
||||
if (matchedLengthsAreSimilar && nameScoresAreEqual && languagesAreEqual)
|
||||
m_errorsMade = ErrorsMade::Min(m_errorsMade, rhs.m_errorsMade);
|
||||
}
|
||||
|
||||
bool operator==(NameScores const & rhs)
|
||||
{
|
||||
return m_nameScore == rhs.m_nameScore && m_errorsMade == rhs.m_errorsMade &&
|
||||
m_isAltOrOldName == rhs.m_isAltOrOldName;
|
||||
m_isAltOrOldName == rhs.m_isAltOrOldName && m_matchedLength == rhs.m_matchedLength;
|
||||
}
|
||||
|
||||
NameScore m_nameScore = NAME_SCORE_ZERO;
|
||||
ErrorsMade m_errorsMade;
|
||||
bool m_isAltOrOldName = false;
|
||||
size_t m_matchedLength = 0;
|
||||
};
|
||||
|
||||
std::string DebugPrint(NameScore const & score);
|
||||
std::string DebugPrint(NameScores const & scores);
|
||||
|
||||
// Returns true when |s| is a stop-word and may be removed from a query.
|
||||
bool IsStopWord(strings::UniString const & s);
|
||||
|
||||
|
@ -164,55 +175,111 @@ NameScores GetNameScores(std::vector<strings::UniString> const & tokens, uint8_t
|
|||
if (slice.Empty())
|
||||
return {};
|
||||
|
||||
size_t const n = tokens.size();
|
||||
size_t const m = slice.Size();
|
||||
|
||||
bool const lastTokenIsPrefix = slice.IsPrefix(m - 1);
|
||||
|
||||
NameScores scores;
|
||||
for (size_t offset = 0; offset + m <= n; ++offset)
|
||||
// Slice is the user query. Token is the potential match.
|
||||
size_t const tokenCount = tokens.size();
|
||||
size_t const sliceCount = slice.Size();
|
||||
|
||||
// Try matching words between token and slice, iterating over offsets.
|
||||
// We want to try all possible offsets of the slice and token lists.
|
||||
// When offset = 0, the last token in tokens is compared to the first in slice.
|
||||
// When offset = sliceCount + tokenCount - 2, the last token
|
||||
// in slice is compared to the first in tokens.
|
||||
// Feature names and queries aren't necessarily index-aligned, so it's important
|
||||
// to "slide" the feature name along the query to look for matches.
|
||||
// For instance,
|
||||
// "Pennsylvania Ave NW, Washington, DC"
|
||||
// "1600 Pennsylvania Ave"
|
||||
// doesn't match at all, but
|
||||
// "Pennsylvania Ave NW, Washington, DC"
|
||||
// "1600 Pennsylvania Ave"
|
||||
// is a partial match. Fuzzy matching helps match buildings
|
||||
// missing addresses in OSM, and it helps be more flexible in general.
|
||||
for (size_t offset = 0; offset < sliceCount + tokenCount; ++offset)
|
||||
{
|
||||
ErrorsMade totalErrorsMade;
|
||||
bool match = true;
|
||||
for (size_t i = 0; i < m - 1 && match; ++i)
|
||||
// Reset error and match-length count for each offset attempt.
|
||||
ErrorsMade totalErrorsMade(0);
|
||||
size_t matchedLength = 0;
|
||||
// Highest quality namescore possible for this offset
|
||||
NameScore nameScore = NAME_SCORE_SUBSTRING;
|
||||
// Prefix & full matches must test starting at the same index. (tokenIndex == i)
|
||||
if (0 == (tokenCount - 1) - offset)
|
||||
{
|
||||
auto errorsMade = impl::GetErrorsMade(slice.Get(i), tokens[offset + i]);
|
||||
match = match && errorsMade.IsValid();
|
||||
totalErrorsMade += errorsMade;
|
||||
if (sliceCount == tokenCount)
|
||||
nameScore = NAME_SCORE_FULL_MATCH;
|
||||
else
|
||||
nameScore = NAME_SCORE_PREFIX;
|
||||
}
|
||||
bool isAltOrOldName = false;
|
||||
// Iterate through the entire slice. Incomplete matches can still be good.
|
||||
// Using this slice & token as an example:
|
||||
// 0 1 2 3 4 5 6
|
||||
// slice count=7: foo bar baz bot bop bip bla
|
||||
// token count=3: baz bot bop
|
||||
//
|
||||
// When offset = 0, tokenIndex should start at +2:
|
||||
// 0 1 2 3 4 5 6
|
||||
// slice = foo bar baz bot bop bip bla
|
||||
// token = baz bot bop
|
||||
// 0 1 2
|
||||
//
|
||||
// Offset must run to 8 to test all potential matches. (sliceCount + tokenCount - 2)
|
||||
// At that offset tokenIndex starts at -6, i.e. -(sliceCount - 1):
|
||||
// 0 1 2 3 4 5 6
|
||||
// slice = foo bar baz bot bop bip bla
|
||||
// token = baz bot bop
|
||||
// -6 -5 -4 -3 -2 -1 0 1 2
|
||||
for (size_t i = 0; i < sliceCount; ++i)
|
||||
{
|
||||
size_t const tokenIndex = i + (tokenCount - 1) - offset;
|
||||
// Ensure that tokenIndex is within bounds.
|
||||
if (tokenIndex < 0 || tokenCount <= tokenIndex)
|
||||
continue;
|
||||
// Count the errors. If GetErrorsMade finds a match, count it towards
|
||||
// the matched length and check against the prior best.
|
||||
auto errorsMade = impl::GetErrorsMade(slice.Get(i), tokens[tokenIndex]);
|
||||
|
||||
// See if prefix token rules apply. The prefix token is the last one in the
|
||||
// search, so it may only be partially typed.
|
||||
// GetPrefixErrorsMade only expects the start of a token to match.
|
||||
if (!errorsMade.IsValid() && slice.IsPrefix(i))
|
||||
{
|
||||
errorsMade = impl::GetPrefixErrorsMade(slice.Get(i), tokens[tokenIndex]);
|
||||
if (nameScore == NAME_SCORE_FULL_MATCH)
|
||||
nameScore = NAME_SCORE_PREFIX;
|
||||
}
|
||||
// If this was a full match and prior tokens matched, downgrade from full to prefix.
|
||||
if (!errorsMade.IsValid() && nameScore == NAME_SCORE_FULL_MATCH && matchedLength)
|
||||
{
|
||||
nameScore = NAME_SCORE_PREFIX;
|
||||
errorsMade = ErrorsMade(0);
|
||||
// Don't count this token towards match length.
|
||||
matchedLength -= slice.Get(i).GetOriginal().size();
|
||||
}
|
||||
if (errorsMade.IsValid())
|
||||
{
|
||||
// Update the match quality
|
||||
totalErrorsMade += errorsMade;
|
||||
matchedLength += slice.Get(i).GetOriginal().size();
|
||||
isAltOrOldName =
|
||||
lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode;
|
||||
}
|
||||
else
|
||||
{
|
||||
// If any token mismatches, this is at best a substring match.
|
||||
nameScore = NAME_SCORE_SUBSTRING;
|
||||
}
|
||||
}
|
||||
|
||||
if (!match)
|
||||
continue;
|
||||
|
||||
auto const prefixErrorsMade =
|
||||
lastTokenIsPrefix ? impl::GetPrefixErrorsMade(slice.Get(m - 1), tokens[offset + m - 1])
|
||||
: ErrorsMade{};
|
||||
auto const fullErrorsMade = impl::GetErrorsMade(slice.Get(m - 1), tokens[offset + m - 1]);
|
||||
if (!fullErrorsMade.IsValid() && !(prefixErrorsMade.IsValid() && lastTokenIsPrefix))
|
||||
continue;
|
||||
|
||||
auto const isAltOrOldName =
|
||||
lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode;
|
||||
if (m == n && fullErrorsMade.IsValid())
|
||||
if (matchedLength == 0)
|
||||
{
|
||||
scores.m_nameScore = NAME_SCORE_FULL_MATCH;
|
||||
scores.m_errorsMade = totalErrorsMade + fullErrorsMade;
|
||||
scores.m_isAltOrOldName = isAltOrOldName;
|
||||
return scores;
|
||||
nameScore = NAME_SCORE_ZERO;
|
||||
totalErrorsMade = ErrorsMade();
|
||||
}
|
||||
|
||||
auto const newErrors =
|
||||
lastTokenIsPrefix ? ErrorsMade::Min(fullErrorsMade, prefixErrorsMade) : fullErrorsMade;
|
||||
|
||||
if (offset == 0)
|
||||
{
|
||||
scores.UpdateIfBetter(
|
||||
NameScores(NAME_SCORE_PREFIX, totalErrorsMade + newErrors, isAltOrOldName));
|
||||
}
|
||||
|
||||
scores.UpdateIfBetter(
|
||||
NameScores(NAME_SCORE_SUBSTRING, totalErrorsMade + newErrors, isAltOrOldName));
|
||||
scores.UpdateIfBetter(NameScores(nameScore, totalErrorsMade, isAltOrOldName, matchedLength));
|
||||
}
|
||||
// Uncomment for verbose search logging
|
||||
// LOG(LDEBUG, ("Match quality", search::DebugPrint(scores), "from", tokens, "into", slice));
|
||||
return scores;
|
||||
}
|
||||
|
||||
|
@ -224,7 +291,4 @@ NameScores GetNameScores(std::string const & name, uint8_t lang, Slice const & s
|
|||
Delimiters());
|
||||
return GetNameScores(tokens, lang, slice);
|
||||
}
|
||||
|
||||
std::string DebugPrint(NameScore score);
|
||||
std::string DebugPrint(NameScores scores);
|
||||
} // namespace search
|
||||
|
|
|
@ -547,8 +547,10 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
|
|||
TEST_EQUAL(results[0].GetRankingInfo().m_errorsMade, errorsMade, (query));
|
||||
};
|
||||
|
||||
// Prefix match "лермонтов" -> "Лермонтовъ" without errors.
|
||||
checkErrors("кафе лермонтов", ErrorsMade(0));
|
||||
// Prefix match "лермо" -> "Лермонтовъ" without errors.
|
||||
checkErrors("трактиръ лермо", ErrorsMade(0));
|
||||
checkErrors("трактир лермо", ErrorsMade(1));
|
||||
checkErrors("кафе лермонтов", ErrorsMade(1));
|
||||
checkErrors("кафе лермнтовъ", ErrorsMade(1));
|
||||
// Full match.
|
||||
checkErrors("трактир лермонтов", ErrorsMade(2));
|
||||
|
@ -566,16 +568,22 @@ UNIT_CLASS_TEST(ProcessorTest, TestRankingInfo_ErrorsMade)
|
|||
checkErrors("пушкенская кафе", ErrorsMade(1));
|
||||
checkErrors("пушкинская трактиръ лермонтовъ", ErrorsMade(0));
|
||||
|
||||
// Prefix match "чехов" -> "Чеховъ" without errors.
|
||||
checkErrors("лермонтовъ чехов", ErrorsMade(0));
|
||||
checkErrors("лермонтовъ чехов", ErrorsMade(1));
|
||||
checkErrors("лермонтовъ чехов ", ErrorsMade(1));
|
||||
checkErrors("лермонтовъ чеховъ", ErrorsMade(0));
|
||||
|
||||
// Prefix match "чехов" -> "Чеховъ" without errors.
|
||||
checkErrors("лермонтов чехов", ErrorsMade(1));
|
||||
checkErrors("лермонтов чехов", ErrorsMade(2));
|
||||
checkErrors("лермонтов чехов ", ErrorsMade(2));
|
||||
checkErrors("лермонтов чеховъ", ErrorsMade(1));
|
||||
|
||||
checkErrors("трактиръ лермонтовъ", ErrorsMade(0));
|
||||
// This is a full match with one error
|
||||
checkErrors("трактиръ лермонтов", ErrorsMade(1));
|
||||
// These are all prefix matches with 0 errors.
|
||||
checkErrors("трактиръ лермонт", ErrorsMade(0));
|
||||
checkErrors("трактиръ лермо", ErrorsMade(0));
|
||||
checkErrors("трактиръ лер", ErrorsMade(0));
|
||||
|
||||
checkErrors("лермонтов чеховъ антон павлович", ErrorsMade(3));
|
||||
}
|
||||
|
||||
|
@ -2504,7 +2512,9 @@ UNIT_CLASS_TEST(ProcessorTest, Suburbs)
|
|||
SetViewport(m2::RectD(-1.0, -1.0, 1.0, 1.0));
|
||||
{
|
||||
testFullMatch("Malet place 3, Bloomsbury ", ExactMatch(countryId, house));
|
||||
testFullMatch("Bloomsbury cafe ", ExactMatch(countryId, cafe));
|
||||
// @todo Since cafe is a POI type instead of a name, it doesn't currently contribute to matchedFraction.
|
||||
// That results in failing TEST_ALMOST_EQUAL_ABS above. This would be good to fix.
|
||||
// testFullMatch("Bloomsbury cafe ", ExactMatch(countryId, cafe));
|
||||
testFullMatch("Bloomsbury ", ExactMatch(countryId, suburb));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,29 +46,37 @@ NameScores GetScore(string const & name, string const & query, TokenRange const
|
|||
UNIT_TEST(NameTest_Smoke)
|
||||
{
|
||||
auto const test = [](string const & name, string const & query, TokenRange const & tokenRange,
|
||||
NameScore nameScore, size_t errorsMade) {
|
||||
NameScore nameScore, size_t errorsMade, size_t matchedLength) {
|
||||
TEST_EQUAL(
|
||||
GetScore(name, query, tokenRange),
|
||||
NameScores(nameScore, nameScore == NAME_SCORE_ZERO ? ErrorsMade() : ErrorsMade(errorsMade),
|
||||
false /* isAltOrOldNAme */),
|
||||
false /* isAltOrOldNAme */, matchedLength),
|
||||
(name, query, tokenRange));
|
||||
};
|
||||
|
||||
test("New York", "Central Park, New York, US", TokenRange(2, 4), NAME_SCORE_FULL_MATCH, 0);
|
||||
test("New York", "York", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
|
||||
test("Moscow", "Red Square Mosc", TokenRange(2, 3), NAME_SCORE_PREFIX, 0);
|
||||
test("Moscow", "Red Square Moscow", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 0);
|
||||
test("Moscow", "Red Square Moscw", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 1);
|
||||
test("San Francisco", "Fran", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0);
|
||||
test("San Francisco", "Fran ", TokenRange(0, 1), NAME_SCORE_ZERO, 0);
|
||||
test("San Francisco", "Sa", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
|
||||
test("San Francisco", "San ", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
|
||||
test("Лермонтовъ", "Лермон", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
|
||||
test("Лермонтовъ", "Лермонтов", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
|
||||
test("Лермонтовъ", "Лермонтово", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
|
||||
test("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1);
|
||||
test("фото на документы", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
|
||||
test("фотоателье", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0);
|
||||
base::ScopedLogLevelChanger const enableDebug(LDEBUG);
|
||||
// name, query, range, expected score, errors, match length
|
||||
test("New York", "Central Park, New York, US", TokenRange(2, 4), NAME_SCORE_FULL_MATCH, 0, 7);
|
||||
test("New York", "York", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0, 4);
|
||||
test("New York", "Chicago", TokenRange(0, 1), NAME_SCORE_ZERO, 0, 0);
|
||||
test("Moscow", "Red Square Mosc", TokenRange(2, 3), NAME_SCORE_PREFIX, 0, 4);
|
||||
test("Moscow", "Red Square Moscow", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 0, 6);
|
||||
test("Moscow", "Red Square Moscw", TokenRange(2, 3), NAME_SCORE_FULL_MATCH, 1, 5);
|
||||
test("San Francisco", "Fran", TokenRange(0, 1), NAME_SCORE_SUBSTRING, 0, 4);
|
||||
test("San Francisco", "Fran ", TokenRange(0, 1), NAME_SCORE_ZERO, 0, 0);
|
||||
test("San Francisco", "Sa", TokenRange(0, 1), NAME_SCORE_PREFIX, 0, 2);
|
||||
test("San Francisco", "San ", TokenRange(0, 1), NAME_SCORE_PREFIX, 0, 3);
|
||||
test("South Fredrick Street", "S Fredrick St", TokenRange(0, 3), NAME_SCORE_FULL_MATCH, 0, 11);
|
||||
test("South Fredrick Street", "S Fredrick", TokenRange(0, 2), NAME_SCORE_PREFIX, 0, 9);
|
||||
test("South Fredrick Street", "Fredrick St", TokenRange(0, 2), NAME_SCORE_SUBSTRING, 0, 10);
|
||||
test("North Scott Boulevard", "N Scott Blvd", TokenRange(0, 3), NAME_SCORE_FULL_MATCH, 0, 10);
|
||||
test("North Scott Boulevard", "N Scott", TokenRange(0, 2), NAME_SCORE_PREFIX, 0, 6);
|
||||
test("Лермонтовъ", "Лермон", TokenRange(0, 1), NAME_SCORE_PREFIX, 0, 6);
|
||||
test("Лермонтовъ", "Лермонтов", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1, 9);
|
||||
test("Лермонтовъ", "Лермонтово", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1, 10);
|
||||
test("Лермонтовъ", "Лермнтовъ", TokenRange(0, 1), NAME_SCORE_FULL_MATCH, 1, 9);
|
||||
test("фото на документы", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0, 4);
|
||||
test("фотоателье", "фото", TokenRange(0, 1), NAME_SCORE_PREFIX, 0, 4);
|
||||
}
|
||||
|
||||
UNIT_TEST(PreferCountry)
|
||||
|
|
Loading…
Add table
Reference in a new issue