From a88ecc82f202a8604b1d2ce4bfcdeda325bb54c7 Mon Sep 17 00:00:00 2001 From: Viktor Govako Date: Wed, 3 May 2023 15:57:48 -0300 Subject: [PATCH] [search] Don't use alt or old names in street matching. Signed-off-by: Viktor Govako --- coding/string_utf8_multilang.hpp | 6 ++++++ search/ranker.cpp | 9 ++++----- search/ranking_utils.hpp | 3 +-- search/reverse_geocoder.cpp | 10 ++++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/coding/string_utf8_multilang.hpp b/coding/string_utf8_multilang.hpp index 48d4109d3d..051e69343d 100644 --- a/coding/string_utf8_multilang.hpp +++ b/coding/string_utf8_multilang.hpp @@ -92,6 +92,12 @@ public: static Languages const & GetSupportedLanguages(); + // These names require separate search/street processing. + static bool IsAltOrOldName(int8_t langCode) + { + return langCode == kAltNameCode || langCode == kOldNameCode; + } + /// @returns kUnsupportedLanguageCode if language is not recognized. static int8_t GetLangIndex(std::string_view lang); /// @returns empty string if langCode is invalid. diff --git a/search/ranker.cpp b/search/ranker.cpp index 8dd3f499d3..f823b9f9d1 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -37,7 +37,7 @@ namespace template void UpdateNameScores(string_view name, uint8_t lang, Slice const & slice, NameScores & bestScores) { - if (lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode) + if (StringUtf8Multilang::IsAltOrOldName(lang)) { strings::Tokenize(name, ";", [&](string_view n) { @@ -129,7 +129,7 @@ NameScores GetNameScores(FeatureType & ft, Geocoder::Params const & params, } }; - if (lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode) + if (StringUtf8Multilang::IsAltOrOldName(lang)) { strings::Tokenize(name, ";", [&updateScore](string_view n) { @@ -875,7 +875,7 @@ void Ranker::GetBestMatchName(FeatureType & f, string & name) const auto bestNameFinder = [&](int8_t lang, string_view s) { - if (lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode) + if (StringUtf8Multilang::IsAltOrOldName(lang)) { strings::Tokenize(s, ";", [lang, &updateScore](std::string_view n) { @@ -899,8 +899,7 @@ void Ranker::GetBestMatchName(FeatureType & f, string & name) const }; UNUSED_VALUE(f.ForEachName(bestNameFinder)); - if (bestLang == StringUtf8Multilang::kAltNameCode || - bestLang == StringUtf8Multilang::kOldNameCode) + if (StringUtf8Multilang::IsAltOrOldName(bestLang)) { string_view const readableName = f.GetReadableName(); // Do nothing if alt/old name is the only name we have. diff --git a/search/ranking_utils.hpp b/search/ranking_utils.hpp index 9172023c38..1983e4aef9 100644 --- a/search/ranking_utils.hpp +++ b/search/ranking_utils.hpp @@ -273,8 +273,7 @@ NameScores GetNameScores(std::vector const & tokens, uint8_t // Update the match quality totalErrorsMade += errorsMade; matchedLength += slice.Get(i).GetOriginal().size(); - isAltOrOldName = - lang == StringUtf8Multilang::kAltNameCode || lang == StringUtf8Multilang::kOldNameCode; + isAltOrOldName = StringUtf8Multilang::IsAltOrOldName(lang); } else { diff --git a/search/reverse_geocoder.cpp b/search/reverse_geocoder.cpp index 85ea680a4f..62809ff17b 100644 --- a/search/reverse_geocoder.cpp +++ b/search/reverse_geocoder.cpp @@ -109,15 +109,17 @@ optional ReverseGeocoder::GetMatchedStreetIndex(string_view keyName, for (auto const & street : streets) { bool fullMatchFound = false; - street.m_multilangName.ForEach([&](int8_t /* langCode */, string_view name) + street.m_multilangName.ForEach([&](int8_t lang, string_view name) { if (fullMatchFound) return; - strings::UniString const actual = GetStreetNameAsKey(name, ignoreStreetSynonyms); + // Skip _non-language_ names for street<->address matching. + if (StringUtf8Multilang::IsAltOrOldName(lang)) + return; - size_t const editDistance = - strings::EditDistance(key.begin(), key.end(), actual.begin(), actual.end()); + strings::UniString const actual = GetStreetNameAsKey(name, ignoreStreetSynonyms); + size_t const editDistance = strings::EditDistance(key.begin(), key.end(), actual.begin(), actual.end()); if (editDistance == 0) {