diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp
index 9bc79abd07..9a59f00b7a 100644
--- a/indexer/search_string_utils.cpp
+++ b/indexer/search_string_utils.cpp
@@ -128,6 +128,46 @@ UniString NormalizeAndSimplifyString(string const & s)
   */
 }
 
+// Expands abbreviations in |query| in place so that tokenization does not split them,
+// e.g. "пр-т" becomes "проспект". An abbreviation is expanded only when it is a whole
+// word: each side must be a delimiter or the start/end of |query|.
+void PreprocessBeforeTokenization(strings::UniString & query)
+{
+  search::Delimiters const delims;
+  vector<pair<strings::UniString, strings::UniString>> const replacements = {
+      {MakeUniString("пр-т"), MakeUniString("проспект")},
+      {MakeUniString("пр-д"), MakeUniString("проезд")},
+      {MakeUniString("наб-я"), MakeUniString("набережная")}};
+
+  for (auto const & replacement : replacements)
+  {
+    auto const & abbreviation = replacement.first;
+    auto const & fullForm = replacement.second;
+
+    auto start = query.begin();
+    while ((start = std::search(start, query.end(), abbreviation.begin(),
+                                abbreviation.end())) != query.end())
+    {
+      auto const end = start + abbreviation.size();
+      bool const isWholeWord = (start == query.begin() || delims(*(start - 1))) &&
+                               (end == query.end() || delims(*end));
+      if (!isWholeWord)
+      {
+        ++start;
+        continue;
+      }
+
+      // Keep the match position as an offset: |start| is re-derived from it after
+      // Replace() instead of being reused (presumably Replace() may invalidate iterators).
+      auto const offset = std::distance(query.begin(), start);
+      query.Replace(start, end, fullForm.begin(), fullForm.end());
+      // Resume right after the inserted text: it never needs re-expansion, and this stays
+      // within |query| even if a full form is empty (stepping one past |offset| would not).
+      start = query.begin() + offset + fullForm.size();
+    }
+  }
+}
+
 UniString FeatureTypeToString(uint32_t type)
 {
   string const s = "!type:" + to_string(type);
diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp
index 45e65fab18..74de6a716b 100644
--- a/indexer/search_string_utils.hpp
+++ b/indexer/search_string_utils.hpp
@@ -16,6 +16,10 @@ namespace search
 // It does some magic text transformation which greatly helps us to improve our search.
 strings::UniString NormalizeAndSimplifyString(std::string const & s);
 
+// Replaces abbreviations which tokenization would otherwise split with their full forms,
+// e.g. "пр-т" -> "проспект".
+void PreprocessBeforeTokenization(strings::UniString & query);
+
 template
 void SplitUniString(strings::UniString const & uniS, Fn && f, Delims const & delims)
 {
diff --git a/search/processor.cpp b/search/processor.cpp
index 3c919afbe7..61dc41fd6c 100644
--- a/search/processor.cpp
+++ b/search/processor.cpp
@@ -209,7 +209,10 @@ void Processor::SetQuery(string const & query)
   vector tokens;
   {
     search::DelimitersWithExceptions delims(vector{'#'});
-    SplitUniString(NormalizeAndSimplifyString(query), base::MakeBackInsertFunctor(tokens), delims);
+    // Expand abbreviations on the normalized query before it is split into tokens.
+    auto preprocessed = NormalizeAndSimplifyString(query);
+    PreprocessBeforeTokenization(preprocessed);
+    SplitUniString(preprocessed, base::MakeBackInsertFunctor(tokens), delims);
   }
 
   search::Delimiters delims;
diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp
index c4f41fcaf8..a060412beb 100644
--- a/search/search_integration_tests/processor_test.cpp
+++ b/search/search_integration_tests/processor_test.cpp
@@ -1765,5 +1765,56 @@ UNIT_CLASS_TEST(ProcessorTest, SynonymsTest)
     TEST(ResultsMatch("собор св петра ", rules), ());
   }
 }
+
+// Checks that queries with abbreviated street types ("пр-т", "пр-д", "наб-я") find the
+// same streets as queries with the full words, whichever side of the name they are on.
+UNIT_CLASS_TEST(ProcessorTest, PreprocessBeforeTokenizationTest)
+{
+  string const country = "Wonderland";
+
+  TestStreet avenue(
+      vector{m2::PointD(0.5, -0.5), m2::PointD(0.0, 0.0), m2::PointD(-0.5, 0.5)},
+      "Октябрьский проспект", "ru");
+
+  TestStreet passage(
+      vector{m2::PointD(-0.5, -0.5), m2::PointD(0.0, 0.0), m2::PointD(0.5, 0.5)},
+      "Жуков проезд", "ru");
+
+  TestStreet embankment(
+      vector{m2::PointD(0.0, -0.5), m2::PointD(0.0, 0.0), m2::PointD(0.0, 0.5)},
+      "Москворецкая набережная", "ru");
+
+  auto countryId = BuildCountry(country, [&](TestMwmBuilder & mwmBuilder) {
+    mwmBuilder.Add(avenue);
+    mwmBuilder.Add(passage);
+    mwmBuilder.Add(embankment);
+  });
+
+  SetViewport(m2::RectD(-1, -1, 1, 1));
+
+  // "пр-т" <-> "проспект".
+  {
+    Rules rules = {ExactMatch(countryId, avenue)};
+    TEST(ResultsMatch("Октябрьский проспект", rules), ());
+    TEST(ResultsMatch("пр-т Октябрьский", rules), ());
+    TEST(ResultsMatch("Октябрьский пр-т", rules), ());
+  }
+
+  // "пр-д" <-> "проезд".
+  {
+    Rules rules = {ExactMatch(countryId, passage)};
+    TEST(ResultsMatch("Жуков проезд", rules), ());
+    TEST(ResultsMatch("пр-д Жуков", rules), ());
+    TEST(ResultsMatch("Жуков пр-д", rules), ());
+  }
+
+  // "наб-я" <-> "набережная".
+  {
+    Rules rules = {ExactMatch(countryId, embankment)};
+    TEST(ResultsMatch("Москворецкая набережная", rules), ());
+    TEST(ResultsMatch("наб-я Москворецкая", rules), ());
+    TEST(ResultsMatch("Москворецкая наб-я", rules), ());
+  }
+}
 } // namespace
 } // namespace search