diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index 9d27fe3e46..8e0dab4b77 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -9,12 +9,12 @@ #include #include +namespace search_string_utils_test +{ using namespace search; using namespace std; using namespace strings; -namespace -{ class Utf8StreetTokensFilter { public: @@ -60,14 +60,13 @@ string NormalizeAndSimplifyStringUtf8(string const & s) { return strings::ToUtf8(NormalizeAndSimplifyString(s)); } -} // namespace UNIT_TEST(FeatureTypeToString) { TEST_EQUAL("!type:123", ToUtf8(FeatureTypeToString(123)), ()); } -UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) +UNIT_TEST(NormalizeAndSimplifyString_WithOurTambourines) { // This test is dependent from strings::NormalizeAndSimplifyString implementation. // TODO: Fix it when logic with и-й will change. @@ -96,10 +95,10 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) }; for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2) - TEST_EQUAL(arr[i + 1], ToUtf8(NormalizeAndSimplifyString(arr[i])), (i)); + TEST_EQUAL(arr[i + 1], NormalizeAndSimplifyStringUtf8(arr[i]), ()); } -UNIT_TEST(Contains) +UNIT_TEST(NormalizeAndSimplifyString_Contains) { constexpr char const * kTestStr = "ØøÆ挜 Ўвага!"; TEST(ContainsNormalized(kTestStr, ""), ()); @@ -258,9 +257,12 @@ UNIT_TEST(StreetTokensFilter) UNIT_TEST(NormalizeAndSimplifyString_Numero) { + /// @todo Consider adding "numero" processing in SplitUniString instead of NormalizeAndSimplifyString. TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона №51"), "зона 51", ()); TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона № 51"), "зона 51", ()); TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area #51"), "area 51", ()); TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area # "), "area ", ()); TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area #One"), "area #one", ()); } + +} // namespace search_string_utils_test diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 71c0509fdf..dc734d35f3 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -3,13 +3,10 @@ #include "indexer/search_delimiters.hpp" #include "base/levenshtein_dfa.hpp" -#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" -#include #include #include -#include #include namespace search @@ -28,12 +25,35 @@ strings::UniString NormalizeAndSimplifyString(std::string_view s); void PreprocessBeforeTokenization(strings::UniString & query); template -void SplitUniString(strings::UniString const & uniS, Fn && f, Delims const & delims) +void SplitUniString(strings::UniString const & uniS, Fn && fn, Delims const & delims) { - using namespace strings; - TokenizeIterator iter(uniS.begin(), uniS.end(), delims); - for (; iter; ++iter) - f(iter.GetUniString()); + size_t const count = uniS.size(); + size_t i = 0; + while (true) + { + while (i < count && delims(uniS[i])) + ++i; + if (i >= count) + break; + + size_t j = i + 1; + while (j < count && !delims(uniS[j])) + ++j; + + auto const beg = uniS.begin(); + strings::UniString str(beg + i, beg + j); + + // Transform "xyz's" -> "xyzs". + if (j+1 < count && uniS[j] == '\'' && uniS[j+1] == 's' && (j+2 == count || delims(uniS[j+2]))) + { + str.push_back(uniS[j+1]); + j += 2; + } + + fn(std::move(str)); + + i = j; + } } template diff --git a/search/search_quality/search_quality_tests/real_mwm_tests.cpp b/search/search_quality/search_quality_tests/real_mwm_tests.cpp index d624b52794..afa98d66a9 100644 --- a/search/search_quality/search_quality_tests/real_mwm_tests.cpp +++ b/search/search_quality/search_quality_tests/real_mwm_tests.cpp @@ -241,6 +241,25 @@ UNIT_CLASS_TEST(MwmTestsFixture, Lyon_Aldi) TEST_LESS(dist, 4000, ()); } +// https://github.com/organicmaps/organicmaps/issues/1262 +UNIT_CLASS_TEST(MwmTestsFixture, NY_BarnesNoble) +{ + // New York + ms::LatLon const center(40.7355019, -73.9948155); + SetViewportAndLoadMaps(center); + + auto request = MakeRequest("barne's & noble"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), 10, ()); + + TEST_EQUAL(results[0].GetFeatureType(), classif().GetTypeByPath({"amenity", "cafe"}), ()); + + Range const range(results, 1); + EqualClassifType(range, GetClassifTypes({{"shop", "books"}})); + double const dist = SortedByDistance(range, center); + TEST_LESS(dist, 2000, ()); +} + // https://github.com/organicmaps/organicmaps/issues/2470 UNIT_CLASS_TEST(MwmTestsFixture, Hamburg_Park) { diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index d8fd03e11c..beca4dd812 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -87,6 +87,10 @@ UNIT_TEST(NameScore_Smoke) test("Barnes & Noble", "barne & noble", NameScore::FULL_MATCH, 1, 10); test("Barnes Avenue", "barne ", NameScore::FULL_PREFIX, 1, 5); test("Barnes Avenue", "barne & noble", NameScore::SUBSTRING, 1, 5); + + test("Barnes Avenue", "barne's & noble", NameScore::SUBSTRING, 0, 6); + test("Barnes & Noble", "barne's & noble", NameScore::FULL_MATCH, 0, 11); + test("Barne's & Noble", "barnes & noble", NameScore::FULL_MATCH, 0, 11); } UNIT_TEST(NameScore_SubstringVsErrors) diff --git a/search/search_tests/string_match_test.cpp b/search/search_tests/string_match_test.cpp index 2c665d9b31..594c647f4d 100644 --- a/search/search_tests/string_match_test.cpp +++ b/search/search_tests/string_match_test.cpp @@ -43,7 +43,7 @@ uint32_t PrefixMatchCost(char const * a, char const * b) void TestEqual(vector const & v, base::StringIL const & expected) { - TEST_EQUAL(v.size(), expected.size(), ()); + TEST_EQUAL(v.size(), expected.size(), (expected)); size_t i = 0; for (auto const & e : expected) @@ -106,7 +106,15 @@ UNIT_TEST(StringSplit_Smoke) { TestEqual(NormalizeAndTokenizeString("1/2"), { "1", "2" }); TestEqual(NormalizeAndTokenizeString("xxx-yyy"), { "xxx", "yyy" }); - TestEqual(NormalizeAndTokenizeString("Barne's & Noble"), { "barne", "s", "noble" }); +} + +UNIT_TEST(StringSplit_Apostrophe) +{ + TestEqual(NormalizeAndTokenizeString("Barne's & Noble"), { "barnes", "noble" }); + TestEqual(NormalizeAndTokenizeString("Michael's"), { "michaels" }); + TestEqual(NormalizeAndTokenizeString("'s"), { "s" }); + TestEqual(NormalizeAndTokenizeString("xyz'"), { "xyz" }); + TestEqual(NormalizeAndTokenizeString("'''"), { }); } } // namespace string_match_test