From 13ddedc0459fc9b60d55d55feae75970776cbc13 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Fri, 17 Feb 2017 14:14:24 +0300 Subject: [PATCH] [search] Fixed stop words processing in house numbers matching. --- search/house_numbers_matcher.cpp | 9 +++++++-- search/search_tests/house_numbers_matcher_test.cpp | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/search/house_numbers_matcher.cpp b/search/house_numbers_matcher.cpp index 8a08350331..06da74bf05 100644 --- a/search/house_numbers_matcher.cpp +++ b/search/house_numbers_matcher.cpp @@ -98,7 +98,7 @@ const char * const g_buildingPartSynonyms[] = { "корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"}; // List of common stop words for buildings. Constructed by hand. -UniString const g_stopWords[] = {MakeUniString("дом"), MakeUniString("house")}; +UniString const g_stopWords[] = {MakeUniString("дом"), MakeUniString("house"), MakeUniString("д")}; bool IsStopWord(UniString const & s, bool isPrefix) { @@ -213,9 +213,14 @@ public: return false; // fallthrough } + case Token::TYPE_LETTER: + { + if (j == 0 && IsStopWord(token.m_value, token.m_prefix)) + break; + // fallthrough + } case Token::TYPE_NUMBER: // fallthrough case Token::TYPE_BUILDING_PART: // fallthrough - case Token::TYPE_LETTER: // fallthrough case Token::TYPE_BUILDING_PART_OR_LETTER: parse[i] = move(parse[j]); ++i; diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp index 87c581c8d0..02efe6eda1 100644 --- a/search/search_tests/house_numbers_matcher_test.cpp +++ b/search/search_tests/house_numbers_matcher_test.cpp @@ -134,6 +134,12 @@ UNIT_TEST(HouseNumbersMatcher_Smoke) TEST(!HouseNumbersMatch("10/42 корпус 2", "42"), ()); TEST(!HouseNumbersMatch("22к", "22я"), ()); TEST(!HouseNumbersMatch("22к", "22л"), ()); + + TEST(HouseNumbersMatch("16 к1", "д 16 к 1"), ()); + TEST(HouseNumbersMatch("16 к1", "д 16 к1"), ()); + TEST(HouseNumbersMatch("16 к1", "16 к1"), ()); + TEST(HouseNumbersMatch("16 к1", "дом 16 к1"), ()); + TEST(HouseNumbersMatch("14 д 1", "дом 14 д1"), ()); } UNIT_TEST(LooksLikeHouseNumber_Smoke) @@ -173,4 +179,11 @@ UNIT_TEST(LooksLikeHouseNumber_Smoke) TEST(LooksLikeHouseNumber("3/7 с1Б", false /* isPrefix */), ()); TEST(LooksLikeHouseNumber("3/7 с1Б", true /* isPrefix */), ()); + + TEST(LooksLikeHouseNumber("16 к 1", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("д 16 к 1", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("дом 16 к 1", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("д 16", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("дом 16", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("дом 14 д 1", false /* isPrefix */), ()); }