[search] Fixes for multi-token postcodes search.

This commit is contained in:
Yuri Gorshenin 2016-04-24 21:50:51 +03:00
parent fd295a51d4
commit d0e3442c83
5 changed files with 54 additions and 17 deletions

View file

@ -366,7 +366,9 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
{
string const countryName = "Russia";
TestCity city(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);
TestCity dolgoprudny(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);
TestCity london(m2::PointD(10, 10), "London", "en", 100 /* rank */);
TestStreet street(
vector<m2::PointD>{m2::PointD(-0.5, 0.0), m2::PointD(0, 0), m2::PointD(0.5, 0.0)},
"Первомайская", "ru");
@ -379,9 +381,13 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
TestBuilding building30(m2::PointD(0.00001, 0.00001), "", "30", street, "ru");
building30.SetPostcode("141702");
TestBuilding building1(m2::PointD(10, 10), "", "1", "en");
building1.SetPostcode("WC2H 7BX");
BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(city);
builder.Add(dolgoprudny);
builder.Add(london);
});
auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
{
@ -389,6 +395,8 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
builder.Add(building28);
builder.Add(building29);
builder.Add(building30);
builder.Add(building1);
});
// Tests that postcode is added to the search index.
@ -438,6 +446,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
TRules rules{ExactMatch(countryId, building30)};
TEST(ResultsMatch("Долгопрудный 141702", "ru", rules), ());
}
{
string const kQueries[] = {"london WC2H 7BX", "london WC2H 7", "london WC2H ", "london WC"};
for (auto const & query : kQueries)
{
TRules rules{ExactMatch(countryId, building1)};
TEST(ResultsMatch(query, rules), (query));
}
}
}
} // namespace
} // namespace search

View file

@ -1018,19 +1018,16 @@ void Geocoder::WithPostcodes(TFn && fn)
for (size_t startToken = 0; startToken != m_numTokens; ++startToken)
{
if (m_usedTokens[startToken])
continue;
size_t endToken = startToken;
for (; endToken < m_numTokens && endToken - startToken < maxPostcodeTokens &&
!m_usedTokens[endToken];
++endToken)
for (size_t n = 1; startToken + n <= m_numTokens && n <= maxPostcodeTokens; ++n)
{
TokenSlice slice(m_params, startToken, endToken + 1);
if (!LooksLikePostcode(slice))
if (m_usedTokens[startToken + n - 1])
break;
}
TokenSlice slice(m_params, startToken, startToken + n);
if (LooksLikePostcode(slice))
endToken = startToken + n;
}
if (startToken == endToken)
continue;

View file

@ -109,6 +109,9 @@ public:
// Complexity: O(total length of tokens in |slice|).
bool HasString(TokenSlice const & slice) const
{
if (slice.Size() == 0)
return m_root.m_isLeaf;
Node const * cur = &m_root;
for (size_t i = 0; i < slice.Size() && cur; ++i)
{
@ -122,7 +125,11 @@ public:
if (!cur)
return false;
if (slice.Size() > 0 && slice.IsPrefix(slice.Size() - 1))
if (slice.IsPrefix(slice.Size() - 1))
return true;
// The last token may not be a prefix, but rather a part of a multi-token postcode.
if (slice.IsLast(slice.Size() - 1) && cur->Move(' ') != nullptr)
return true;
return cur->m_isLeaf;

View file

@ -10,6 +10,20 @@ TokenSlice::TokenSlice(SearchQueryParams const & params, size_t startToken, size
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
}
// Tells whether the |i|-th token of this slice is the incomplete
// (prefix) token of the query. |i| must be less than Size().
bool TokenSlice::IsPrefix(size_t i) const
{
  ASSERT_LESS(i, Size(), ());

  // Translate the slice-relative position into a query-wide one; the
  // prefix token is addressed by the index right after all tokens
  // stored in |m_params.m_tokens|.
  size_t const queryIndex = m_offset + i;
  size_t const numFullTokens = m_params.m_tokens.size();
  return queryIndex == numFullTokens;
}
// Tells whether the |i|-th token of this slice is the final token of
// the whole query, no matter whether that token is a full one or a
// prefix. |i| must be less than Size().
bool TokenSlice::IsLast(size_t i) const
{
  ASSERT_LESS(i, Size(), ());

  size_t const queryIndex = m_offset + i;
  size_t const numFullTokens = m_params.m_tokens.size();
  bool const hasPrefix = !m_params.m_prefixTokens.empty();

  // With a prefix present the final token is addressed by
  // |numFullTokens|; otherwise it is the last entry of |m_tokens|.
  return hasPrefix ? queryIndex == numFullTokens : queryIndex + 1 == numFullTokens;
}
TokenSliceNoCategories::TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken,
size_t endToken)
: m_params(params)

View file

@ -26,11 +26,13 @@ public:
// Returns true when Size() reports zero for this slice.
inline bool Empty() const
{
  return 0 == Size();
}
// Returns true when the |i|-th token of the slice maps to the query
// index equal to |m_params.m_tokens.size()|. |i| must be less than
// Size().
inline bool IsPrefix(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_offset + i == m_params.m_tokens.size();
}
// Returns true if the |i|-th token in the slice is the incomplete
// (prefix) token.
bool IsPrefix(size_t i) const;
// Returns true if the |i|-th token in the slice is the last token in
// the query, regardless of whether that token is complete or a prefix.
bool IsLast(size_t i) const;
private:
SearchQueryParams const & m_params;