[search] Postcodes are added to the search index.

This commit is contained in:
Yuri Gorshenin 2016-04-20 15:46:11 +03:00
parent ca9a3693b9
commit f48c3e6a6a
23 changed files with 557 additions and 160 deletions

View file

@ -220,6 +220,10 @@ bool IsASCIIString(string const & str)
return true;
}
// Returns true iff |c| is one of the ASCII decimal digits '0'..'9'.
bool IsASCIIDigit(UniChar c)
{
  return '0' <= c && c <= '9';
}
// Returns true iff |c| is an ASCII Latin letter, either lowercase ('a'..'z')
// or uppercase ('A'..'Z').
bool IsASCIILatin(UniChar c)
{
  bool const isLower = 'a' <= c && c <= 'z';
  bool const isUpper = 'A' <= c && c <= 'Z';
  return isLower || isUpper;
}
bool StartsWith(UniString const & s, UniString const & p)
{
if (p.size() > s.size())

View file

@ -29,6 +29,19 @@ public:
template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {}
bool IsEqualAscii(char const * s) const;
// Appends |rhs| to this string in place (via append()) and returns a
// reference to this string so that calls can be chained.
UniString & operator+=(UniString const & rhs)
{
append(rhs);
return *this;
}
// Returns a new string made of a copy of this string followed by |rhs|.
// Neither operand is modified.
UniString operator+(UniString const & rhs) const
{
  UniString concatenated = *this;
  concatenated += rhs;
  return concatenated;
}
};
/// Performs full case folding for string to make it search-compatible according
@ -67,6 +80,8 @@ bool EqualNoCase(string const & s1, string const & s2);
UniString MakeUniString(string const & utf8s);
string ToUtf8(UniString const & s);
bool IsASCIIString(string const & str);
bool IsASCIIDigit(UniChar c);
bool IsASCIILatin(UniChar c);
inline string DebugPrint(UniString const & s)
{

View file

@ -136,11 +136,11 @@ struct FeatureNameInserter
{
}
void AddToken(signed char lang, strings::UniString const & s) const
void AddToken(uint8_t lang, strings::UniString const & s) const
{
strings::UniString key;
key.reserve(s.size() + 1);
key.push_back(static_cast<uint8_t>(lang));
key.push_back(lang);
key.append(s.begin(), s.end());
m_keyValuePairs.emplace_back(key, m_val);
@ -278,6 +278,18 @@ public:
skipIndex.IsCountryOrState(types) ? m_synonyms : nullptr, m_keyValuePairs, hasStreetType);
m_valueBuilder.MakeValue(f, types, index, inserter.m_val);
string const postcode = f.GetMetadata().Get(feature::Metadata::FMD_POSTCODE);
if (!postcode.empty())
{
// See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less
// than two.
buffer_vector<strings::UniString, 2> tokens;
SplitUniString(search::NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens),
search::Delimiters());
for (auto const & token : tokens)
inserter.AddToken(search::kCategoriesLang, search::PostcodeToString(token));
}
// Skip types for features without names.
if (!f.ForEachName(inserter))
skipIndex.SkipEmptyNameTypes(types);

View file

@ -90,6 +90,12 @@ UniString FeatureTypeToString(uint32_t type)
return UniString(s.begin(), s.end());
}
// Returns |postcode| with the "!postcode:" marker prepended. The marker
// string is converted to UniString only once, on the first call.
UniString PostcodeToString(strings::UniString const & postcode)
{
  static UniString const kPostcodePrefix = MakeUniString("!postcode:");

  UniString key = kPostcodePrefix;
  key += postcode;
  return key;
}
namespace
{
char const * kStreetTokensSeparator = "\t -,.";

View file

@ -19,6 +19,8 @@ void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims
strings::UniString FeatureTypeToString(uint32_t type);
strings::UniString PostcodeToString(strings::UniString const & postcode);
template <class ContainerT, class DelimsT>
bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s,
ContainerT & tokens,

View file

@ -162,6 +162,40 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl(
return SortFeaturesAndBuildCBV(move(features));
}
// Function object wrapping RetrieveAddressFeaturesImpl<T>: its call operator
// perfectly forwards all arguments to the implementation. Being a class
// template with a single type parameter, it can be passed as the
// template-template argument of Selector.
template <typename T>
struct RetrieveAddressFeaturesAdaptor
{
template <typename... TArgs>
unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
{
return RetrieveAddressFeaturesImpl<T>(forward<TArgs>(args)...);
}
};
// Picks the value type of the search index according to the format of the mwm
// behind |value| and runs the functor template T instantiated with that type:
//   * FeaturesWithRankAndCenter -> T<FeatureWithRankAndCenter>
//   * CompressedBitVector       -> T<FeatureIndexValue>
// |id|, |value| and all remaining arguments are forwarded to the functor.
// For any other index format an empty pointer is returned.
template <template <typename> class T>
struct Selector
{
  template <typename... TArgs>
  unique_ptr<coding::CompressedBitVector> operator()(MwmSet::MwmId const & id, MwmValue & value,
                                                     TArgs &&... args)
  {
    using TIndexFormat = version::MwmTraits::SearchIndexFormat;

    version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
    auto const indexFormat = mwmTraits.GetSearchIndexFormat();

    if (indexFormat == TIndexFormat::FeaturesWithRankAndCenter)
    {
      T<FeatureWithRankAndCenter> retriever;
      return retriever(id, value, forward<TArgs>(args)...);
    }

    if (indexFormat == TIndexFormat::CompressedBitVector)
    {
      T<FeatureIndexValue> retriever;
      return retriever(id, value, forward<TArgs>(args)...);
    }

    // Unknown index format: nothing can be retrieved.
    return unique_ptr<coding::CompressedBitVector>();
  }
};
} // namespace
namespace v2
@ -170,21 +204,8 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
SearchQueryParams const & params)
{
version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
if (mwmTraits.GetSearchIndexFormat() ==
version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
{
using TValue = FeatureWithRankAndCenter;
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
}
else if (mwmTraits.GetSearchIndexFormat() ==
version::MwmTraits::SearchIndexFormat::CompressedBitVector)
{
using TValue = FeatureIndexValue;
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
}
return unique_ptr<coding::CompressedBitVector>();
Selector<RetrieveAddressFeaturesAdaptor> selector;
return selector(id, value, cancellable, params);
}
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(

View file

@ -24,13 +24,14 @@ class MwmContext;
// Retrieves from the search index corresponding to |value| all
// features matching to |params|.
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
SearchQueryParams const & params);
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId const & id,
MwmValue & value,
my::Cancellable const & cancellable,
SearchQueryParams const & params);
// Retrieves from the geometry index corresponding to |value| all features belonging to |rect|.
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
MwmContext const & context, my::Cancellable const & cancellable,
m2::RectD const & rect, int scale);
MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect,
int scale);
} // namespace v2
} // namespace search

View file

@ -54,6 +54,7 @@ HEADERS += \
v2/locality_scorer.hpp \
v2/mwm_context.hpp \
v2/nested_rects_cache.hpp \
v2/postcodes_matcher.hpp \
v2/pre_ranking_info.hpp \
v2/rank_table_cache.hpp \
v2/ranking_info.hpp \
@ -62,6 +63,7 @@ HEADERS += \
v2/search_query_v2.hpp \
v2/stats_cache.hpp \
v2/street_vicinity_loader.hpp \
v2/tokens_slice.hpp \
SOURCES += \
approximate_string_match.cpp \
@ -99,6 +101,7 @@ SOURCES += \
v2/locality_scorer.cpp \
v2/mwm_context.cpp \
v2/nested_rects_cache.cpp \
v2/postcodes_matcher.cpp \
v2/pre_ranking_info.cpp \
v2/rank_table_cache.cpp \
v2/ranking_info.cpp \
@ -106,3 +109,4 @@ SOURCES += \
v2/search_model.cpp \
v2/search_query_v2.cpp \
v2/street_vicinity_loader.cpp \
v2/tokens_slice.cpp \

View file

@ -33,7 +33,14 @@ void SearchTest::RegisterCountry(string const & name, m2::RectD const & rect)
bool SearchTest::ResultsMatch(string const & query,
vector<shared_ptr<tests_support::MatchingRule>> const & rules)
{
tests_support::TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport);
return ResultsMatch(query, "en" /* locale */, rules);
}
// Runs |query| with the given |locale| in Mode::Everywhere over the current
// viewport, waits for the request to complete and checks the obtained results
// against |rules|.
bool SearchTest::ResultsMatch(string const & query, string const & locale,
                              vector<shared_ptr<tests_support::MatchingRule>> const & rules)
{
  tests_support::TestSearchRequest searchRequest(m_engine, query, locale, Mode::Everywhere,
                                                 m_viewport);
  // Results are only read after the request has finished.
  searchRequest.Wait();
  return MatchResults(m_engine, rules, searchRequest.Results());
}

View file

@ -72,11 +72,26 @@ public:
return id;
}
// Builds an mwm named "testWorld" of type feature::DataHeader::world by
// delegating to BuildMwm; |fn| is forwarded to BuildMwm unchanged. Returns
// whatever id BuildMwm returns.
template <typename TBuildFn>
MwmSet::MwmId BuildWorld(TBuildFn && fn)
{
return BuildMwm("testWorld", feature::DataHeader::world, forward<TBuildFn>(fn));
}
// Builds an mwm called |name| of type feature::DataHeader::country by
// delegating to BuildMwm; |fn| is forwarded to BuildMwm unchanged. Returns
// whatever id BuildMwm returns.
template <typename TBuildFn>
MwmSet::MwmId BuildCountry(string const & name, TBuildFn && fn)
{
return BuildMwm(name, feature::DataHeader::country, forward<TBuildFn>(fn));
}
// Stores |viewport| in m_viewport; ResultsMatch passes m_viewport to the
// search requests it creates.
inline void SetViewport(m2::RectD const & viewport) { m_viewport = viewport; }
bool ResultsMatch(string const & query,
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
bool ResultsMatch(string const & query, string const & locale,
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
bool ResultsMatch(string const & query, Mode mode,
vector<shared_ptr<tests_support::MatchingRule>> const & rules);

View file

@ -77,39 +77,38 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke)
TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en");
TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en");
BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
{
builder.Add(wonderlandCountry);
builder.Add(losAlamosCity);
builder.Add(mskCity);
});
auto wonderlandId =
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
{
builder.Add(losAlamosCity);
builder.Add(mskCity);
builder.Add(longPondVillage);
BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(wonderlandCountry);
builder.Add(losAlamosCity);
builder.Add(mskCity);
});
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
{
builder.Add(losAlamosCity);
builder.Add(mskCity);
builder.Add(longPondVillage);
builder.Add(feynmanStreet);
builder.Add(bohrStreet1);
builder.Add(bohrStreet2);
builder.Add(bohrStreet3);
builder.Add(firstAprilStreet);
builder.Add(feynmanStreet);
builder.Add(bohrStreet1);
builder.Add(bohrStreet2);
builder.Add(bohrStreet3);
builder.Add(firstAprilStreet);
builder.Add(feynmanHouse);
builder.Add(bohrHouse);
builder.Add(hilbertHouse);
builder.Add(descartesHouse);
builder.Add(bornHouse);
builder.Add(feynmanHouse);
builder.Add(bohrHouse);
builder.Add(hilbertHouse);
builder.Add(descartesHouse);
builder.Add(bornHouse);
builder.Add(busStop);
builder.Add(tramStop);
builder.Add(quantumTeleport1);
builder.Add(quantumTeleport2);
builder.Add(quantumCafe);
builder.Add(lantern1);
builder.Add(lantern2);
});
builder.Add(busStop);
builder.Add(tramStop);
builder.Add(quantumTeleport1);
builder.Add(quantumTeleport2);
builder.Add(quantumCafe);
builder.Add(lantern1);
builder.Add(lantern2);
});
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
{
@ -182,11 +181,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld)
TestCountry wonderland(m2::PointD(0, 0), countryName, "en");
TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */);
auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
{
builder.Add(wonderland);
builder.Add(losAlamos);
});
auto testWorldId = BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(wonderland);
builder.Add(losAlamos);
});
RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5)));
@ -213,16 +212,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName)
"Hyde Park", "en");
TestPOI cafe(m2::PointD(1.0, 1.0), "London Cafe", "en");
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
{
builder.Add(london);
});
auto wonderlandId =
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
{
builder.Add(hydePark);
builder.Add(cafe);
});
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(london);
});
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
{
builder.Add(hydePark);
builder.Add(cafe);
});
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9)));
{
@ -248,11 +246,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, DisableSuggests)
TestCity london1(m2::PointD(1, 1), "London", "en", 100 /* rank */);
TestCity london2(m2::PointD(-1, -1), "London", "en", 100 /* rank */);
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
{
builder.Add(london1);
builder.Add(london2);
});
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(london1);
builder.Add(london2);
});
SetViewport(m2::RectD(m2::PointD(0.5, 0.5), m2::PointD(1.5, 1.5)));
{
@ -299,21 +297,20 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
TestPOI cafe2(m2::PointD(-0.99, -0.99), "", "en");
cafe2.SetTypes({{"amenity", "cafe"}});
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
{
builder.Add(sanFrancisco);
builder.Add(lermontovo);
});
auto wonderlandId = BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
{
builder.Add(cafe1);
builder.Add(cafe2);
builder.Add(goldenGateBridge);
builder.Add(goldenGateStreet);
builder.Add(lermontov);
builder.Add(waterfall);
});
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(sanFrancisco);
builder.Add(lermontovo);
});
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
{
builder.Add(cafe1);
builder.Add(cafe2);
builder.Add(goldenGateBridge);
builder.Add(goldenGateStreet);
builder.Add(lermontov);
builder.Add(waterfall);
});
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
{
@ -359,5 +356,39 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
TEST(ResultsMatch("waterfall", rules), ());
}
}
// Builds a world with one city and a country with one street and one building
// that carries the postcode "141701", then checks that the building is still
// found by a regular city/street/house-number query issued in the "ru" locale.
UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
{
string const countryName = "Russia";
TestCity city(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);
TestStreet street(
vector<m2::PointD>{m2::PointD(-0.5, 0.0), m2::PointD(0, 0), m2::PointD(0.5, 0.0)},
"Первомайская", "ru");
TestBuilding building(m2::PointD(0.0, 0.00001), "", "28 а", street, "ru");
// The postcode is attached to the building only; the query below does not use it yet.
building.SetPostcode("141701");
BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(city);
});
auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
{
builder.Add(street);
builder.Add(building);
});
{
TRules rules{ExactMatch(countryId, building)};
TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ());
}
// TODO (@y): uncomment this test and add more tests once postcode
// search is implemented.
//
// {
// TRules rules{ExactMatch(countryId, building)};
// TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ());
// }
}
} // namespace
} // namespace search

View file

@ -12,6 +12,7 @@
#include "search/v2/pre_ranking_info.hpp"
#include "search/v2/ranking_info.hpp"
#include "search/v2/ranking_utils.hpp"
#include "search/v2/tokens_slice.hpp"
#include "storage/country_info_getter.hpp"
#include "storage/index.hpp"
@ -464,10 +465,8 @@ void Query::SetQuery(string const & query)
search::Delimiters delims;
SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims);
bool checkPrefix = true;
// Assign prefix with last parsed token.
if (checkPrefix && !m_tokens.empty() && !delims(strings::LastUniChar(query)))
if (!m_tokens.empty() && !delims(strings::LastUniChar(query)))
{
m_prefix.swap(m_tokens.back());
m_tokens.pop_back();

View file

@ -0,0 +1,72 @@
#include "../../testing/testing.hpp"
#include "search/search_query_params.hpp"
#include "search/v2/postcodes_matcher.hpp"
#include "search/v2/tokens_slice.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
#include "std/string.hpp"
#include "std/vector.hpp"
using namespace strings;
namespace search
{
namespace v2
{
namespace
{
// Test helper. Tokenizes |s| with the search delimiters, packs the tokens into
// SearchQueryParams (one single-element synonyms vector per token) and runs the
// matcher on a slice that spans all parsed tokens. When |checkPrefix| is set
// and the tokenizer reports that the last token is a prefix, that token is
// stored in m_prefixTokens instead of m_tokens.
bool LooksLikePostcode(string const & s, bool checkPrefix)
{
  vector<UniString> parsed;
  bool const endsWithPrefix =
      TokenizeStringAndCheckIfLastTokenIsPrefix(s, parsed, search::Delimiters());

  // Remember the total count now: the slice must also cover the token that
  // may be moved to the prefix below.
  size_t const sliceEnd = parsed.size();

  SearchQueryParams params;
  bool const lastIsPrefix = checkPrefix && endsWithPrefix;
  if (lastIsPrefix)
  {
    params.m_prefixTokens.push_back(parsed.back());
    parsed.pop_back();
  }

  for (size_t i = 0; i < parsed.size(); ++i)
  {
    params.m_tokens.emplace_back();
    params.m_tokens.back().push_back(parsed[i]);
  }

  return LooksLikePostcode(TokensSlice(params, 0, sliceEnd));
}
// Smoke test for the postcode matcher: strings shaped like real postcodes (or
// like a prefix of one, when |checkPrefix| is true) must be accepted, while
// ordinary address fragments must be rejected.
UNIT_TEST(PostcodesMatcher_Smoke)
{
  TEST(LooksLikePostcode("141701", false /* checkPrefix */), ());
  TEST(LooksLikePostcode("141", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
  // Same postcode without prefix handling: exercises the full-match path for a
  // multi-token postcode (this line used to be an exact duplicate of the
  // previous assertion).
  TEST(LooksLikePostcode("BA6 8JP", false /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ());

  // Address fragments that must not be mistaken for postcodes.
  TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ());
}
} // namespace
} // namespace v2
} // namespace search

View file

@ -2,6 +2,7 @@
#include "search/search_query_params.hpp"
#include "search/v2/ranking_utils.hpp"
#include "search/v2/tokens_slice.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"

View file

@ -27,6 +27,7 @@ SOURCES += \
latlon_match_test.cpp \
locality_finder_test.cpp \
locality_scorer_test.cpp \
postcodes_matcher_tests.cpp \
query_saver_tests.cpp \
ranking_tests.cpp \
string_intersection_test.cpp \

View file

@ -5,6 +5,7 @@
// Constants shared by the search index code.
namespace search
{
// NOTE(review): not referenced by any code visible in this change - postcode
// tokens are currently added to the index with kCategoriesLang. Presumably
// reserved for a dedicated postcode "language"; confirm before relying on it.
static const uint8_t kPostcodeLang = 127;
// Pseudo-language code passed as the |lang| argument when adding non-name
// tokens to the index (the indexer uses it for postcode tokens).
static const uint8_t kCategoriesLang = 128;
// NOTE(review): presumably the number of bits used for encoding points - confirm.
static const uint8_t kPointCodingBits = 20;
} // namespace search

View file

@ -979,6 +979,8 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter)
m_filter = &filter;
MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; });
// TODO (@y): implement postcodes matching here.
// The order is rather important. Match streets first, then all other stuff.
GreedilyMatchStreets();
MatchPOIsAndBuildings(0 /* curToken */);

View file

@ -1,5 +1,7 @@
#include "search/v2/locality_scorer.hpp"
#include "search/v2/tokens_slice.hpp"
#include "std/algorithm.hpp"
namespace search

View file

@ -0,0 +1,161 @@
#include "search/v2/postcodes_matcher.hpp"
#include "search/v2/tokens_slice.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/logging.hpp"
#include "base/macros.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
#include "std/transform_iterator.hpp"
#include "std/unique_ptr.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
using namespace strings;
namespace search
{
namespace v2
{
namespace
{
// Top patterns for postcodes. See
// search/search_quality/clusterize_postcodes.lisp for details.
//
// Pattern alphabet (must stay in sync with SimplifyChar): 'n' stands for any
// ASCII digit, 'a' for any ASCII Latin letter, every other character stands for
// itself. Each pattern is split into tokens with search::Delimiters when it is
// added to the matcher, and consecutive tokens are joined by a single ' '.
char const * const g_patterns[] = {
"aa nnnn", "aa nnnnn", "aaa nnnn", "aan", "aan naa", "aana naa", "aann",
"aann naa", "aannaa", "aannnaa", "aannnn", "an naa", "ana naa", "ana nan",
"ananan", "ann aann", "ann naa", "annnnaaa", "nn nnn", "nnn", "nnn nn",
"nnn nnn", "nnn nnnn", "nnnn", "nnnn aa", "nnnn nnn", "nnnnaa", "nnnnn",
"nnnnn nnn", "nnnnn nnnn", "nnnnn nnnnn", "nnnnnn", "nnnnnnn", "nnnnnnnn", "〒nnn nnnn"};
// Maps a character to its pattern class: ASCII digits become 'n', ASCII Latin
// letters become 'a', every other character is returned unchanged.
UniChar SimplifyChar(UniChar const & c)
{
  return IsASCIIDigit(c) ? static_cast<UniChar>('n')
                         : (IsASCIILatin(c) ? static_cast<UniChar>('a') : c);
}
// A trie node. Outgoing edges are stored as (character, child) pairs and are
// searched linearly; |m_isLeaf| marks nodes where a complete string ends.
struct Node
{
  Node() : m_isLeaf(false) {}

  // Returns the child reached by the edge labelled |c|, or nullptr when there
  // is no such edge.
  Node const * Move(UniChar c) const
  {
    for (auto const & edge : m_moves)
    {
      if (edge.first != c)
        continue;
      return edge.second.get();
    }
    return nullptr;
  }

  // Follows the edges labelled by the characters of [begin, end) one by one.
  // Returns the node reached, or nullptr as soon as an edge is missing.
  template <typename TIt>
  Node const * Move(TIt begin, TIt end) const
  {
    Node const * node = this;
    while (begin != end && node)
    {
      node = node->Move(*begin);
      ++begin;
    }
    return node;
  }

  // Returns the child reached by the edge labelled |c|, creating both the edge
  // and the child when they do not exist yet.
  Node & MakeMove(UniChar c)
  {
    for (auto const & edge : m_moves)
    {
      if (edge.first != c)
        continue;
      return *edge.second;
    }

    m_moves.emplace_back(c, make_unique<Node>());
    return *m_moves.back().second;
  }

  // Inserts the path labelled by the characters of [begin, end) and returns
  // the node at its end.
  template <typename TIt>
  Node & MakeMove(TIt begin, TIt end)
  {
    Node * node = this;
    while (begin != end)
    {
      node = &node->MakeMove(*begin);
      ++begin;
    }
    return *node;
  }

  buffer_vector<pair<UniChar, unique_ptr<Node>>, 2> m_moves;
  bool m_isLeaf;

  DISALLOW_COPY(Node);
};
class PostcodesMatcher
{
public:
PostcodesMatcher() : m_root(), m_maxNumTokensInPostcode(0)
{
search::Delimiters delimiters;
for (auto const * pattern : g_patterns)
AddString(MakeUniString(pattern), delimiters);
}
bool HasString(TokensSlice const & slice) const
{
Node const * cur = &m_root;
for (size_t i = 0; i < slice.Size() && cur; ++i)
{
auto const & s = slice.Get(i).front();
cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar),
make_transform_iterator(s.end(), &SimplifyChar));
if (cur && i + 1 < slice.Size())
cur = cur->Move(' ');
}
if (!cur)
return false;
if (slice.Size() > 0 && slice.IsPrefix(slice.Size() - 1))
return true;
return cur->m_isLeaf;
}
inline size_t GetMaxNumTokensInPostcode() const { return m_maxNumTokensInPostcode; }
private:
void AddString(UniString const & s, search::Delimiters & delimiters)
{
vector<UniString> tokens;
SplitUniString(s, MakeBackInsertFunctor(tokens), delimiters);
m_maxNumTokensInPostcode = max(m_maxNumTokensInPostcode, tokens.size());
Node * cur = &m_root;
for (size_t i = 0; i < tokens.size(); ++i)
{
cur = &cur->MakeMove(tokens[i].begin(), tokens[i].end());
if (i + 1 != tokens.size())
cur = &cur->MakeMove(' ');
}
cur->m_isLeaf = true;
}
Node m_root;
size_t m_maxNumTokensInPostcode;
DISALLOW_COPY(PostcodesMatcher);
};
// Returns the single shared matcher instance. It is a function-local static,
// so it is constructed (and the pattern trie is built) on the first call.
PostcodesMatcher const & GetPostcodesMatcher()
{
static PostcodesMatcher kMatcher;
return kMatcher;
}
} // namespace
// Returns true when the tokens of |slice| match one of the known postcode
// patterns; if the last token of the slice is a prefix token, matching the
// beginning of a pattern is sufficient.
bool LooksLikePostcode(TokensSlice const & slice) { return GetPostcodesMatcher().HasString(slice); }
// Returns the largest number of tokens found in any single postcode pattern.
size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); }
} // namespace v2
} // namespace search

View file

@ -0,0 +1,15 @@
#pragma once
#include "std/cstdint.hpp"
namespace search
{
namespace v2
{
// Forward declaration; the definition is in search/v2/tokens_slice.hpp.
class TokensSlice;
// Returns true when the tokens of |slice| look like a postcode, i.e. match
// one of the built-in postcode patterns.
bool LooksLikePostcode(TokensSlice const & slice);
// Returns the maximum number of tokens a built-in postcode pattern consists of.
size_t GetMaxNumTokensInPostcode();
} // namespace v2
} // namespace search

View file

@ -7,7 +7,6 @@
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/assert.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
@ -42,74 +41,6 @@ enum NameScore
NAME_SCORE_COUNT
};
// Read-only view over the tokens [startToken, endToken) of SearchQueryParams.
// Holds a reference to the params, which therefore must outlive the slice.
// NOTE(review): the hunk header shows this region shrinking from 74 to 6
// lines, so this is presumably the old copy of the class that the commit moves
// to search/v2/tokens_slice.hpp - confirm.
class TokensSlice
{
public:
TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
{
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
}
// Returns the synonyms of the |i|-th token of the slice.
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_params.GetTokens(m_offset + i);
}
inline size_t Size() const { return m_size; }
inline bool Empty() const { return Size() == 0; }
// True when the |i|-th token of the slice has index m_tokens.size() in the
// params, i.e. the index right after the last full token.
inline bool IsPrefix(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_offset + i == m_params.m_tokens.size();
}
private:
SearchQueryParams const & m_params;
size_t const m_offset;
size_t const m_size;
};
// Read-only view over those tokens of [startToken, endToken) in
// SearchQueryParams whose m_isCategorySynonym flag is not set. Holds a
// reference to the params, which therefore must outlive the slice.
// NOTE(review): the hunk header shows this region shrinking from 74 to 6
// lines, so this is presumably the old copy of the class that the commit moves
// to search/v2/tokens_slice.hpp - confirm.
class TokensSliceNoCategories
{
public:
TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
: m_params(params)
{
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
m_indexes.reserve(endToken - startToken);
// Keep only the indexes of tokens that are not category synonyms.
for (size_t i = startToken; i < endToken; ++i)
{
if (!m_params.m_isCategorySynonym[i])
m_indexes.push_back(i);
}
}
// Returns the synonyms of the |i|-th kept token.
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_params.GetTokens(m_indexes[i]);
}
inline size_t Size() const { return m_indexes.size(); }
inline bool Empty() const { return Size() == 0; }
// True when the |i|-th kept token has index m_tokens.size() in the params,
// i.e. the index right after the last full token.
inline bool IsPrefix(size_t i) const
{
ASSERT_LESS(i, Size(), ());
return m_indexes[i] == m_params.m_tokens.size();
}
private:
SearchQueryParams const & m_params;
// Indexes (into the params) of the tokens exposed by this slice.
vector<size_t> m_indexes;
};
template <typename TSlice>
NameScore GetNameScore(string const & name, TSlice const & slice)
{

View file

@ -0,0 +1,27 @@
#include "search/v2/tokens_slice.hpp"
namespace search
{
namespace v2
{
// Creates a view over the tokens [startToken, endToken) of |params|. Only a
// reference to |params| is stored. |startToken| must not exceed |endToken|;
// this is checked by the assertion below.
TokensSlice::TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
{
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
}
// Creates a view over the tokens [startToken, endToken) of |params|, skipping
// every token flagged in m_isCategorySynonym. Only a reference to |params| is
// stored. |startToken| must not exceed |endToken|.
TokensSliceNoCategories::TokensSliceNoCategories(SearchQueryParams const & params,
                                                 size_t startToken, size_t endToken)
  : m_params(params)
{
  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());

  // Upper bound: every token in the range may turn out to be kept.
  m_indexes.reserve(endToken - startToken);
  for (size_t tokenIndex = startToken; tokenIndex < endToken; ++tokenIndex)
  {
    if (m_params.m_isCategorySynonym[tokenIndex])
      continue;
    m_indexes.push_back(tokenIndex);
  }
}
} // namespace v2
} // namespace search

View file

@ -0,0 +1,67 @@
#pragma once
#include "search/search_query_params.hpp"
#include "base/assert.hpp"
#include "std/cstdint.hpp"
#include "std/vector.hpp"
namespace search
{
namespace v2
{
// Read-only view over the tokens [startToken, endToken) of SearchQueryParams.
// Only a reference to the params is stored, so they must outlive the slice.
class TokensSlice
{
public:
  TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken);

  // Returns the synonyms of the |i|-th token of the slice; |i| must be less
  // than Size().
  SearchQueryParams::TSynonymsVector const & Get(size_t i) const
  {
    ASSERT_LESS(i, Size(), ());
    return m_params.GetTokens(m_offset + i);
  }

  // Number of tokens covered by the slice.
  size_t Size() const { return m_size; }

  bool Empty() const { return m_size == 0; }

  // True when the |i|-th token of the slice has index m_tokens.size() in the
  // params, i.e. the index right after the last full token.
  bool IsPrefix(size_t i) const
  {
    ASSERT_LESS(i, Size(), ());
    size_t const indexInParams = m_offset + i;
    return indexInParams == m_params.m_tokens.size();
  }

private:
  SearchQueryParams const & m_params;
  size_t const m_offset;
  size_t const m_size;
};
// Read-only view over the tokens of a range in SearchQueryParams that are not
// flagged as category synonyms. Only a reference to the params is stored, so
// they must outlive the slice.
class TokensSliceNoCategories
{
public:
  TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken);

  // Returns the synonyms of the |i|-th kept token; |i| must be less than
  // Size().
  SearchQueryParams::TSynonymsVector const & Get(size_t i) const
  {
    ASSERT_LESS(i, Size(), ());
    return m_params.GetTokens(m_indexes[i]);
  }

  // Number of tokens kept by the slice.
  size_t Size() const { return m_indexes.size(); }

  bool Empty() const { return m_indexes.size() == 0; }

  // True when the |i|-th kept token has index m_tokens.size() in the params,
  // i.e. the index right after the last full token.
  bool IsPrefix(size_t i) const
  {
    ASSERT_LESS(i, Size(), ());
    size_t const indexInParams = m_indexes[i];
    return indexInParams == m_params.m_tokens.size();
  }

private:
  SearchQueryParams const & m_params;
  // Indexes (into the params) of the tokens exposed by this slice.
  vector<size_t> m_indexes;
};
} // namespace v2
} // namespace search