forked from organicmaps/organicmaps
[search] Postcodes are added to the search index.
This commit is contained in:
parent
ca9a3693b9
commit
f48c3e6a6a
23 changed files with 557 additions and 160 deletions
|
@ -220,6 +220,10 @@ bool IsASCIIString(string const & str)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns true iff |c| lies in the ASCII decimal digit range '0'..'9'.
bool IsASCIIDigit(UniChar c)
{
  return '0' <= c && c <= '9';
}
|
||||
|
||||
// Returns true iff |c| is an ASCII Latin letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
bool IsASCIILatin(UniChar c)
{
  bool const isLowercase = 'a' <= c && c <= 'z';
  bool const isUppercase = 'A' <= c && c <= 'Z';
  return isLowercase || isUppercase;
}
|
||||
|
||||
bool StartsWith(UniString const & s, UniString const & p)
|
||||
{
|
||||
if (p.size() > s.size())
|
||||
|
|
|
@ -29,6 +29,19 @@ public:
|
|||
template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {}
|
||||
|
||||
bool IsEqualAscii(char const * s) const;
|
||||
|
||||
// Appends the contents of |rhs| to this string and returns a reference to
// this string, so that calls can be chained.
UniString & operator+=(UniString const & rhs)
{
  this->append(rhs);
  return *this;
}
|
||||
|
||||
// Returns a new string holding a copy of this string followed by |rhs|.
// Neither operand is modified.
UniString operator+(UniString const & rhs) const
{
  UniString concatenated(*this);
  concatenated += rhs;
  return concatenated;
}
|
||||
};
|
||||
|
||||
/// Performs full case folding for string to make it search-compatible according
|
||||
|
@ -67,6 +80,8 @@ bool EqualNoCase(string const & s1, string const & s2);
|
|||
UniString MakeUniString(string const & utf8s);
|
||||
string ToUtf8(UniString const & s);
|
||||
bool IsASCIIString(string const & str);
|
||||
bool IsASCIIDigit(UniChar c);
|
||||
bool IsASCIILatin(UniChar c);
|
||||
|
||||
inline string DebugPrint(UniString const & s)
|
||||
{
|
||||
|
|
|
@ -136,11 +136,11 @@ struct FeatureNameInserter
|
|||
{
|
||||
}
|
||||
|
||||
void AddToken(signed char lang, strings::UniString const & s) const
|
||||
void AddToken(uint8_t lang, strings::UniString const & s) const
|
||||
{
|
||||
strings::UniString key;
|
||||
key.reserve(s.size() + 1);
|
||||
key.push_back(static_cast<uint8_t>(lang));
|
||||
key.push_back(lang);
|
||||
key.append(s.begin(), s.end());
|
||||
|
||||
m_keyValuePairs.emplace_back(key, m_val);
|
||||
|
@ -278,6 +278,18 @@ public:
|
|||
skipIndex.IsCountryOrState(types) ? m_synonyms : nullptr, m_keyValuePairs, hasStreetType);
|
||||
m_valueBuilder.MakeValue(f, types, index, inserter.m_val);
|
||||
|
||||
string const postcode = f.GetMetadata().Get(feature::Metadata::FMD_POSTCODE);
|
||||
if (!postcode.empty())
|
||||
{
|
||||
// See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less
|
||||
// than two.
|
||||
buffer_vector<strings::UniString, 2> tokens;
|
||||
SplitUniString(search::NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens),
|
||||
search::Delimiters());
|
||||
for (auto const & token : tokens)
|
||||
inserter.AddToken(search::kCategoriesLang, search::PostcodeToString(token));
|
||||
}
|
||||
|
||||
// Skip types for features without names.
|
||||
if (!f.ForEachName(inserter))
|
||||
skipIndex.SkipEmptyNameTypes(types);
|
||||
|
|
|
@ -90,6 +90,12 @@ UniString FeatureTypeToString(uint32_t type)
|
|||
return UniString(s.begin(), s.end());
|
||||
}
|
||||
|
||||
// Builds the search-index token for |postcode|: the fixed marker
// "!postcode:" followed by the postcode itself.
UniString PostcodeToString(strings::UniString const & postcode)
{
  // The marker is converted to UniString only once, on the first call.
  static UniString const kPrefix = MakeUniString("!postcode:");

  UniString token = kPrefix + postcode;
  return token;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
char const * kStreetTokensSeparator = "\t -,.";
|
||||
|
|
|
@ -19,6 +19,8 @@ void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims
|
|||
|
||||
strings::UniString FeatureTypeToString(uint32_t type);
|
||||
|
||||
strings::UniString PostcodeToString(strings::UniString const & postcode);
|
||||
|
||||
template <class ContainerT, class DelimsT>
|
||||
bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s,
|
||||
ContainerT & tokens,
|
||||
|
|
|
@ -162,6 +162,40 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl(
|
|||
return SortFeaturesAndBuildCBV(move(features));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct RetrieveAddressFeaturesAdaptor
|
||||
{
|
||||
template <typename... TArgs>
|
||||
unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
|
||||
{
|
||||
return RetrieveAddressFeaturesImpl<T>(forward<TArgs>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template <template <typename> class T>
|
||||
struct Selector
|
||||
{
|
||||
template <typename... TArgs>
|
||||
unique_ptr<coding::CompressedBitVector> operator()(MwmSet::MwmId const & id, MwmValue & value,
|
||||
TArgs &&... args)
|
||||
{
|
||||
version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
|
||||
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
|
||||
{
|
||||
T<FeatureWithRankAndCenter> t;
|
||||
return t(id, value, forward<TArgs>(args)...);
|
||||
}
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::CompressedBitVector)
|
||||
{
|
||||
T<FeatureIndexValue> t;
|
||||
return t(id, value, forward<TArgs>(args)...);
|
||||
}
|
||||
return unique_ptr<coding::CompressedBitVector>();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace v2
|
||||
|
@ -170,21 +204,8 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
|
|||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params)
|
||||
{
|
||||
version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
|
||||
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
|
||||
{
|
||||
using TValue = FeatureWithRankAndCenter;
|
||||
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
|
||||
}
|
||||
else if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::CompressedBitVector)
|
||||
{
|
||||
using TValue = FeatureIndexValue;
|
||||
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
|
||||
}
|
||||
return unique_ptr<coding::CompressedBitVector>();
|
||||
Selector<RetrieveAddressFeaturesAdaptor> selector;
|
||||
return selector(id, value, cancellable, params);
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
|
||||
|
|
|
@ -24,13 +24,14 @@ class MwmContext;
|
|||
|
||||
// Retrieves from the search index corresponding to |value| all
|
||||
// features matching |params|.
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
|
||||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params);
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId const & id,
|
||||
MwmValue & value,
|
||||
my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params);
|
||||
|
||||
// Retrieves from the geometry index corresponding to |value| all features belonging to |rect|.
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
|
||||
MwmContext const & context, my::Cancellable const & cancellable,
|
||||
m2::RectD const & rect, int scale);
|
||||
MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect,
|
||||
int scale);
|
||||
} // namespace v2
|
||||
} // namespace search
|
||||
|
|
|
@ -54,6 +54,7 @@ HEADERS += \
|
|||
v2/locality_scorer.hpp \
|
||||
v2/mwm_context.hpp \
|
||||
v2/nested_rects_cache.hpp \
|
||||
v2/postcodes_matcher.hpp \
|
||||
v2/pre_ranking_info.hpp \
|
||||
v2/rank_table_cache.hpp \
|
||||
v2/ranking_info.hpp \
|
||||
|
@ -62,6 +63,7 @@ HEADERS += \
|
|||
v2/search_query_v2.hpp \
|
||||
v2/stats_cache.hpp \
|
||||
v2/street_vicinity_loader.hpp \
|
||||
v2/tokens_slice.hpp \
|
||||
|
||||
SOURCES += \
|
||||
approximate_string_match.cpp \
|
||||
|
@ -99,6 +101,7 @@ SOURCES += \
|
|||
v2/locality_scorer.cpp \
|
||||
v2/mwm_context.cpp \
|
||||
v2/nested_rects_cache.cpp \
|
||||
v2/postcodes_matcher.cpp \
|
||||
v2/pre_ranking_info.cpp \
|
||||
v2/rank_table_cache.cpp \
|
||||
v2/ranking_info.cpp \
|
||||
|
@ -106,3 +109,4 @@ SOURCES += \
|
|||
v2/search_model.cpp \
|
||||
v2/search_query_v2.cpp \
|
||||
v2/street_vicinity_loader.cpp \
|
||||
v2/tokens_slice.cpp \
|
||||
|
|
|
@ -33,7 +33,14 @@ void SearchTest::RegisterCountry(string const & name, m2::RectD const & rect)
|
|||
bool SearchTest::ResultsMatch(string const & query,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules)
|
||||
{
|
||||
tests_support::TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport);
|
||||
return ResultsMatch(query, "en" /* locale */, rules);
|
||||
}
|
||||
|
||||
// Runs |query| with the given |locale| in Mode::Everywhere over the current
// viewport, waits for the request to finish and returns whether the results
// satisfy |rules| according to MatchResults().
bool SearchTest::ResultsMatch(string const & query,
                              string const & locale,
                              vector<shared_ptr<tests_support::MatchingRule>> const & rules)
{
  tests_support::TestSearchRequest searchRequest(m_engine, query, locale, Mode::Everywhere,
                                                 m_viewport);
  // Results are only inspected after the request has completed.
  searchRequest.Wait();

  auto const & found = searchRequest.Results();
  return MatchResults(m_engine, rules, found);
}
|
||||
|
|
|
@ -72,11 +72,26 @@ public:
|
|||
return id;
|
||||
}
|
||||
|
||||
template <typename TBuildFn>
|
||||
MwmSet::MwmId BuildWorld(TBuildFn && fn)
|
||||
{
|
||||
return BuildMwm("testWorld", feature::DataHeader::world, forward<TBuildFn>(fn));
|
||||
}
|
||||
|
||||
template <typename TBuildFn>
|
||||
MwmSet::MwmId BuildCountry(string const & name, TBuildFn && fn)
|
||||
{
|
||||
return BuildMwm(name, feature::DataHeader::country, forward<TBuildFn>(fn));
|
||||
}
|
||||
|
||||
// Stores |viewport| in m_viewport; the ResultsMatch() overload taking a
// locale passes m_viewport to every search request it issues.
inline void SetViewport(m2::RectD const & viewport)
{
  m_viewport = viewport;
}
|
||||
|
||||
bool ResultsMatch(string const & query,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
bool ResultsMatch(string const & query, string const & locale,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
bool ResultsMatch(string const & query, Mode mode,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
|
|
|
@ -77,39 +77,38 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke)
|
|||
TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en");
|
||||
TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en");
|
||||
|
||||
BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderlandCountry);
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
});
|
||||
auto wonderlandId =
|
||||
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(longPondVillage);
|
||||
BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderlandCountry);
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(longPondVillage);
|
||||
|
||||
builder.Add(feynmanStreet);
|
||||
builder.Add(bohrStreet1);
|
||||
builder.Add(bohrStreet2);
|
||||
builder.Add(bohrStreet3);
|
||||
builder.Add(firstAprilStreet);
|
||||
builder.Add(feynmanStreet);
|
||||
builder.Add(bohrStreet1);
|
||||
builder.Add(bohrStreet2);
|
||||
builder.Add(bohrStreet3);
|
||||
builder.Add(firstAprilStreet);
|
||||
|
||||
builder.Add(feynmanHouse);
|
||||
builder.Add(bohrHouse);
|
||||
builder.Add(hilbertHouse);
|
||||
builder.Add(descartesHouse);
|
||||
builder.Add(bornHouse);
|
||||
builder.Add(feynmanHouse);
|
||||
builder.Add(bohrHouse);
|
||||
builder.Add(hilbertHouse);
|
||||
builder.Add(descartesHouse);
|
||||
builder.Add(bornHouse);
|
||||
|
||||
builder.Add(busStop);
|
||||
builder.Add(tramStop);
|
||||
builder.Add(quantumTeleport1);
|
||||
builder.Add(quantumTeleport2);
|
||||
builder.Add(quantumCafe);
|
||||
builder.Add(lantern1);
|
||||
builder.Add(lantern2);
|
||||
});
|
||||
builder.Add(busStop);
|
||||
builder.Add(tramStop);
|
||||
builder.Add(quantumTeleport1);
|
||||
builder.Add(quantumTeleport2);
|
||||
builder.Add(quantumCafe);
|
||||
builder.Add(lantern1);
|
||||
builder.Add(lantern2);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
|
||||
{
|
||||
|
@ -182,11 +181,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld)
|
|||
TestCountry wonderland(m2::PointD(0, 0), countryName, "en");
|
||||
TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */);
|
||||
|
||||
auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderland);
|
||||
builder.Add(losAlamos);
|
||||
});
|
||||
auto testWorldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderland);
|
||||
builder.Add(losAlamos);
|
||||
});
|
||||
RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5)));
|
||||
|
@ -213,16 +212,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName)
|
|||
"Hyde Park", "en");
|
||||
TestPOI cafe(m2::PointD(1.0, 1.0), "London Cafe", "en");
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london);
|
||||
});
|
||||
auto wonderlandId =
|
||||
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(hydePark);
|
||||
builder.Add(cafe);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(hydePark);
|
||||
builder.Add(cafe);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9)));
|
||||
{
|
||||
|
@ -248,11 +246,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, DisableSuggests)
|
|||
TestCity london1(m2::PointD(1, 1), "London", "en", 100 /* rank */);
|
||||
TestCity london2(m2::PointD(-1, -1), "London", "en", 100 /* rank */);
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london1);
|
||||
builder.Add(london2);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london1);
|
||||
builder.Add(london2);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(0.5, 0.5), m2::PointD(1.5, 1.5)));
|
||||
{
|
||||
|
@ -299,21 +297,20 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
TestPOI cafe2(m2::PointD(-0.99, -0.99), "", "en");
|
||||
cafe2.SetTypes({{"amenity", "cafe"}});
|
||||
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(sanFrancisco);
|
||||
builder.Add(lermontovo);
|
||||
});
|
||||
auto wonderlandId = BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(cafe1);
|
||||
builder.Add(cafe2);
|
||||
builder.Add(goldenGateBridge);
|
||||
builder.Add(goldenGateStreet);
|
||||
builder.Add(lermontov);
|
||||
builder.Add(waterfall);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(sanFrancisco);
|
||||
builder.Add(lermontovo);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(cafe1);
|
||||
builder.Add(cafe2);
|
||||
builder.Add(goldenGateBridge);
|
||||
builder.Add(goldenGateStreet);
|
||||
builder.Add(lermontov);
|
||||
builder.Add(waterfall);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
|
||||
{
|
||||
|
@ -359,5 +356,39 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
TEST(ResultsMatch("waterfall", rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a world with one city and a country containing a street and a
// building that carries postcode "141701", then checks that the building is
// found by a plain address query. Searching by the postcode itself is not
// exercised yet (see the TODO at the end).
UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
{
  string const countryName = "Russia";

  TestCity dolgoprudny(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);

  vector<m2::PointD> const streetGeometry{m2::PointD(-0.5, 0.0), m2::PointD(0, 0),
                                          m2::PointD(0.5, 0.0)};
  TestStreet pervomayskaya(streetGeometry, "Первомайская", "ru");

  TestBuilding house(m2::PointD(0.0, 0.00001), "", "28 а", pervomayskaya, "ru");
  house.SetPostcode("141701");

  BuildWorld([&](TestMwmBuilder & builder)
             {
               builder.Add(dolgoprudny);
             });
  auto const countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
                                      {
                                        builder.Add(pervomayskaya);
                                        builder.Add(house);
                                      });

  {
    TRules rules{ExactMatch(countryId, house)};
    TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ());
  }

  // TODO (@y): uncomment this test and add more tests once postcode search
  // is implemented.
  //
  // {
  //   TRules rules{ExactMatch(countryId, house)};
  //   TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ());
  // }
}
|
||||
} // namespace
|
||||
} // namespace search
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "search/v2/pre_ranking_info.hpp"
|
||||
#include "search/v2/ranking_info.hpp"
|
||||
#include "search/v2/ranking_utils.hpp"
|
||||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
#include "storage/index.hpp"
|
||||
|
@ -464,10 +465,8 @@ void Query::SetQuery(string const & query)
|
|||
search::Delimiters delims;
|
||||
SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims);
|
||||
|
||||
bool checkPrefix = true;
|
||||
|
||||
// Assign prefix with last parsed token.
|
||||
if (checkPrefix && !m_tokens.empty() && !delims(strings::LastUniChar(query)))
|
||||
if (!m_tokens.empty() && !delims(strings::LastUniChar(query)))
|
||||
{
|
||||
m_prefix.swap(m_tokens.back());
|
||||
m_tokens.pop_back();
|
||||
|
|
72
search/search_tests/postcodes_matcher_tests.cpp
Normal file
72
search/search_tests/postcodes_matcher_tests.cpp
Normal file
|
@ -0,0 +1,72 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/postcodes_matcher.hpp"
|
||||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/string.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
using namespace strings;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Test helper: tokenizes |s|, packs the tokens into SearchQueryParams and
// asks the matcher whether the whole token sequence looks like a postcode.
//
// When |checkPrefix| is true and the tokenizer reports that the last token
// is a prefix, that token is stored in m_prefixTokens instead of m_tokens.
// The slice always spans every parsed token, including such a prefix.
bool LooksLikePostcode(string const & s, bool checkPrefix)
{
  vector<UniString> parsed;
  bool const endsWithPrefix =
      TokenizeStringAndCheckIfLastTokenIsPrefix(s, parsed, search::Delimiters());

  // Must be taken before any token is moved out to the prefix slot.
  size_t const sliceEnd = parsed.size();

  SearchQueryParams params;
  size_t numFullTokens = parsed.size();
  if (checkPrefix && endsWithPrefix)
  {
    params.m_prefixTokens.push_back(parsed.back());
    --numFullTokens;
  }

  // Every full token becomes a synonyms group with a single entry.
  for (size_t i = 0; i < numFullTokens; ++i)
  {
    params.m_tokens.emplace_back();
    params.m_tokens.back().push_back(parsed[i]);
  }

  return LooksLikePostcode(TokensSlice(params, 0, sliceEnd));
}
|
||||
|
||||
// Smoke test for LooksLikePostcode(): a set of strings shaped like real
// postcodes (complete or given as a prefix) must be accepted, and a few
// ordinary address fragments must be rejected.
UNIT_TEST(PostcodesMatcher_Smoke)
{
  TEST(LooksLikePostcode("141701", false /* checkPrefix */), ());
  TEST(LooksLikePostcode("141", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
  // The same postcode with a hyphen instead of a space, mirroring the
  // "803 0271" / "803-0271" pair below. This line used to be an exact
  // duplicate of the previous assertion and therefore checked nothing new.
  TEST(LooksLikePostcode("BA6-8JP", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ());

  TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ());

  // Strings that contain digits but are not postcodes.
  TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ());
}
|
||||
} // namespace
|
||||
} // namespace v2
|
||||
} // namespace search
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/ranking_utils.hpp"
|
||||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
|
|
@ -27,6 +27,7 @@ SOURCES += \
|
|||
latlon_match_test.cpp \
|
||||
locality_finder_test.cpp \
|
||||
locality_scorer_test.cpp \
|
||||
postcodes_matcher_tests.cpp \
|
||||
query_saver_tests.cpp \
|
||||
ranking_tests.cpp \
|
||||
string_intersection_test.cpp \
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
namespace search
|
||||
{
|
||||
// Presumably the pseudo-language code reserved for postcode tokens.
// NOTE(review): not referenced in the visible hunks of this change, which
// index postcode tokens under kCategoriesLang instead - confirm intended use.
static const uint8_t kPostcodeLang = 127;
|
||||
// Pseudo-language code, presumably for category tokens. The index builder
// also passes it as the language when adding "!postcode:"-prefixed tokens.
static const uint8_t kCategoriesLang = 128;
|
||||
static const uint8_t kPointCodingBits = 20;
|
||||
} // namespace search
|
||||
|
|
|
@ -979,6 +979,8 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter)
|
|||
m_filter = &filter;
|
||||
MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; });
|
||||
|
||||
// TODO (@y): implement postcodes matching here.
|
||||
|
||||
// The order is rather important. Match streets first, then all other stuff.
|
||||
GreedilyMatchStreets();
|
||||
MatchPOIsAndBuildings(0 /* curToken */);
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#include "search/v2/locality_scorer.hpp"
|
||||
|
||||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
|
||||
namespace search
|
||||
|
|
161
search/v2/postcodes_matcher.cpp
Normal file
161
search/v2/postcodes_matcher.cpp
Normal file
|
@ -0,0 +1,161 @@
|
|||
#include "search/v2/postcodes_matcher.hpp"
|
||||
|
||||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/macros.hpp"
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/transform_iterator.hpp"
|
||||
#include "std/unique_ptr.hpp"
|
||||
#include "std/utility.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
using namespace strings;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Most frequent postcode shapes; see
// search/search_quality/clusterize_postcodes.lisp for how they were obtained.
//
// In a pattern 'a' stands for an ASCII Latin letter and 'n' for an ASCII
// digit (see SimplifyChar), a space separates tokens, and any other
// character stands for itself.
char const * const g_patterns[] = {
    // Shapes starting with a letter.
    "aa nnnn", "aa nnnnn", "aaa nnnn",
    "aan", "aan naa", "aana naa",
    "aann", "aann naa", "aannaa", "aannnaa", "aannnn",
    "an naa", "ana naa", "ana nan", "ananan",
    "ann aann", "ann naa", "annnnaaa",
    // Shapes starting with a digit.
    "nn nnn",
    "nnn", "nnn nn", "nnn nnn", "nnn nnnn",
    "nnnn", "nnnn aa", "nnnn nnn", "nnnnaa",
    "nnnnn", "nnnnn nnn", "nnnnn nnnn", "nnnnn nnnnn",
    "nnnnnn", "nnnnnnn", "nnnnnnnn",
    // Shape with a leading postal mark.
    "〒nnn nnnn"};
|
||||
|
||||
// Maps a character to its pattern class: every ASCII digit becomes 'n',
// every ASCII Latin letter becomes 'a', anything else is returned as is.
UniChar SimplifyChar(UniChar const & c)
{
  return IsASCIIDigit(c) ? static_cast<UniChar>('n')
                         : (IsASCIILatin(c) ? static_cast<UniChar>('a') : c);
}
|
||||
|
||||
struct Node
|
||||
{
|
||||
Node() : m_isLeaf(false) {}
|
||||
|
||||
Node const * Move(UniChar c) const
|
||||
{
|
||||
for (auto const & p : m_moves)
|
||||
{
|
||||
if (p.first == c)
|
||||
return p.second.get();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename TIt>
|
||||
Node const * Move(TIt begin, TIt end) const
|
||||
{
|
||||
Node const * cur = this;
|
||||
for (; begin != end && cur; ++begin)
|
||||
cur = cur->Move(*begin);
|
||||
return cur;
|
||||
}
|
||||
|
||||
Node & MakeMove(UniChar c)
|
||||
{
|
||||
for (auto const & p : m_moves)
|
||||
{
|
||||
if (p.first == c)
|
||||
return *p.second;
|
||||
}
|
||||
m_moves.emplace_back(c, make_unique<Node>());
|
||||
return *m_moves.back().second;
|
||||
}
|
||||
|
||||
template <typename TIt>
|
||||
Node & MakeMove(TIt begin, TIt end)
|
||||
{
|
||||
Node * cur = this;
|
||||
for (; begin != end; ++begin)
|
||||
cur = &cur->MakeMove(*begin);
|
||||
return *cur;
|
||||
}
|
||||
|
||||
buffer_vector<pair<UniChar, unique_ptr<Node>>, 2> m_moves;
|
||||
bool m_isLeaf;
|
||||
|
||||
DISALLOW_COPY(Node);
|
||||
};
|
||||
|
||||
class PostcodesMatcher
|
||||
{
|
||||
public:
|
||||
PostcodesMatcher() : m_root(), m_maxNumTokensInPostcode(0)
|
||||
{
|
||||
search::Delimiters delimiters;
|
||||
for (auto const * pattern : g_patterns)
|
||||
AddString(MakeUniString(pattern), delimiters);
|
||||
}
|
||||
|
||||
bool HasString(TokensSlice const & slice) const
|
||||
{
|
||||
Node const * cur = &m_root;
|
||||
for (size_t i = 0; i < slice.Size() && cur; ++i)
|
||||
{
|
||||
auto const & s = slice.Get(i).front();
|
||||
cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar),
|
||||
make_transform_iterator(s.end(), &SimplifyChar));
|
||||
if (cur && i + 1 < slice.Size())
|
||||
cur = cur->Move(' ');
|
||||
}
|
||||
|
||||
if (!cur)
|
||||
return false;
|
||||
|
||||
if (slice.Size() > 0 && slice.IsPrefix(slice.Size() - 1))
|
||||
return true;
|
||||
|
||||
return cur->m_isLeaf;
|
||||
}
|
||||
|
||||
inline size_t GetMaxNumTokensInPostcode() const { return m_maxNumTokensInPostcode; }
|
||||
|
||||
private:
|
||||
void AddString(UniString const & s, search::Delimiters & delimiters)
|
||||
{
|
||||
vector<UniString> tokens;
|
||||
SplitUniString(s, MakeBackInsertFunctor(tokens), delimiters);
|
||||
m_maxNumTokensInPostcode = max(m_maxNumTokensInPostcode, tokens.size());
|
||||
|
||||
Node * cur = &m_root;
|
||||
for (size_t i = 0; i < tokens.size(); ++i)
|
||||
{
|
||||
cur = &cur->MakeMove(tokens[i].begin(), tokens[i].end());
|
||||
if (i + 1 != tokens.size())
|
||||
cur = &cur->MakeMove(' ');
|
||||
}
|
||||
cur->m_isLeaf = true;
|
||||
}
|
||||
|
||||
Node m_root;
|
||||
|
||||
size_t m_maxNumTokensInPostcode;
|
||||
|
||||
DISALLOW_COPY(PostcodesMatcher);
|
||||
};
|
||||
|
||||
// Returns the shared matcher instance. It is a function-local static, so it
// is constructed on the first call and reused afterwards.
PostcodesMatcher const & GetPostcodesMatcher()
{
  static PostcodesMatcher matcher;
  return matcher;
}
|
||||
} // namespace
|
||||
|
||||
// Returns whether the shared PostcodesMatcher accepts the tokens of |slice|.
bool LooksLikePostcode(TokensSlice const & slice)
{
  PostcodesMatcher const & matcher = GetPostcodesMatcher();
  return matcher.HasString(slice);
}
|
||||
|
||||
size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); }
|
||||
} // namespace v2
|
||||
} // namespace search
|
15
search/v2/postcodes_matcher.hpp
Normal file
15
search/v2/postcodes_matcher.hpp
Normal file
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
class TokensSlice;
|
||||
|
||||
// Returns true when the tokens of |slice| look like a postcode.
bool LooksLikePostcode(TokensSlice const & slice);
|
||||
|
||||
// Returns the maximum number of tokens a postcode may consist of.
size_t GetMaxNumTokensInPostcode();
|
||||
} // namespace v2
|
||||
} // namespace search
|
|
@ -7,7 +7,6 @@
|
|||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
|
@ -42,74 +41,6 @@ enum NameScore
|
|||
NAME_SCORE_COUNT
|
||||
};
|
||||
|
||||
class TokensSlice
|
||||
{
|
||||
public:
|
||||
TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
}
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_offset + i);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_size; }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_offset + i == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
size_t const m_offset;
|
||||
size_t const m_size;
|
||||
};
|
||||
|
||||
class TokensSliceNoCategories
|
||||
{
|
||||
public:
|
||||
TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
|
||||
m_indexes.reserve(endToken - startToken);
|
||||
for (size_t i = startToken; i < endToken; ++i)
|
||||
{
|
||||
if (!m_params.m_isCategorySynonym[i])
|
||||
m_indexes.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_indexes[i]);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_indexes.size(); }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_indexes[i] == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
vector<size_t> m_indexes;
|
||||
};
|
||||
|
||||
template <typename TSlice>
|
||||
NameScore GetNameScore(string const & name, TSlice const & slice)
|
||||
{
|
||||
|
|
27
search/v2/tokens_slice.cpp
Normal file
27
search/v2/tokens_slice.cpp
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include "search/v2/tokens_slice.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
// Creates a view over tokens [startToken, endToken) of |params|. Only a
// reference to |params| is stored. |startToken| must not exceed |endToken|;
// this is checked with an ASSERT.
TokensSlice::TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
  : m_params(params)
  , m_offset(startToken)
  , m_size(endToken - startToken)
{
  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
}
|
||||
|
||||
// Creates a view over those tokens of |params| in [startToken, endToken)
// that are not flagged in m_isCategorySynonym. Only a reference to |params|
// is stored. |startToken| must not exceed |endToken|; this is checked with
// an ASSERT.
TokensSliceNoCategories::TokensSliceNoCategories(SearchQueryParams const & params,
                                                 size_t startToken, size_t endToken)
  : m_params(params)
{
  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());

  // Upper bound on the number of kept tokens.
  m_indexes.reserve(endToken - startToken);

  size_t i = startToken;
  while (i < endToken)
  {
    bool const isCategory = m_params.m_isCategorySynonym[i];
    if (!isCategory)
      m_indexes.push_back(i);
    ++i;
  }
}
|
||||
} // namespace v2
|
||||
} // namespace search
|
67
search/v2/tokens_slice.hpp
Normal file
67
search/v2/tokens_slice.hpp
Normal file
|
@ -0,0 +1,67 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
class TokensSlice
|
||||
{
|
||||
public:
|
||||
TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken);
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_offset + i);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_size; }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_offset + i == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
size_t const m_offset;
|
||||
size_t const m_size;
|
||||
};
|
||||
|
||||
class TokensSliceNoCategories
|
||||
{
|
||||
public:
|
||||
TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken);
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_indexes[i]);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_indexes.size(); }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_indexes[i] == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
vector<size_t> m_indexes;
|
||||
};
|
||||
} // namespace v2
|
||||
} // namespace search
|
Loading…
Add table
Reference in a new issue