forked from organicmaps/organicmaps
Merge pull request #2961 from ygorshenin/add-postcodes-to-search-index
[search] Postcodes are added to the search index.
This commit is contained in:
commit
b9cc722dd7
25 changed files with 806 additions and 216 deletions
|
@ -220,6 +220,10 @@ bool IsASCIIString(string const & str)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns true when |c| is one of the ASCII decimal digits '0'..'9'.
bool IsASCIIDigit(UniChar c)
{
  return '0' <= c && c <= '9';
}
|
||||
|
||||
// Returns true when |c| is an ASCII Latin letter, either lower case
// ('a'..'z') or upper case ('A'..'Z').
bool IsASCIILatin(UniChar c)
{
  bool const isLowerCase = 'a' <= c && c <= 'z';
  bool const isUpperCase = 'A' <= c && c <= 'Z';
  return isLowerCase || isUpperCase;
}
|
||||
|
||||
bool StartsWith(UniString const & s, UniString const & p)
|
||||
{
|
||||
if (p.size() > s.size())
|
||||
|
|
|
@ -29,6 +29,19 @@ public:
|
|||
template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {}
|
||||
|
||||
bool IsEqualAscii(char const * s) const;
|
||||
|
||||
/// Appends |rhs| to the end of this string.
/// @return Reference to this string, so that calls can be chained.
UniString & operator+=(UniString const & rhs)
{
  this->append(rhs);
  return *this;
}
|
||||
|
||||
/// Builds a new string consisting of this string followed by |rhs|.
/// Neither operand is modified.
UniString operator+(UniString const & rhs) const
{
  UniString concatenation(*this);
  concatenation.append(rhs);
  return concatenation;
}
|
||||
};
|
||||
|
||||
/// Performs full case folding for string to make it search-compatible according
|
||||
|
@ -67,6 +80,8 @@ bool EqualNoCase(string const & s1, string const & s2);
|
|||
UniString MakeUniString(string const & utf8s);
|
||||
string ToUtf8(UniString const & s);
|
||||
bool IsASCIIString(string const & str);
|
||||
bool IsASCIIDigit(UniChar c);
|
||||
bool IsASCIILatin(UniChar c);
|
||||
|
||||
inline string DebugPrint(UniString const & s)
|
||||
{
|
||||
|
|
|
@ -136,11 +136,11 @@ struct FeatureNameInserter
|
|||
{
|
||||
}
|
||||
|
||||
void AddToken(signed char lang, strings::UniString const & s) const
|
||||
void AddToken(uint8_t lang, strings::UniString const & s) const
|
||||
{
|
||||
strings::UniString key;
|
||||
key.reserve(s.size() + 1);
|
||||
key.push_back(static_cast<uint8_t>(lang));
|
||||
key.push_back(lang);
|
||||
key.append(s.begin(), s.end());
|
||||
|
||||
m_keyValuePairs.emplace_back(key, m_val);
|
||||
|
@ -261,9 +261,11 @@ public:
|
|||
|
||||
void operator() (FeatureType const & f, uint32_t index) const
|
||||
{
|
||||
using namespace search;
|
||||
|
||||
feature::TypesHolder types(f);
|
||||
|
||||
static search::TypesSkipper skipIndex;
|
||||
static TypesSkipper skipIndex;
|
||||
|
||||
skipIndex.SkipTypes(types);
|
||||
if (types.Empty())
|
||||
|
@ -278,6 +280,18 @@ public:
|
|||
skipIndex.IsCountryOrState(types) ? m_synonyms : nullptr, m_keyValuePairs, hasStreetType);
|
||||
m_valueBuilder.MakeValue(f, types, index, inserter.m_val);
|
||||
|
||||
string const postcode = f.GetMetadata().Get(feature::Metadata::FMD_POSTCODE);
|
||||
if (!postcode.empty())
|
||||
{
|
||||
// See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less
|
||||
// than two.
|
||||
buffer_vector<strings::UniString, 2> tokens;
|
||||
SplitUniString(NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens),
|
||||
Delimiters());
|
||||
for (auto const & token : tokens)
|
||||
inserter.AddToken(kPostcodesLang, PostcodeToString(token));
|
||||
}
|
||||
|
||||
// Skip types for features without names.
|
||||
if (!f.ForEachName(inserter))
|
||||
skipIndex.SkipEmptyNameTypes(types);
|
||||
|
@ -291,7 +305,7 @@ public:
|
|||
|
||||
// add names of categories of the feature
|
||||
for (uint32_t t : categoryTypes)
|
||||
inserter.AddToken(search::kCategoriesLang, search::FeatureTypeToString(c.GetIndexForType(t)));
|
||||
inserter.AddToken(kCategoriesLang, FeatureTypeToString(c.GetIndexForType(t)));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -90,6 +90,12 @@ UniString FeatureTypeToString(uint32_t type)
|
|||
return UniString(s.begin(), s.end());
|
||||
}
|
||||
|
||||
// Converts |postcode| to the form used for postcode tokens: the
// fixed "!postcode:" prefix followed by |postcode| itself.
UniString PostcodeToString(strings::UniString const & postcode)
{
  // The prefix is converted from UTF-8 only once, on the first call.
  static UniString const kPrefix = MakeUniString("!postcode:");

  UniString prefixed(kPrefix);
  prefixed.append(postcode);
  return prefixed;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
char const * kStreetTokensSeparator = "\t -,.";
|
||||
|
|
|
@ -19,6 +19,8 @@ void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims
|
|||
|
||||
strings::UniString FeatureTypeToString(uint32_t type);
|
||||
|
||||
strings::UniString PostcodeToString(strings::UniString const & postcode);
|
||||
|
||||
template <class ContainerT, class DelimsT>
|
||||
bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s,
|
||||
ContainerT & tokens,
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "search/search_index_values.hpp"
|
||||
#include "search/search_query.hpp"
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/trie.hpp"
|
||||
|
||||
|
@ -103,6 +104,25 @@ bool CheckMatchString(strings::UniChar const * rootPrefix, size_t rootPrefixSize
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename TValue>
|
||||
bool FindLangIndex(trie::Iterator<ValueList<TValue>> const & trieRoot, uint8_t lang, uint32_t & langIx)
|
||||
{
|
||||
ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
|
||||
|
||||
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
|
||||
for (uint32_t i = 0; i < numLangs; ++i)
|
||||
{
|
||||
auto const & edge = trieRoot.m_edge[i].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
if (edge[0] == lang)
|
||||
{
|
||||
langIx = i;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename TValue, typename TF>
|
||||
|
@ -222,7 +242,7 @@ public:
|
|||
toDo(value);
|
||||
}
|
||||
};
|
||||
} // namespace search::impl
|
||||
} // impl
|
||||
|
||||
template <typename TValue>
|
||||
struct TrieRootPrefix
|
||||
|
@ -345,27 +365,23 @@ template <typename TValue, typename THolder>
|
|||
bool MatchCategoriesInTrie(SearchQueryParams const & params,
|
||||
trie::Iterator<ValueList<TValue>> const & trieRoot, THolder && holder)
|
||||
{
|
||||
ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
|
||||
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
|
||||
for (uint32_t langIx = 0; langIx < numLangs; ++langIx)
|
||||
{
|
||||
auto const & edge = trieRoot.m_edge[langIx].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
if (edge[0] == search::kCategoriesLang)
|
||||
{
|
||||
auto const catRoot = trieRoot.GoToEdge(langIx);
|
||||
MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
|
||||
uint32_t langIx = 0;
|
||||
if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx))
|
||||
return false;
|
||||
|
||||
// Last token's prefix is used as a complete token here, to
|
||||
// limit the number of features in the last bucket of a
|
||||
// holder. Probably, this is a false optimization.
|
||||
holder.Resize(params.m_tokens.size() + 1);
|
||||
holder.SwitchTo(params.m_tokens.size());
|
||||
MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
auto const & edge = trieRoot.m_edge[langIx].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
|
||||
auto const catRoot = trieRoot.GoToEdge(langIx);
|
||||
MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
|
||||
|
||||
// Last token's prefix is used as a complete token here, to limit
|
||||
// the number of features in the last bucket of a holder. Probably,
|
||||
// this is a false optimization.
|
||||
holder.Resize(params.m_tokens.size() + 1);
|
||||
holder.SwitchTo(params.m_tokens.size());
|
||||
MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Calls toDo with trie root prefix and language code on each language
|
||||
|
@ -427,4 +443,29 @@ void MatchFeaturesInTrie(SearchQueryParams const & params,
|
|||
|
||||
intersecter.ForEachResult(forward<ToDo>(toDo));
|
||||
}
|
||||
|
||||
template <typename TValue, typename TFilter, typename ToDo>
|
||||
void MatchPostcodesInTrie(v2::TokenSlice const & slice,
|
||||
trie::Iterator<ValueList<TValue>> const & trieRoot,
|
||||
TFilter const & filter, ToDo && toDo)
|
||||
{
|
||||
uint32_t langIx = 0;
|
||||
if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx))
|
||||
return;
|
||||
|
||||
auto const & edge = trieRoot.m_edge[langIx].m_label;
|
||||
auto const postcodesRoot = trieRoot.GoToEdge(langIx);
|
||||
|
||||
impl::OffsetIntersecter<TFilter, TValue> intersecter(filter);
|
||||
for (size_t i = 0; i < slice.Size(); ++i)
|
||||
{
|
||||
if (slice.IsPrefix(i))
|
||||
MatchTokenPrefixInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter);
|
||||
else
|
||||
MatchTokenInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter);
|
||||
intersecter.NextStep();
|
||||
}
|
||||
|
||||
intersecter.ForEachResult(forward<ToDo>(toDo));
|
||||
}
|
||||
} // namespace search
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "search_trie.hpp"
|
||||
|
||||
#include "v2/mwm_context.hpp"
|
||||
#include "v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/feature.hpp"
|
||||
#include "indexer/feature_algo.hpp"
|
||||
|
@ -29,8 +30,65 @@ using osm::Editor;
|
|||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
{
|
||||
class FeaturesCollector
|
||||
{
|
||||
public:
|
||||
FeaturesCollector(my::Cancellable const & cancellable, vector<uint64_t> & features)
|
||||
: m_cancellable(cancellable), m_features(features), m_counter(0)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename TValue>
|
||||
void operator()(TValue const & value)
|
||||
{
|
||||
if ((++m_counter & 0xFF) == 0)
|
||||
BailIfCancelled(m_cancellable);
|
||||
m_features.push_back(value.m_featureId);
|
||||
}
|
||||
|
||||
inline void operator()(uint32_t feature) { m_features.push_back(feature); }
|
||||
|
||||
inline void operator()(uint64_t feature) { m_features.push_back(feature); }
|
||||
|
||||
private:
|
||||
my::Cancellable const & m_cancellable;
|
||||
vector<uint64_t> & m_features;
|
||||
uint32_t m_counter;
|
||||
};
|
||||
|
||||
class EditedFeaturesHolder
|
||||
{
|
||||
public:
|
||||
EditedFeaturesHolder(MwmSet::MwmId const & id)
|
||||
{
|
||||
Editor & editor = Editor::Instance();
|
||||
m_deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted);
|
||||
m_modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified);
|
||||
m_created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created);
|
||||
}
|
||||
|
||||
bool ModifiedOrDeleted(uint32_t featureIndex) const
|
||||
{
|
||||
return binary_search(m_deleted.begin(), m_deleted.end(), featureIndex) ||
|
||||
binary_search(m_modified.begin(), m_modified.end(), featureIndex);
|
||||
}
|
||||
|
||||
template <typename TFn>
|
||||
void ForEachModifiedOrCreated(TFn & fn)
|
||||
{
|
||||
for_each(m_modified.begin(), m_modified.end(), fn);
|
||||
for_each(m_created.begin(), m_created.end(), fn);
|
||||
}
|
||||
|
||||
private:
|
||||
vector<uint32_t> m_deleted;
|
||||
vector<uint32_t> m_modified;
|
||||
vector<uint32_t> m_created;
|
||||
};
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> SortFeaturesAndBuildCBV(vector<uint64_t> && features)
|
||||
{
|
||||
|
@ -89,6 +147,13 @@ bool MatchFeatureByName(FeatureType const & ft, SearchQueryParams const & params
|
|||
return matched;
|
||||
}
|
||||
|
||||
// Placeholder for matching the postcode of |ft| against |slice|.
// The postcode is read from the feature's metadata, but it is not
// compared with |slice| yet, so no feature is ever reported as
// matched.
bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice)
{
  auto const & metadata = ft.GetMetadata();
  string const featurePostcode = metadata.Get(feature::Metadata::FMD_POSTCODE);
  // TODO(@y): implement this.
  return false;
}
|
||||
|
||||
// Retrieves from the search index corresponding to |value| all
|
||||
// features matching to |params|.
|
||||
template <typename TValue>
|
||||
|
@ -96,16 +161,7 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
|
|||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params)
|
||||
{
|
||||
// Exclude from search all deleted/modified features and match all edited/created features separately.
|
||||
Editor & editor = Editor::Instance();
|
||||
|
||||
auto const deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted);
|
||||
auto const modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified);
|
||||
auto const filter = [&](uint32_t featureIndex) -> bool
|
||||
{
|
||||
return (!binary_search(deleted.begin(), deleted.end(), featureIndex) &&
|
||||
!binary_search(modified.begin(), modified.end(), featureIndex));
|
||||
};
|
||||
EditedFeaturesHolder holder(id);
|
||||
|
||||
serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams()));
|
||||
ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
|
||||
|
@ -116,17 +172,16 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
|
|||
// TODO (@y, @m): This code may be optimized in the case where
|
||||
// bit vectors are sorted in the search index.
|
||||
vector<uint64_t> features;
|
||||
uint32_t counter = 0;
|
||||
auto const collector = [&](TValue const & value)
|
||||
{
|
||||
if ((++counter & 0xFF) == 0)
|
||||
BailIfCancelled(cancellable);
|
||||
features.push_back(value.m_featureId);
|
||||
};
|
||||
FeaturesCollector collector(cancellable, features);
|
||||
|
||||
MatchFeaturesInTrie(params, *trieRoot, filter, collector);
|
||||
MatchFeaturesInTrie(params, *trieRoot, [&holder](uint32_t featureIndex)
|
||||
{
|
||||
return !holder.ModifiedOrDeleted(featureIndex);
|
||||
},
|
||||
collector);
|
||||
|
||||
// Match all edited/created features separately.
|
||||
Editor & editor = Editor::Instance();
|
||||
auto const matcher = [&](uint32_t featureIndex)
|
||||
{
|
||||
FeatureType ft;
|
||||
|
@ -136,9 +191,47 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
|
|||
features.push_back(featureIndex);
|
||||
};
|
||||
|
||||
for_each(modified.begin(), modified.end(), matcher);
|
||||
auto const created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created);
|
||||
for_each(created.begin(), created.end(), matcher);
|
||||
holder.ForEachModifiedOrCreated(matcher);
|
||||
|
||||
return SortFeaturesAndBuildCBV(move(features));
|
||||
}
|
||||
|
||||
template <typename TValue>
|
||||
unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeaturesImpl(
|
||||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
TokenSlice const & slice)
|
||||
{
|
||||
EditedFeaturesHolder holder(id);
|
||||
|
||||
serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams()));
|
||||
ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
|
||||
|
||||
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
|
||||
SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
|
||||
|
||||
// TODO (@y, @m): This code may be optimized in the case where
|
||||
// bit vectors are sorted in the search index.
|
||||
vector<uint64_t> features;
|
||||
FeaturesCollector collector(cancellable, features);
|
||||
|
||||
MatchPostcodesInTrie(slice, *trieRoot, [&holder](uint32_t featureIndex)
|
||||
{
|
||||
return !holder.ModifiedOrDeleted(featureIndex);
|
||||
},
|
||||
collector);
|
||||
|
||||
// Match all edited/created features separately.
|
||||
Editor & editor = Editor::Instance();
|
||||
auto const matcher = [&](uint32_t featureIndex)
|
||||
{
|
||||
FeatureType ft;
|
||||
VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ());
|
||||
// TODO(AlexZ): Should we match by some feature's metafields too?
|
||||
if (MatchFeatureByPostcode(ft, slice))
|
||||
features.push_back(featureIndex);
|
||||
};
|
||||
|
||||
holder.ForEachModifiedOrCreated(matcher);
|
||||
|
||||
return SortFeaturesAndBuildCBV(move(features));
|
||||
}
|
||||
|
@ -149,42 +242,74 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl(
|
|||
v2::MwmContext const & context, my::Cancellable const & cancellable,
|
||||
covering::IntervalsT const & coverage, int scale)
|
||||
{
|
||||
uint32_t counter = 0;
|
||||
vector<uint64_t> features;
|
||||
|
||||
context.ForEachIndex(coverage, scale, [&](uint64_t featureId)
|
||||
{
|
||||
if ((++counter & 0xFF) == 0)
|
||||
BailIfCancelled(cancellable);
|
||||
features.push_back(featureId);
|
||||
});
|
||||
FeaturesCollector collector(cancellable, features);
|
||||
|
||||
context.ForEachIndex(coverage, scale, collector);
|
||||
return SortFeaturesAndBuildCBV(move(features));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct RetrieveAddressFeaturesAdaptor
|
||||
{
|
||||
template <typename... TArgs>
|
||||
unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
|
||||
{
|
||||
return RetrieveAddressFeaturesImpl<T>(forward<TArgs>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct RetrievePostcodeFeaturesAdaptor
|
||||
{
|
||||
template <typename... TArgs>
|
||||
unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
|
||||
{
|
||||
return RetrievePostcodeFeaturesImpl<T>(forward<TArgs>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template <template <typename> class T>
|
||||
struct Selector
|
||||
{
|
||||
template <typename... TArgs>
|
||||
unique_ptr<coding::CompressedBitVector> operator()(MwmSet::MwmId const & id, MwmValue & value,
|
||||
TArgs &&... args)
|
||||
{
|
||||
version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
|
||||
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
|
||||
{
|
||||
T<FeatureWithRankAndCenter> t;
|
||||
return t(id, value, forward<TArgs>(args)...);
|
||||
}
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::CompressedBitVector)
|
||||
{
|
||||
T<FeatureIndexValue> t;
|
||||
return t(id, value, forward<TArgs>(args)...);
|
||||
}
|
||||
return unique_ptr<coding::CompressedBitVector>();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace v2
|
||||
{
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
|
||||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params)
|
||||
{
|
||||
version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
|
||||
Selector<RetrieveAddressFeaturesAdaptor> selector;
|
||||
return selector(id, value, cancellable, params);
|
||||
}
|
||||
|
||||
if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
|
||||
{
|
||||
using TValue = FeatureWithRankAndCenter;
|
||||
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
|
||||
}
|
||||
else if (mwmTraits.GetSearchIndexFormat() ==
|
||||
version::MwmTraits::SearchIndexFormat::CompressedBitVector)
|
||||
{
|
||||
using TValue = FeatureIndexValue;
|
||||
return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
|
||||
}
|
||||
return unique_ptr<coding::CompressedBitVector>();
|
||||
// Dispatches to the RetrievePostcodeFeaturesImpl instantiation that
// corresponds to the search index format of |value|.
unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(
    MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
    TokenSlice const & slice)
{
  return Selector<RetrievePostcodeFeaturesAdaptor>()(id, value, cancellable, slice);
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
|
||||
|
|
|
@ -21,16 +21,24 @@ namespace search
|
|||
namespace v2
|
||||
{
|
||||
class MwmContext;
|
||||
class TokenSlice;
|
||||
|
||||
// Retrieves from the search index corresponding to |value| all
|
||||
// features matching to |params|.
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
|
||||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params);
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId const & id,
|
||||
MwmValue & value,
|
||||
my::Cancellable const & cancellable,
|
||||
SearchQueryParams const & params);
|
||||
|
||||
// Retrieves from the search index corresponding to |value| all
|
||||
// postcodes matching to |slice|.
|
||||
unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(
|
||||
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
|
||||
TokenSlice const & slice);
|
||||
|
||||
// Retrieves from the geometry index corresponding to |value| all features belonging to |rect|.
|
||||
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
|
||||
MwmContext const & context, my::Cancellable const & cancellable,
|
||||
m2::RectD const & rect, int scale);
|
||||
MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect,
|
||||
int scale);
|
||||
} // namespace v2
|
||||
} // namespace search
|
||||
|
|
|
@ -54,6 +54,7 @@ HEADERS += \
|
|||
v2/locality_scorer.hpp \
|
||||
v2/mwm_context.hpp \
|
||||
v2/nested_rects_cache.hpp \
|
||||
v2/postcodes_matcher.hpp \
|
||||
v2/pre_ranking_info.hpp \
|
||||
v2/rank_table_cache.hpp \
|
||||
v2/ranking_info.hpp \
|
||||
|
@ -62,6 +63,7 @@ HEADERS += \
|
|||
v2/search_query_v2.hpp \
|
||||
v2/stats_cache.hpp \
|
||||
v2/street_vicinity_loader.hpp \
|
||||
v2/token_slice.hpp \
|
||||
|
||||
SOURCES += \
|
||||
approximate_string_match.cpp \
|
||||
|
@ -99,6 +101,7 @@ SOURCES += \
|
|||
v2/locality_scorer.cpp \
|
||||
v2/mwm_context.cpp \
|
||||
v2/nested_rects_cache.cpp \
|
||||
v2/postcodes_matcher.cpp \
|
||||
v2/pre_ranking_info.cpp \
|
||||
v2/rank_table_cache.cpp \
|
||||
v2/ranking_info.cpp \
|
||||
|
@ -106,3 +109,4 @@ SOURCES += \
|
|||
v2/search_model.cpp \
|
||||
v2/search_query_v2.cpp \
|
||||
v2/street_vicinity_loader.cpp \
|
||||
v2/token_slice.cpp \
|
||||
|
|
|
@ -33,7 +33,14 @@ void SearchTest::RegisterCountry(string const & name, m2::RectD const & rect)
|
|||
bool SearchTest::ResultsMatch(string const & query,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules)
|
||||
{
|
||||
tests_support::TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport);
|
||||
return ResultsMatch(query, "en" /* locale */, rules);
|
||||
}
|
||||
|
||||
// Runs |query| with the given |locale| in Mode::Everywhere using the
// current viewport, waits for the request to finish and checks its
// results against |rules| with MatchResults().
bool SearchTest::ResultsMatch(string const & query,
                              string const & locale,
                              vector<shared_ptr<tests_support::MatchingRule>> const & rules)
{
  tests_support::TestSearchRequest searchRequest(m_engine, query, locale, Mode::Everywhere,
                                                 m_viewport);
  searchRequest.Wait();
  return MatchResults(m_engine, rules, searchRequest.Results());
}
|
||||
|
|
|
@ -72,11 +72,26 @@ public:
|
|||
return id;
|
||||
}
|
||||
|
||||
template <typename TBuildFn>
|
||||
MwmSet::MwmId BuildWorld(TBuildFn && fn)
|
||||
{
|
||||
return BuildMwm("testWorld", feature::DataHeader::world, forward<TBuildFn>(fn));
|
||||
}
|
||||
|
||||
template <typename TBuildFn>
|
||||
MwmSet::MwmId BuildCountry(string const & name, TBuildFn && fn)
|
||||
{
|
||||
return BuildMwm(name, feature::DataHeader::country, forward<TBuildFn>(fn));
|
||||
}
|
||||
|
||||
// Remembers |viewport| in m_viewport.
inline void SetViewport(m2::RectD const & viewport)
{
  m_viewport = viewport;
}
|
||||
|
||||
bool ResultsMatch(string const & query,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
bool ResultsMatch(string const & query, string const & locale,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
bool ResultsMatch(string const & query, Mode mode,
|
||||
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
|
||||
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "search/retrieval.hpp"
|
||||
#include "search/search_integration_tests/helpers.hpp"
|
||||
#include "search/search_tests_support/test_feature.hpp"
|
||||
#include "search/search_tests_support/test_mwm_builder.hpp"
|
||||
#include "search/search_tests_support/test_results_matching.hpp"
|
||||
#include "search/search_tests_support/test_search_request.hpp"
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/feature.hpp"
|
||||
#include "indexer/index.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
@ -77,39 +82,38 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke)
|
|||
TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en");
|
||||
TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en");
|
||||
|
||||
BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderlandCountry);
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
});
|
||||
auto wonderlandId =
|
||||
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(longPondVillage);
|
||||
BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderlandCountry);
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(longPondVillage);
|
||||
|
||||
builder.Add(feynmanStreet);
|
||||
builder.Add(bohrStreet1);
|
||||
builder.Add(bohrStreet2);
|
||||
builder.Add(bohrStreet3);
|
||||
builder.Add(firstAprilStreet);
|
||||
builder.Add(feynmanStreet);
|
||||
builder.Add(bohrStreet1);
|
||||
builder.Add(bohrStreet2);
|
||||
builder.Add(bohrStreet3);
|
||||
builder.Add(firstAprilStreet);
|
||||
|
||||
builder.Add(feynmanHouse);
|
||||
builder.Add(bohrHouse);
|
||||
builder.Add(hilbertHouse);
|
||||
builder.Add(descartesHouse);
|
||||
builder.Add(bornHouse);
|
||||
builder.Add(feynmanHouse);
|
||||
builder.Add(bohrHouse);
|
||||
builder.Add(hilbertHouse);
|
||||
builder.Add(descartesHouse);
|
||||
builder.Add(bornHouse);
|
||||
|
||||
builder.Add(busStop);
|
||||
builder.Add(tramStop);
|
||||
builder.Add(quantumTeleport1);
|
||||
builder.Add(quantumTeleport2);
|
||||
builder.Add(quantumCafe);
|
||||
builder.Add(lantern1);
|
||||
builder.Add(lantern2);
|
||||
});
|
||||
builder.Add(busStop);
|
||||
builder.Add(tramStop);
|
||||
builder.Add(quantumTeleport1);
|
||||
builder.Add(quantumTeleport2);
|
||||
builder.Add(quantumCafe);
|
||||
builder.Add(lantern1);
|
||||
builder.Add(lantern2);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
|
||||
{
|
||||
|
@ -182,11 +186,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld)
|
|||
TestCountry wonderland(m2::PointD(0, 0), countryName, "en");
|
||||
TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */);
|
||||
|
||||
auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderland);
|
||||
builder.Add(losAlamos);
|
||||
});
|
||||
auto testWorldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderland);
|
||||
builder.Add(losAlamos);
|
||||
});
|
||||
RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5)));
|
||||
|
@ -213,16 +217,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName)
|
|||
"Hyde Park", "en");
|
||||
TestPOI cafe(m2::PointD(1.0, 1.0), "London Cafe", "en");
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london);
|
||||
});
|
||||
auto wonderlandId =
|
||||
BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(hydePark);
|
||||
builder.Add(cafe);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(hydePark);
|
||||
builder.Add(cafe);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9)));
|
||||
{
|
||||
|
@ -248,11 +251,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, DisableSuggests)
|
|||
TestCity london1(m2::PointD(1, 1), "London", "en", 100 /* rank */);
|
||||
TestCity london2(m2::PointD(-1, -1), "London", "en", 100 /* rank */);
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london1);
|
||||
builder.Add(london2);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(london1);
|
||||
builder.Add(london2);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(0.5, 0.5), m2::PointD(1.5, 1.5)));
|
||||
{
|
||||
|
@ -299,21 +302,20 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
TestPOI cafe2(m2::PointD(-0.99, -0.99), "", "en");
|
||||
cafe2.SetTypes({{"amenity", "cafe"}});
|
||||
|
||||
|
||||
auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(sanFrancisco);
|
||||
builder.Add(lermontovo);
|
||||
});
|
||||
auto wonderlandId = BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(cafe1);
|
||||
builder.Add(cafe2);
|
||||
builder.Add(goldenGateBridge);
|
||||
builder.Add(goldenGateStreet);
|
||||
builder.Add(lermontov);
|
||||
builder.Add(waterfall);
|
||||
});
|
||||
auto worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(sanFrancisco);
|
||||
builder.Add(lermontovo);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(cafe1);
|
||||
builder.Add(cafe2);
|
||||
builder.Add(goldenGateBridge);
|
||||
builder.Add(goldenGateStreet);
|
||||
builder.Add(lermontov);
|
||||
builder.Add(waterfall);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
|
||||
{
|
||||
|
@ -359,5 +361,65 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
TEST(ResultsMatch("waterfall", rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that the postcode of a building gets into the search index
// and that the building is still found by a regular address query.
UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
{
  string const countryName = "Russia";

  TestCity city(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);

  vector<m2::PointD> const streetPoints{m2::PointD(-0.5, 0.0), m2::PointD(0, 0),
                                        m2::PointD(0.5, 0.0)};
  TestStreet street(streetPoints, "Первомайская", "ru");

  TestBuilding building(m2::PointD(0.0, 0.00001), "", "28 а", street, "ru");
  building.SetPostcode("141701");

  BuildWorld([&](TestMwmBuilder & builder)
             {
               builder.Add(city);
             });
  auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
                                {
                                  builder.Add(street);
                                  builder.Add(building);
                                });

  // Tests that postcode is added to the search index.
  {
    auto handle = m_engine.GetMwmHandleById(countryId);
    TEST(handle.IsAlive(), ());
    auto * value = handle.GetValue<MwmValue>();

    // A query consisting of the single postcode token.
    SearchQueryParams params;
    params.m_tokens.emplace_back();
    params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141701")));
    TokenSlice const slice(params, 0, params.m_tokens.size());

    my::Cancellable cancellable;
    auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable, slice);
    TEST_EQUAL(1, features->PopCount(), ());

    // Finds the position of the first set bit.
    uint64_t index = 0;
    for (; !features->GetBit(index); ++index)
    {
    }

    Index::FeaturesLoaderGuard loader(m_engine, countryId);
    FeatureType ft;
    loader.GetFeatureByIndex(index, ft);

    auto rule = ExactMatch(countryId, building);
    TEST(rule->Matches(ft), ());
  }

  // The building must be found by city, street and house number.
  {
    TRules rules{ExactMatch(countryId, building)};
    TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ());
  }

  // TODO (@y): uncomment this test and add more tests when postcodes
  // search will be implemented.
  //
  // {
  //   TRules rules{ExactMatch(countryId, building)};
  //   TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ());
  // }
}
|
||||
} // namespace
|
||||
} // namespace search
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "search/v2/pre_ranking_info.hpp"
|
||||
#include "search/v2/ranking_info.hpp"
|
||||
#include "search/v2/ranking_utils.hpp"
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
#include "storage/index.hpp"
|
||||
|
@ -464,10 +465,8 @@ void Query::SetQuery(string const & query)
|
|||
search::Delimiters delims;
|
||||
SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims);
|
||||
|
||||
bool checkPrefix = true;
|
||||
|
||||
// Assign prefix with last parsed token.
|
||||
if (checkPrefix && !m_tokens.empty() && !delims(strings::LastUniChar(query)))
|
||||
if (!m_tokens.empty() && !delims(strings::LastUniChar(query)))
|
||||
{
|
||||
m_prefix.swap(m_tokens.back());
|
||||
m_tokens.pop_back();
|
||||
|
@ -623,7 +622,7 @@ class PreResult2Maker
|
|||
|
||||
info.m_nameScore = v2::NAME_SCORE_ZERO;
|
||||
|
||||
v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
|
||||
v2::TokenSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
|
||||
|
||||
for (auto const & lang : m_params.m_langs)
|
||||
{
|
||||
|
|
72
search/search_tests/postcodes_matcher_tests.cpp
Normal file
72
search/search_tests/postcodes_matcher_tests.cpp
Normal file
|
@ -0,0 +1,72 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/postcodes_matcher.hpp"
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/string.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
using namespace strings;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Test helper: tokenizes |s|, packs the tokens into SearchQueryParams
// and asks the matcher whether the whole token sequence looks like a
// postcode.  When |checkPrefix| is set and the tokenizer reports that
// the last token is a prefix, that token is stored as the prefix
// token instead of a full token.
bool LooksLikePostcode(string const & s, bool checkPrefix)
{
  vector<UniString> parts;
  bool const endsWithPrefix =
      TokenizeStringAndCheckIfLastTokenIsPrefix(s, parts, search::Delimiters());

  // The slice always covers every token, including a trailing prefix.
  size_t const sliceSize = parts.size();

  SearchQueryParams params;
  if (endsWithPrefix && checkPrefix)
  {
    params.m_prefixTokens.push_back(parts.back());
    parts.pop_back();
  }

  for (auto it = parts.begin(); it != parts.end(); ++it)
  {
    params.m_tokens.emplace_back();
    params.m_tokens.back().push_back(*it);
  }

  TokenSlice const slice(params, 0, sliceSize);
  return LooksLikePostcode(slice);
}
|
||||
|
||||
// Smoke test for the postcode patterns matcher: a set of strings
// that must be recognized as (prefixes of) postcodes, followed by
// ordinary address fragments that must be rejected.
UNIT_TEST(PostcodesMatcher_Smoke)
{
  TEST(LooksLikePostcode("141701", false /* checkPrefix */), ());
  TEST(LooksLikePostcode("141", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
  // Same postcode written with a hyphen.  This line used to repeat
  // the previous assertion verbatim; the hyphenated form exercises
  // the delimiter handling instead (compare with "803-0271" below).
  TEST(LooksLikePostcode("BA6-8JP", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ());

  TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ());
  TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ());

  // Street names and house numbers must not be taken for postcodes.
  TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ());
  TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ());
}
|
||||
} // namespace
|
||||
} // namespace v2
|
||||
} // namespace search
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/ranking_utils.hpp"
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
@ -32,7 +33,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
|
|||
params.m_prefixTokens.swap(params.m_tokens.back());
|
||||
params.m_tokens.pop_back();
|
||||
}
|
||||
return GetNameScore(name, TokensSlice(params, startToken, endToken));
|
||||
return GetNameScore(name, TokenSlice(params, startToken, endToken));
|
||||
}
|
||||
|
||||
UNIT_TEST(NameTest_Smoke)
|
||||
|
|
|
@ -27,6 +27,7 @@ SOURCES += \
|
|||
latlon_match_test.cpp \
|
||||
locality_finder_test.cpp \
|
||||
locality_scorer_test.cpp \
|
||||
postcodes_matcher_tests.cpp \
|
||||
query_saver_tests.cpp \
|
||||
ranking_tests.cpp \
|
||||
string_intersection_test.cpp \
|
||||
|
|
|
@ -19,6 +19,7 @@ public:
|
|||
|
||||
bool Matches(FeatureType const & feature) const;
|
||||
inline void SetPostcode(string const & postcode) { m_postcode = postcode; }
|
||||
inline uint64_t GetId() const { return m_id; }
|
||||
inline string const & GetName() const { return m_name; }
|
||||
|
||||
virtual void Serialize(FeatureBuilder1 & fb) const;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
namespace search
|
||||
{
|
||||
// NOTE(review): judging by the name, a reserved pseudo-language code
// for category tokens - confirm against the index builder.
static const uint8_t kCategoriesLang = 128;
|
||||
// NOTE(review): presumably the pseudo-language code under which
// postcode tokens are stored in the search index - confirm against
// the index builder.
static const uint8_t kPostcodesLang = 129;
|
||||
// NOTE(review): presumably the number of bits used when coding point
// coordinates - confirm against the users of this constant.
static const uint8_t kPointCodingBits = 20;
|
||||
} // namespace search
|
||||
|
||||
|
|
|
@ -979,6 +979,8 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter)
|
|||
m_filter = &filter;
|
||||
MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; });
|
||||
|
||||
// TODO (@y): implement postcodes matching here.
|
||||
|
||||
// The order is rather important. Match streets first, then all other stuff.
|
||||
GreedilyMatchStreets();
|
||||
MatchPOIsAndBuildings(0 /* curToken */);
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#include "search/v2/locality_scorer.hpp"
|
||||
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
|
||||
namespace search
|
||||
|
@ -99,8 +101,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const
|
|||
auto score = NAME_SCORE_ZERO;
|
||||
for (auto const & name : names)
|
||||
{
|
||||
score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken,
|
||||
l.m_locality.m_endToken)));
|
||||
score = max(score, GetNameScore(name, v2::TokenSlice(m_params, l.m_locality.m_startToken,
|
||||
l.m_locality.m_endToken)));
|
||||
}
|
||||
l.m_nameScore = score;
|
||||
}
|
||||
|
|
168
search/v2/postcodes_matcher.cpp
Normal file
168
search/v2/postcodes_matcher.cpp
Normal file
|
@ -0,0 +1,168 @@
|
|||
#include "search/v2/postcodes_matcher.hpp"
|
||||
|
||||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/macros.hpp"
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/transform_iterator.hpp"
|
||||
#include "std/unique_ptr.hpp"
|
||||
#include "std/utility.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
using namespace strings;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Top patterns for postcodes.  In a pattern 'a' stands for an ASCII
// latin letter, 'n' stands for an ASCII digit, a space separates
// tokens and any other character stands for itself.  See
// search/search_quality/clusterize_postcodes.lisp for details on how
// these patterns were constructed.
char const * const g_patterns[] = {
    // Patterns starting with a letter.
    "aa nnnn", "aa nnnnn", "aaa nnnn",
    "aan", "aan naa", "aana naa",
    "aann", "aann naa", "aannaa", "aannnaa", "aannnn",
    "an naa", "ana naa", "ana nan", "ananan",
    "ann aann", "ann naa", "annnnaaa",
    // Patterns starting with a digit.
    "nn nnn",
    "nnn", "nnn nn", "nnn nnn", "nnn nnnn",
    "nnnn", "nnnn aa", "nnnn nnn", "nnnnaa",
    "nnnnn", "nnnnn nnn", "nnnnn nnnn", "nnnnn nnnnn",
    "nnnnnn", "nnnnnnn", "nnnnnnnn",
    // Pattern with a leading postal mark.
    "〒nnn nnnn"};
|
||||
|
||||
// Maps a character to its pattern class: ASCII digits become 'n',
// ASCII latin letters become 'a', everything else is returned as is.
UniChar SimplifyChar(UniChar const & c)
{
  return IsASCIIDigit(c) ? static_cast<UniChar>('n')
                         : (IsASCIILatin(c) ? static_cast<UniChar>('a') : c);
}
|
||||
|
||||
struct Node
|
||||
{
|
||||
Node() : m_isLeaf(false) {}
|
||||
|
||||
Node const * Move(UniChar c) const
|
||||
{
|
||||
for (auto const & p : m_moves)
|
||||
{
|
||||
if (p.first == c)
|
||||
return p.second.get();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename TIt>
|
||||
Node const * Move(TIt begin, TIt end) const
|
||||
{
|
||||
Node const * cur = this;
|
||||
for (; begin != end && cur; ++begin)
|
||||
cur = cur->Move(*begin);
|
||||
return cur;
|
||||
}
|
||||
|
||||
Node & MakeMove(UniChar c)
|
||||
{
|
||||
for (auto const & p : m_moves)
|
||||
{
|
||||
if (p.first == c)
|
||||
return *p.second;
|
||||
}
|
||||
m_moves.emplace_back(c, make_unique<Node>());
|
||||
return *m_moves.back().second;
|
||||
}
|
||||
|
||||
template <typename TIt>
|
||||
Node & MakeMove(TIt begin, TIt end)
|
||||
{
|
||||
Node * cur = this;
|
||||
for (; begin != end; ++begin)
|
||||
cur = &cur->MakeMove(*begin);
|
||||
return *cur;
|
||||
}
|
||||
|
||||
buffer_vector<pair<UniChar, unique_ptr<Node>>, 2> m_moves;
|
||||
bool m_isLeaf;
|
||||
|
||||
DISALLOW_COPY(Node);
|
||||
};
|
||||
|
||||
// This class puts all strings from g_patterns to a trie with a low
|
||||
// branching factor and matches queries against these patterns.
|
||||
class PostcodesMatcher
|
||||
{
|
||||
public:
|
||||
PostcodesMatcher() : m_root(), m_maxNumTokensInPostcode(0)
|
||||
{
|
||||
search::Delimiters delimiters;
|
||||
for (auto const * pattern : g_patterns)
|
||||
AddString(MakeUniString(pattern), delimiters);
|
||||
}
|
||||
|
||||
// Checks that given tokens match to at least one of postcodes
|
||||
// patterns.
|
||||
//
|
||||
// Complexity: O(total length of tokens in |slice|).
|
||||
bool HasString(TokenSlice const & slice) const
|
||||
{
|
||||
Node const * cur = &m_root;
|
||||
for (size_t i = 0; i < slice.Size() && cur; ++i)
|
||||
{
|
||||
auto const & s = slice.Get(i).front();
|
||||
cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar),
|
||||
make_transform_iterator(s.end(), &SimplifyChar));
|
||||
if (cur && i + 1 < slice.Size())
|
||||
cur = cur->Move(' ');
|
||||
}
|
||||
|
||||
if (!cur)
|
||||
return false;
|
||||
|
||||
if (slice.Size() > 0 && slice.IsPrefix(slice.Size() - 1))
|
||||
return true;
|
||||
|
||||
return cur->m_isLeaf;
|
||||
}
|
||||
|
||||
inline size_t GetMaxNumTokensInPostcode() const { return m_maxNumTokensInPostcode; }
|
||||
|
||||
private:
|
||||
void AddString(UniString const & s, search::Delimiters & delimiters)
|
||||
{
|
||||
vector<UniString> tokens;
|
||||
SplitUniString(s, MakeBackInsertFunctor(tokens), delimiters);
|
||||
m_maxNumTokensInPostcode = max(m_maxNumTokensInPostcode, tokens.size());
|
||||
|
||||
Node * cur = &m_root;
|
||||
for (size_t i = 0; i < tokens.size(); ++i)
|
||||
{
|
||||
cur = &cur->MakeMove(tokens[i].begin(), tokens[i].end());
|
||||
if (i + 1 != tokens.size())
|
||||
cur = &cur->MakeMove(' ');
|
||||
}
|
||||
cur->m_isLeaf = true;
|
||||
}
|
||||
|
||||
Node m_root;
|
||||
|
||||
size_t m_maxNumTokensInPostcode;
|
||||
|
||||
DISALLOW_COPY(PostcodesMatcher);
|
||||
};
|
||||
|
||||
// Returns the shared matcher instance; it is constructed on the
// first call.
PostcodesMatcher const & GetPostcodesMatcher()
{
  static PostcodesMatcher const instance;
  return instance;
}
|
||||
} // namespace
|
||||
|
||||
// Returns true when the tokens of |slice| match one of the known
// postcode patterns (see PostcodesMatcher::HasString()).
bool LooksLikePostcode(TokenSlice const & slice)
{
  PostcodesMatcher const & matcher = GetPostcodesMatcher();
  return matcher.HasString(slice);
}
|
||||
|
||||
size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); }
|
||||
} // namespace v2
|
||||
} // namespace search
|
15
search/v2/postcodes_matcher.hpp
Normal file
15
search/v2/postcodes_matcher.hpp
Normal file
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
class TokenSlice;
|
||||
|
||||
// Returns true when the tokens of |slice| match at least one of the
// known postcode patterns.  When the last token of |slice| is a
// prefix token, it is enough for the tokens to match the beginning
// of a pattern.
bool LooksLikePostcode(TokenSlice const & slice);
|
||||
|
||||
// Returns the largest number of tokens in a single postcode pattern.
size_t GetMaxNumTokensInPostcode();
|
||||
} // namespace v2
|
||||
} // namespace search
|
|
@ -7,7 +7,6 @@
|
|||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
|
@ -42,74 +41,6 @@ enum NameScore
|
|||
NAME_SCORE_COUNT
|
||||
};
|
||||
|
||||
class TokensSlice
|
||||
{
|
||||
public:
|
||||
TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
}
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_offset + i);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_size; }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_offset + i == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
size_t const m_offset;
|
||||
size_t const m_size;
|
||||
};
|
||||
|
||||
class TokensSliceNoCategories
|
||||
{
|
||||
public:
|
||||
TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
|
||||
m_indexes.reserve(endToken - startToken);
|
||||
for (size_t i = startToken; i < endToken; ++i)
|
||||
{
|
||||
if (!m_params.m_isCategorySynonym[i])
|
||||
m_indexes.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_indexes[i]);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_indexes.size(); }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_indexes[i] == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
vector<size_t> m_indexes;
|
||||
};
|
||||
|
||||
template <typename TSlice>
|
||||
NameScore GetNameScore(string const & name, TSlice const & slice)
|
||||
{
|
||||
|
|
27
search/v2/token_slice.cpp
Normal file
27
search/v2/token_slice.cpp
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include "search/v2/token_slice.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
// Creates a view over the tokens [startToken, endToken) of |params|.
// Stores the start offset and the number of tokens in the view.
TokenSlice::TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
|
||||
{
|
||||
// The size above is computed with unsigned arithmetic, so the range
// must not be reversed.
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
}
|
||||
|
||||
// Creates a view over those tokens from [startToken, endToken) of
// |params| that are not flagged in m_isCategorySynonym.
TokenSliceNoCategories::TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken,
                                               size_t endToken)
  : m_params(params)
{
  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());

  // At most every token of the range is kept.
  m_indexes.reserve(endToken - startToken);
  for (size_t token = startToken; token < endToken; ++token)
  {
    if (m_params.m_isCategorySynonym[token])
      continue;
    m_indexes.push_back(token);
  }
}
|
||||
} // namespace v2
|
||||
} // namespace search
|
67
search/v2/token_slice.hpp
Normal file
67
search/v2/token_slice.hpp
Normal file
|
@ -0,0 +1,67 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace v2
|
||||
{
|
||||
class TokenSlice
|
||||
{
|
||||
public:
|
||||
TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken);
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_offset + i);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_size; }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_offset + i == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
size_t const m_offset;
|
||||
size_t const m_size;
|
||||
};
|
||||
|
||||
class TokenSliceNoCategories
|
||||
{
|
||||
public:
|
||||
TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken);
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_indexes[i]);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_indexes.size(); }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_indexes[i] == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
vector<size_t> m_indexes;
|
||||
};
|
||||
} // namespace v2
|
||||
} // namespace search
|
Loading…
Add table
Reference in a new issue