forked from organicmaps/organicmaps
[search] Avoid fancy categories matching (like cafe <-> care).
Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
parent
939811a031
commit
37fd63e05f
4 changed files with 59 additions and 2 deletions
|
@ -55,12 +55,26 @@ size_t GetMaxErrorsForToken(strings::UniString const & token)
|
|||
|
||||
// Builds the Levenshtein DFA used to fuzzy-match the token |s|.
// The DFA is constructed with a prefix size of 1, the kAllowedMisprints substitution set and
// an error budget of GetMaxErrorsForToken(s). |s| must not be empty (checked by ASSERT).
strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s)
|
||||
{
|
||||
ASSERT(!s.empty(), ());
|
||||
// In search we use LevenshteinDFAs for fuzzy matching. But due to
|
||||
// performance reasons, we limit prefix misprints to a fixed set of substitutions defined in
|
||||
// kAllowedMisprints and skipped letters.
|
||||
return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForToken(s));
|
||||
}
|
||||
|
||||
// Builds the Levenshtein DFA used to fuzzy-match a category token |s|.
// Construction is the same as in BuildLevenshteinDFA(), except that the error budget is
// GetMaxErrorsForTokenLength(s.size() - 1), i.e. it is computed as if the token were one
// symbol shorter. |s| must not be empty (checked by ASSERT).
strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s)
|
||||
{
|
||||
// https://github.com/organicmaps/organicmaps/issues/3655
|
||||
// Separate DFA for categories (token's length <= 4 means no errors allowed) to avoid fancy matchings like:
|
||||
// cafe <-> care
|
||||
// ecco -> eco
|
||||
// shop <-> shoe
|
||||
/// @todo "hote" doesn't match "hotel" now. Should allow _adding_ symbols when size == 4.
|
||||
|
|
||||
ASSERT(!s.empty(), ());
|
||||
// NOTE(review): the ASSERT above is the only guard for s.size() - 1; presumably size() is
// unsigned, so an empty token would wrap around if ASSERT is compiled out - confirm.
return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForTokenLength(s.size() - 1));
|
||||
}
|
||||
|
||||
UniString NormalizeAndSimplifyString(string_view s)
|
||||
{
|
||||
UniString uniString = MakeUniString(s);
|
||||
|
|
|
@ -22,6 +22,7 @@ inline constexpr size_t GetMaxErrorsForTokenLength(size_t length)
|
|||
size_t GetMaxErrorsForToken(strings::UniString const & token);
|
||||
|
||||
// Builds the Levenshtein DFA for |s| with an error budget of GetMaxErrorsForToken(s).
strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s);
|
||||
// Builds the Levenshtein DFA for a category token |s|. The error budget is stricter:
// it is GetMaxErrorsForTokenLength(s.size() - 1), see issue 3655 (cafe <-> care).
strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s);
|
||||
|
||||
// This function should be used for all search strings normalization.
|
||||
// It does some magic text transformation which greatly helps us to improve our search.
|
||||
|
|
|
@ -917,7 +917,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestCategorialSearch)
|
|||
}
|
||||
|
||||
{
|
||||
Rules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2),
|
||||
/// @todo We updated fuzzy match for categories: hote -> hotel is not matched now (4 letters input token).
|
||||
Rules const rules = {/*ExactMatch(wonderlandId, hotel1),*/ ExactMatch(wonderlandId, hotel2),
|
||||
ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel),
|
||||
ExactMatch(wonderlandId, hotelDeVille)};
|
||||
// A prefix token.
|
||||
|
@ -3207,4 +3208,45 @@ UNIT_CLASS_TEST(ProcessorTest, Place_Region)
|
|||
TEST(ResultsMatch("carth", rules, "en"), ());
|
||||
}
|
||||
|
||||
// Checks fuzzy matching of category tokens: short queries ("cafe", "shoe", "ecco") must return
// only the listed POI. The cosmetics and organic POIs are added to the country but never appear
// in any rule set - presumably they are negative cases (the care/eco look-alikes); confirm that
// ResultsMatch() fails on unexpected results.
UNIT_CLASS_TEST(ProcessorTest, FuzzyCategories)
|
||||
{
|
||||
// POI names are placeholders ("xxx", "yyy", "zzz"), presumably so that only the types can match.
TestPOI cafe({0, 0.01}, "xxx", "en");
|
||||
cafe.SetTypes({{"amenity", "cafe"}});
|
||||
|
|
||||
TestPOI cosmetics({0, 0.02}, "yyy", "en");
|
||||
cosmetics.SetTypes({{"shop", "cosmetics"}});
|
||||
|
|
||||
// The only POI with a meaningful name: "ecco" is queried by name in the last block below.
TestPOI shoes({0, 0.03}, "ecco", "en");
|
||||
shoes.SetTypes({{"shop", "shoes"}});
|
||||
|
|
||||
TestPOI organic({0, 0.04}, "zzz", "en");
|
||||
organic.SetTypes({{"shop", "grocery"}, {"organic", "yes"}});
|
||||
|
|
||||
// All four POIs go into a single test country.
auto wonderlandId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(cafe);
|
||||
builder.Add(cosmetics);
|
||||
builder.Add(shoes);
|
||||
builder.Add(organic);
|
||||
});
|
||||
|
|
||||
SetViewport(m2::RectD(-0.5, -0.5, 0.5, 0.5));
|
||||
|
|
||||
// "cafe" is expected to return the cafe only.
{
|
||||
Rules const rules = {ExactMatch(wonderlandId, cafe)};
|
||||
TEST(ResultsMatch("cafe", rules), ());
|
||||
}
|
||||
|
|
||||
// Both "shoe" and "shoes" are expected to return the shoes shop only.
{
|
||||
Rules const rules = {ExactMatch(wonderlandId, shoes)};
|
||||
TEST(ResultsMatch("shoe", rules), ());
|
||||
TEST(ResultsMatch("shoes", rules), ());
|
||||
}
|
||||
|
|
||||
// "ecco" is the name of the shoes POI; it is expected to be the only result.
{
|
||||
Rules const rules = {ExactMatch(wonderlandId, shoes)};
|
||||
TEST(ResultsMatch("ecco", rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace processor_test
|
||||
|
|
|
@ -63,7 +63,7 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & loc
|
|||
// A possible optimization is to build each dfa once and save it. Note that
|
||||
// dfas for the prefix tokens differ, i.e. we ignore slice.IsPrefix(i) here.
|
||||
SearchTrieRequest<strings::LevenshteinDFA> request;
|
||||
request.m_names.push_back(BuildLevenshteinDFA(slice.Get(i)));
|
||||
request.m_names.push_back(BuildLevenshteinDFA_Category(slice.Get(i)));
|
||||
request.SetLangs(locales);
|
||||
|
||||
MatchFeaturesInTrie(request, iterator,
|
||||
|
|
Loading…
Add table
Reference in a new issue