forked from organicmaps/organicmaps
[search] Match street synonyms with misprints.
This commit is contained in:
parent
07c656942f
commit
b2a8add9bc
6 changed files with 145 additions and 59 deletions
|
@ -203,10 +203,9 @@ struct FeatureNameInserter
|
|||
|
||||
if (m_hasStreetType)
|
||||
{
|
||||
search::StreetTokensFilter filter([&](strings::UniString const & token, size_t /* tag */)
|
||||
{
|
||||
AddToken(lang, token);
|
||||
});
|
||||
search::StreetTokensFilter filter(
|
||||
[&](strings::UniString const & token, size_t /* tag */) { AddToken(lang, token); },
|
||||
false /* withMisprints */);
|
||||
for (auto const & token : tokens)
|
||||
filter.Put(token, false /* isPrefix */, 0 /* tag */);
|
||||
|
||||
|
|
|
@ -18,16 +18,15 @@ namespace
|
|||
class Utf8StreetTokensFilter
|
||||
{
|
||||
public:
|
||||
explicit Utf8StreetTokensFilter(vector<pair<string, size_t>> & cont)
|
||||
explicit Utf8StreetTokensFilter(vector<pair<string, size_t>> & cont, bool withMisprints = false)
|
||||
: m_cont(cont)
|
||||
, m_filter([&](UniString const & token, size_t tag)
|
||||
{
|
||||
m_cont.emplace_back(ToUtf8(token), tag);
|
||||
})
|
||||
, m_filter(
|
||||
[&](UniString const & token, size_t tag) { m_cont.emplace_back(ToUtf8(token), tag); },
|
||||
withMisprints)
|
||||
{
|
||||
}
|
||||
|
||||
inline void Put(string const & token, bool isPrefix, size_t tag)
|
||||
void Put(string const & token, bool isPrefix, size_t tag)
|
||||
{
|
||||
m_filter.Put(MakeUniString(token), isPrefix, tag);
|
||||
}
|
||||
|
@ -162,11 +161,11 @@ UNIT_TEST(StreetPrefixMatch)
|
|||
|
||||
UNIT_TEST(StreetTokensFilter)
|
||||
{
|
||||
using TList = vector<pair<string, size_t>>;
|
||||
using List = vector<pair<string, size_t>>;
|
||||
|
||||
{
|
||||
TList expected = {};
|
||||
TList actual;
|
||||
List expected = {};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
filter.Put("ули", true /* isPrefix */, 0 /* tag */);
|
||||
|
@ -175,8 +174,8 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
TList expected = {};
|
||||
TList actual;
|
||||
List expected = {};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
filter.Put("улица", false /* isPrefix */, 0 /* tag */);
|
||||
|
@ -185,8 +184,8 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
TList expected = {{"генерала", 1}, {"антонова", 2}};
|
||||
TList actual;
|
||||
List expected = {{"генерала", 1}, {"антонова", 2}};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
filter.Put("ул", false /* isPrefix */, 0 /* tag */);
|
||||
|
@ -197,8 +196,8 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
TList expected = {{"улица", 100}, {"набережная", 50}};
|
||||
TList actual;
|
||||
List expected = {{"улица", 100}, {"набережная", 50}};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
filter.Put("улица", false /* isPrefix */, 100 /* tag */);
|
||||
|
@ -208,8 +207,8 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
TList expected = {{"улица", 0}, {"набережная", 1}, {"проспект", 2}};
|
||||
TList actual;
|
||||
List expected = {{"улица", 0}, {"набережная", 1}, {"проспект", 2}};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
filter.Put("улица", false /* isPrefix */, 0 /* tag */);
|
||||
|
@ -218,6 +217,42 @@ UNIT_TEST(StreetTokensFilter)
|
|||
|
||||
TEST_EQUAL(expected, actual, ());
|
||||
}
|
||||
|
||||
{
|
||||
List expectedWithMisprints = {{"ленинский", 0}};
|
||||
List expectedWithoutMisprints = {{"ленинский", 0}, {"пропект", 1}};
|
||||
List actualWithMisprints;
|
||||
List actualWithoutMisprints;
|
||||
|
||||
Utf8StreetTokensFilter filterWithMisprints(actualWithMisprints, true /* withMisprints */);
|
||||
Utf8StreetTokensFilter filterWithoutMisprints(actualWithoutMisprints,
|
||||
false /* withMisprints */);
|
||||
filterWithMisprints.Put("ленинский", false /* isPrefix */, 0 /* tag */);
|
||||
filterWithoutMisprints.Put("ленинский", false /* isPrefix */, 0 /* tag */);
|
||||
filterWithMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
|
||||
filterWithoutMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
|
||||
|
||||
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
|
||||
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
|
||||
}
|
||||
|
||||
{
|
||||
List expectedWithMisprints = {{"улица", 0}, {"набрежная", 1}};
|
||||
List expectedWithoutMisprints = {{"набрежная", 1}};
|
||||
List actualWithMisprints;
|
||||
List actualWithoutMisprints;
|
||||
|
||||
Utf8StreetTokensFilter filterWithMisprints(actualWithMisprints, true /* withMisprints */);
|
||||
Utf8StreetTokensFilter filterWithoutMisprints(actualWithoutMisprints,
|
||||
false /* withMisprints */);
|
||||
filterWithMisprints.Put("улица", false /* isPrefix */, 0 /* tag */);
|
||||
filterWithoutMisprints.Put("улица", false /* isPrefix */, 0 /* tag */);
|
||||
filterWithMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
|
||||
filterWithoutMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
|
||||
|
||||
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
|
||||
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_TEST(NormalizeAndSimplifyString_Numero)
|
||||
|
|
|
@ -435,7 +435,14 @@ bool ContainsNormalized(string const & str, string const & substr)
|
|||
// StreetTokensFilter ------------------------------------------------------------------------------
|
||||
void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, size_t tag)
|
||||
{
|
||||
if ((isPrefix && IsStreetSynonymPrefix(token)) || (!isPrefix && IsStreetSynonym(token)))
|
||||
using IsStreetChecker = std::function<bool(strings::UniString const &)>;
|
||||
|
||||
IsStreetChecker isStreet = m_withMisprints ? IsStreetSynonymWithMisprints : IsStreetSynonym;
|
||||
IsStreetChecker isStreetPrefix =
|
||||
m_withMisprints ? IsStreetSynonymPrefixWithMisprints : IsStreetSynonymPrefix;
|
||||
|
||||
auto const isStreetSynonym = isStreet(token);
|
||||
if ((isPrefix && isStreetPrefix(token)) || (!isPrefix && isStreetSynonym))
|
||||
{
|
||||
++m_numSynonyms;
|
||||
if (m_numSynonyms == 1)
|
||||
|
@ -446,7 +453,7 @@ void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, si
|
|||
}
|
||||
|
||||
// Do not emit delayed token for incomplete street synonym.
|
||||
if ((!isPrefix || IsStreetSynonym(token)) && m_numSynonyms == 2)
|
||||
if ((!isPrefix || isStreetSynonym) && m_numSynonyms == 2)
|
||||
EmitToken(m_delayedToken, m_delayedTag);
|
||||
}
|
||||
EmitToken(token, tag);
|
||||
|
|
|
@ -103,8 +103,9 @@ class StreetTokensFilter
|
|||
public:
|
||||
using Callback = std::function<void(strings::UniString const & token, size_t tag)>;
|
||||
|
||||
template <typename TC>
|
||||
explicit StreetTokensFilter(TC && callback) : m_callback(std::forward<TC>(callback))
|
||||
template <typename C>
|
||||
StreetTokensFilter(C && callback, bool withMisprints)
|
||||
: m_callback(std::forward<C>(callback)), m_withMisprints(withMisprints)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -125,5 +126,6 @@ private:
|
|||
size_t m_numSynonyms = 0;
|
||||
|
||||
Callback m_callback;
|
||||
bool m_withMisprints = false;
|
||||
};
|
||||
} // namespace search
|
||||
|
|
|
@ -2018,5 +2018,41 @@ UNIT_CLASS_TEST(ProcessorTest, Strasse)
|
|||
checkNoErrors("xyz", rules);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks street search when the street synonym in the query (or in the
// street's own name) contains a misprint.
//
// Three vertical streets are placed at x = 0, 1 and 2:
//   - "Ленинский проспект"
//   - "улица набрежная"  (name stored with a misprint)
//   - "улица набережная" (correct spelling)
UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints)
{
  string const wonderland = "Wonderland";

  // Every test street is a segment from (x, -1) to (x, 1).
  auto const verticalSegment = [](double x) {
    return vector<m2::PointD>{m2::PointD(x, -1.0), m2::PointD(x, 1.0)};
  };

  TestStreet leninskyAvenue(verticalSegment(0.0), "Ленинский проспект", "ru");
  TestStreet misprintedEmbankment(verticalSegment(1.0), "улица набрежная", "ru");
  TestStreet embankment(verticalSegment(2.0), "улица набережная", "ru");

  auto wonderlandId = BuildCountry(wonderland, [&](TestMwmBuilder & mwmBuilder) {
    mwmBuilder.Add(leninskyAvenue);
    mwmBuilder.Add(misprintedEmbankment);
    mwmBuilder.Add(embankment);
  });

  // The viewport spans all three streets.
  SetViewport(m2::RectD(m2::PointD(0.0, -1.0), m2::PointD(2.0, 1.0)));

  // The avenue must be found with the correct synonym, with the misprinted
  // synonym ("пропект") and with no synonym at all.
  Rules onlyLeninsky = {ExactMatch(wonderlandId, leninskyAvenue)};
  TEST(ResultsMatch("ленинский проспект", onlyLeninsky), ());
  TEST(ResultsMatch("ленинский пропект", onlyLeninsky), ());
  TEST(ResultsMatch("ленинский", onlyLeninsky), ());

  // The misprinted spelling is expected to return both embankment streets.
  Rules bothEmbankments = {ExactMatch(wonderlandId, misprintedEmbankment),
                           ExactMatch(wonderlandId, embankment)};
  TEST(ResultsMatch("улица набрежная", bothEmbankments), ());
  TEST(ResultsMatch("набрежная", bothEmbankments), ());

  // The correct spelling is expected to return only the correctly named street.
  Rules onlyEmbankment = {ExactMatch(wonderlandId, embankment)};
  TEST(ResultsMatch("улица набережная", onlyEmbankment), ());
}
|
||||
|
||||
} // namespace
|
||||
} // namespace search
|
||||
|
|
|
@ -124,48 +124,55 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const &
|
|||
}
|
||||
};
|
||||
|
||||
StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
|
||||
{
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
if (tag < curToken)
|
||||
auto findStreets = [&](bool withMisprints)
|
||||
{
|
||||
StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
|
||||
{
|
||||
// This is the case for delayed
|
||||
// street synonym. Therefore,
|
||||
// |streets| is temporarily in the
|
||||
// incomplete state.
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
if (tag < curToken)
|
||||
{
|
||||
// This is the case for delayed
|
||||
// street synonym. Therefore,
|
||||
// |streets| is temporarily in the
|
||||
// incomplete state.
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit();
|
||||
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
},
|
||||
withMisprints);
|
||||
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty();
|
||||
++curToken)
|
||||
{
|
||||
auto const & token = params.GetToken(curToken).GetOriginal();
|
||||
bool const isPrefix = params.IsPrefixToken(curToken);
|
||||
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit();
|
||||
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
|
||||
emit();
|
||||
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
});
|
||||
filter.Put(token, isPrefix, curToken);
|
||||
}
|
||||
emit();
|
||||
};
|
||||
|
||||
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty();
|
||||
++curToken)
|
||||
{
|
||||
auto const & token = params.GetToken(curToken).GetOriginal();
|
||||
bool const isPrefix = params.IsPrefixToken(curToken);
|
||||
|
||||
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
|
||||
emit();
|
||||
|
||||
filter.Put(token, isPrefix, curToken);
|
||||
}
|
||||
emit();
|
||||
findStreets(false /* withMisprints */);
|
||||
findStreets(true /* withMisprints */);
|
||||
}
|
||||
}
|
||||
} // namespace search
|
||||
|
|
Loading…
Add table
Reference in a new issue