forked from organicmaps/organicmaps
[search] Fixed streets matching.
This commit is contained in:
parent
01db5017dc
commit
683250079a
7 changed files with 212 additions and 60 deletions
|
@ -176,33 +176,19 @@ public:
|
|||
tokens.resize(maxTokensCount);
|
||||
}
|
||||
|
||||
// Streets are a special case: we do not add the token "street" and its
|
||||
// synonyms when the feature's name contains it because in
|
||||
// the search phase this part of the query will be matched against the
|
||||
// "street" in the categories branch of the search index.
|
||||
// However, we still add it when there are two or more street tokens
|
||||
// ("avenue st", "улица набережная").
|
||||
|
||||
size_t const tokensCount = tokens.size();
|
||||
size_t numStreetTokens = 0;
|
||||
vector<bool> isStreet(tokensCount);
|
||||
for (size_t i = 0; i < tokensCount; ++i)
|
||||
if (m_hasStreetType)
|
||||
{
|
||||
if (search::IsStreetSynonym(tokens[i]))
|
||||
{
|
||||
isStreet[i] = true;
|
||||
++numStreetTokens;
|
||||
}
|
||||
search::StreetTokensFilter filter([&](strings::UniString const & token, size_t /* tag */)
|
||||
{
|
||||
AddToken(lang, token);
|
||||
});
|
||||
for (auto const & token : tokens)
|
||||
filter.Put(token, false /* isPrefix */, 0 /* tag */);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < tokensCount; ++i)
|
||||
else
|
||||
{
|
||||
if (numStreetTokens == 1 && isStreet[i] && m_hasStreetType)
|
||||
{
|
||||
//LOG(LDEBUG, ("Skipping token:", tokens[i], "in", name));
|
||||
continue;
|
||||
}
|
||||
AddToken(lang, tokens[i]);
|
||||
for (auto const & token : tokens)
|
||||
AddToken(lang, token);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -9,6 +9,28 @@ using namespace strings;
|
|||
|
||||
namespace
|
||||
{
|
||||
class Utf8StreetTokensFilter
|
||||
{
|
||||
public:
|
||||
Utf8StreetTokensFilter(vector<pair<string, size_t>> & cont)
|
||||
: m_cont(cont)
|
||||
, m_filter([&](UniString const & token, size_t tag)
|
||||
{
|
||||
m_cont.emplace_back(ToUtf8(token), tag);
|
||||
})
|
||||
{
|
||||
}
|
||||
|
||||
inline void Put(string const & token, bool isPrefix, size_t tag)
|
||||
{
|
||||
m_filter.Put(MakeUniString(token), isPrefix, tag);
|
||||
}
|
||||
|
||||
private:
|
||||
vector<pair<string, size_t>> & m_cont;
|
||||
StreetTokensFilter m_filter;
|
||||
};
|
||||
|
||||
bool TestStreetPrefixMatch(char const * s)
|
||||
{
|
||||
return IsStreetSynonymPrefix(MakeUniString(s));
|
||||
|
@ -74,3 +96,51 @@ UNIT_TEST(StreetPrefixMatch)
|
|||
TEST(TestStreetPrefixMatch("проезд"), ());
|
||||
TEST(!TestStreetPrefixMatch("проездд"), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(StreetTokensFilter)
{
  using TList = vector<pair<string, size_t>>;

  // A lone street-synonym prefix is swallowed by the filter.
  {
    TList emitted;
    Utf8StreetTokensFilter filter(emitted);
    filter.Put("ули", true /* isPrefix */, 0 /* tag */);

    TList const expected = {};
    TEST_EQUAL(expected, emitted, ());
  }

  // A lone full street synonym is swallowed as well.
  {
    TList emitted;
    Utf8StreetTokensFilter filter(emitted);
    filter.Put("улица", false /* isPrefix */, 0 /* tag */);

    TList const expected = {};
    TEST_EQUAL(expected, emitted, ());
  }

  // A single synonym followed by ordinary tokens: only the ordinary
  // tokens come through, with their tags preserved.
  {
    TList emitted;
    Utf8StreetTokensFilter filter(emitted);
    filter.Put("ул", false /* isPrefix */, 0 /* tag */);
    filter.Put("генерала", false /* isPrefix */, 1 /* tag */);
    filter.Put("антонова", false /* isPrefix */, 2 /* tag */);

    TList const expected = {{"генерала", 1}, {"антонова", 2}};
    TEST_EQUAL(expected, emitted, ());
  }

  // Two street synonyms in a row: both must be kept.
  {
    TList emitted;
    Utf8StreetTokensFilter filter(emitted);
    filter.Put("улица", false /* isPrefix */, 100 /* tag */);
    filter.Put("набережная", true /* isPrefix */, 50 /* tag */);

    TList const expected = {{"улица", 100}, {"набережная", 50}};
    TEST_EQUAL(expected, emitted, ());
  }
}
|
||||
|
|
|
@ -223,4 +223,27 @@ bool ContainsNormalized(string const & str, string const & substr)
|
|||
UniString const usubstr = NormalizeAndSimplifyString(substr);
|
||||
return std::search(ustr.begin(), ustr.end(), usubstr.begin(), usubstr.end()) != ustr.end();
|
||||
}
|
||||
|
||||
// StreetTokensFilter ------------------------------------------------------------------------------
|
||||
// Feeds one token into the filter. A single street synonym in the
// stream is skipped; once a second synonym appears, both it and the
// delayed first one are emitted. Non-synonym tokens pass straight
// through.
void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, size_t tag)
{
  // A token counts as a street synonym when it matches a synonym
  // fully, or — for the query's incomplete last token — when it is a
  // prefix of one.
  if ((isPrefix && IsStreetSynonymPrefix(token)) || (!isPrefix && IsStreetSynonym(token)))
  {
    ++m_numSynonyms;
    if (m_numSynonyms == 1)
    {
      // Delay the first synonym: if it turns out to be the only one,
      // it must be skipped entirely.
      m_delayedToken = token;
      m_delayedTag = tag;
      return;
    }

    // The delayed token is flushed exactly once, when the second
    // synonym proves it is not alone. For the third and following
    // synonyms it has already been emitted, so emitting it again here
    // (as the previous code did unconditionally) would duplicate it.
    if (m_numSynonyms == 2)
      EmitToken(m_delayedToken, m_delayedTag);
    EmitToken(token, tag);
    return;
  }

  EmitToken(token, tag);
}
|
||||
} // namespace search
|
||||
|
|
|
@ -54,4 +54,37 @@ bool IsStreetSynonymPrefix(strings::UniString const & s);
|
|||
/// Normalizes both str and substr, and then returns true if substr is found in str.
|
||||
/// Used in native platform code for search in localized strings (cuisines, categories, strings etc.).
|
||||
bool ContainsNormalized(string const & str, string const & substr);
|
||||
|
||||
// This class can be used as a filter for street tokens. As there can
|
||||
// be street synonyms in the street name, single street synonym is
|
||||
// skipped, but multiple synonyms are left as is.
|
||||
class StreetTokensFilter
|
||||
{
|
||||
public:
|
||||
using TCallback = function<void(strings::UniString const & token, size_t tag)>;
|
||||
|
||||
template <typename TC>
|
||||
StreetTokensFilter(TC && callback)
|
||||
: m_callback(forward<TC>(callback))
|
||||
{
|
||||
}
|
||||
|
||||
// Puts token to the filter. Filter checks following cases:
|
||||
// * if |token| is the first street synonym met so far, it's delayed
|
||||
// * if |token| is a street synonym, but not the first, callback is called
|
||||
// for the |token| and for the previously delayed token
|
||||
// * if |token| is not a street synonym, callback is called for the |token|
|
||||
void Put(strings::UniString const & token, bool isPrefix, size_t tag);
|
||||
|
||||
private:
|
||||
using TCell = pair<strings::UniString, size_t>;
|
||||
|
||||
inline void EmitToken(strings::UniString const & token, size_t tag) { m_callback(token, tag); }
|
||||
|
||||
strings::UniString m_delayedToken;
|
||||
size_t m_delayedTag = 0;
|
||||
size_t m_numSynonyms = 0;
|
||||
|
||||
TCallback m_callback;
|
||||
};
|
||||
} // namespace search
|
||||
|
|
|
@ -466,6 +466,8 @@ void Geocoder::SetParams(Params const & params)
|
|||
}
|
||||
}
|
||||
|
||||
LOG(LDEBUG, ("Tokens = ", m_params.m_tokens));
|
||||
LOG(LDEBUG, ("Prefix tokens = ", m_params.m_prefixTokens));
|
||||
LOG(LDEBUG, ("Languages =", m_params.m_langs));
|
||||
}
|
||||
|
||||
|
@ -1075,61 +1077,82 @@ void Geocoder::GreedilyMatchStreets()
|
|||
continue;
|
||||
|
||||
// Here we try to match as many tokens as possible while
|
||||
// intersection is a non-empty bit vector of streets. All tokens
|
||||
// that are synonyms to streets are ignored. Moreover, each time
|
||||
// a token that looks like a beginning of a house number is met,
|
||||
// we try to use current intersection of tokens as a street layer
|
||||
// and try to match buildings or pois.
|
||||
unique_ptr<coding::CompressedBitVector> allFeatures;
|
||||
// intersection is a non-empty bit vector of streets. Single
|
||||
// tokens that are synonyms to streets are ignored. Moreover,
|
||||
// each time a token that looks like a beginning of a house number
|
||||
// is met, we try to use current intersection of tokens as a
|
||||
// street layer and try to match BUILDINGs or POIs.
|
||||
CBVPtr allFeatures(m_streets, false /* isOwner */);
|
||||
|
||||
size_t curToken = startToken;
|
||||
|
||||
// This variable is used for prevention of duplicate calls to
|
||||
// CreateStreetsLayerAndMatchLowerLayers() with the same
|
||||
// arguments.
|
||||
size_t lastStopToken = curToken;
|
||||
size_t lastToken = startToken;
|
||||
|
||||
for (; curToken < m_numTokens && !m_usedTokens[curToken]; ++curToken)
|
||||
bool emptyIntersection = true;
|
||||
bool incomplete = false;
|
||||
|
||||
auto createStreetsLayerAndMatchLowerLayers = [&]()
|
||||
{
|
||||
if (!allFeatures.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
|
||||
{
|
||||
CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, *allFeatures);
|
||||
lastToken = curToken;
|
||||
}
|
||||
};
|
||||
|
||||
StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
|
||||
{
|
||||
auto buffer = coding::CompressedBitVector::Intersect(
|
||||
*allFeatures, *m_addressFeatures[tag]);
|
||||
if (tag < curToken)
|
||||
{
|
||||
allFeatures.Set(move(buffer));
|
||||
emptyIntersection = false;
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
|
||||
// |allFeatures| will become empty
|
||||
// after the intersection. Therefore
|
||||
// we need to create streets layer
|
||||
// right now.
|
||||
if (coding::CompressedBitVector::IsEmpty(buffer))
|
||||
createStreetsLayerAndMatchLowerLayers();
|
||||
|
||||
allFeatures.Set(move(buffer));
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
});
|
||||
|
||||
for (; curToken < m_numTokens && !m_usedTokens[curToken] && !allFeatures.IsEmpty(); ++curToken)
|
||||
{
|
||||
auto const & token = m_params.GetTokens(curToken).front();
|
||||
if (IsStreetSynonymPrefix(token))
|
||||
continue;
|
||||
|
||||
bool const isPrefix = curToken >= m_params.m_tokens.size();
|
||||
|
||||
if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
|
||||
{
|
||||
CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures);
|
||||
lastStopToken = curToken;
|
||||
}
|
||||
createStreetsLayerAndMatchLowerLayers();
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> buffer;
|
||||
if (startToken == curToken || coding::CompressedBitVector::IsEmpty(allFeatures))
|
||||
buffer = coding::CompressedBitVector::Intersect(*m_streets, *m_addressFeatures[curToken]);
|
||||
else
|
||||
buffer = coding::CompressedBitVector::Intersect(*allFeatures, *m_addressFeatures[curToken]);
|
||||
|
||||
if (coding::CompressedBitVector::IsEmpty(buffer))
|
||||
break;
|
||||
|
||||
allFeatures.swap(buffer);
|
||||
filter.Put(token, isPrefix, curToken);
|
||||
}
|
||||
|
||||
if (curToken != lastStopToken)
|
||||
CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures);
|
||||
createStreetsLayerAndMatchLowerLayers();
|
||||
}
|
||||
}
|
||||
|
||||
void Geocoder::CreateStreetsLayerAndMatchLowerLayers(
|
||||
size_t startToken, size_t endToken, unique_ptr<coding::CompressedBitVector> const & features)
|
||||
size_t startToken, size_t endToken, coding::CompressedBitVector const & features)
|
||||
{
|
||||
ASSERT(m_layers.empty(), ());
|
||||
|
||||
if (coding::CompressedBitVector::IsEmpty(features))
|
||||
if (coding::CompressedBitVector::IsEmpty(&features))
|
||||
return;
|
||||
|
||||
CBVPtr filtered(features.get(), false /* isOwner */);
|
||||
if (m_filter->NeedToFilter(*features))
|
||||
filtered.Set(m_filter->Filter(*features).release(), true /* isOwner */);
|
||||
CBVPtr filtered(&features, false /* isOwner */);
|
||||
if (m_filter->NeedToFilter(features))
|
||||
filtered.Set(m_filter->Filter(features).release(), true /* isOwner */);
|
||||
|
||||
m_layers.emplace_back();
|
||||
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
|
||||
|
@ -1138,7 +1161,7 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers(
|
|||
InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer);
|
||||
|
||||
vector<uint32_t> sortedFeatures;
|
||||
sortedFeatures.reserve(features->PopCount());
|
||||
sortedFeatures.reserve(features.PopCount());
|
||||
filtered.ForEach(MakeBackInsertFunctor(sortedFeatures));
|
||||
layer.m_sortedFeatures = &sortedFeatures;
|
||||
|
||||
|
|
|
@ -245,8 +245,8 @@ private:
|
|||
// then performs geocoding in street vicinities.
|
||||
void GreedilyMatchStreets();
|
||||
|
||||
void CreateStreetsLayerAndMatchLowerLayers(
|
||||
size_t startToken, size_t endToken, unique_ptr<coding::CompressedBitVector> const & features);
|
||||
void CreateStreetsLayerAndMatchLowerLayers(size_t startToken, size_t endToken,
|
||||
coding::CompressedBitVector const & features);
|
||||
|
||||
// Tries to find all paths in a search tree, where each edge is
|
||||
// marked with some substring of the query tokens. These paths are
|
||||
|
|
|
@ -59,6 +59,8 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
|
||||
TestCity losAlamosCity(m2::PointD(10, 10), "Los Alamos", "en", 100 /* rank */);
|
||||
TestCity mskCity(m2::PointD(0, 0), "Moscow", "en", 100 /* rank */);
|
||||
TestCity torontoCity(m2::PointD(-10, -10), "Toronto", "en", 100 /* rank */);
|
||||
|
||||
TestVillage longPondVillage(m2::PointD(15, 15), "Long Pond Village", "en", 10 /* rank */);
|
||||
TestStreet feynmanStreet(
|
||||
vector<m2::PointD>{m2::PointD(9.999, 9.999), m2::PointD(10, 10), m2::PointD(10.001, 10.001)},
|
||||
|
@ -94,16 +96,23 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en");
|
||||
TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en");
|
||||
|
||||
TestStreet stradaDrive(vector<m2::PointD>{m2::PointD(-10.001, -10.001), m2::PointD(-10, -10),
|
||||
m2::PointD(-9.999, -9.999)},
|
||||
"Strada drive", "en");
|
||||
TestBuilding terranceHouse(m2::PointD(-10, -10), "", "155", stradaDrive, "en");
|
||||
|
||||
BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(wonderlandCountry);
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(torontoCity);
|
||||
});
|
||||
auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(losAlamosCity);
|
||||
builder.Add(mskCity);
|
||||
builder.Add(torontoCity);
|
||||
builder.Add(longPondVillage);
|
||||
|
||||
builder.Add(feynmanStreet);
|
||||
|
@ -125,6 +134,9 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
builder.Add(quantumCafe);
|
||||
builder.Add(lantern1);
|
||||
builder.Add(lantern2);
|
||||
|
||||
builder.Add(stradaDrive);
|
||||
builder.Add(terranceHouse);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
|
||||
|
@ -190,6 +202,11 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
TRules rules = {ExactMatch(wonderlandId, bornHouse)};
|
||||
TEST(ResultsMatch("long pond 1st april street 8", rules), ());
|
||||
}
|
||||
|
||||
{
|
||||
TRules rules = {ExactMatch(wonderlandId, terranceHouse)};
|
||||
TEST(ResultsMatch("Toronto strada drive 155", rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_CLASS_TEST(ProcessorTest, SearchInWorld)
|
||||
|
|
Loading…
Add table
Reference in a new issue