diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp index b6eaf463d1..541a9f46df 100644 --- a/search/streets_matcher.cpp +++ b/search/streets_matcher.cpp @@ -67,6 +67,112 @@ bool EqualsByHashAndMisprints(StreetsMatcher::Prediction const & lhs, { return lhs.m_withMisprints == rhs.m_withMisprints && lhs.m_hash == rhs.m_hash; } + +void FindStreets(BaseContext const & ctx, FeaturesFilter const & filter, QueryParams const & params, + size_t startToken, bool withMisprints, + vector & predictions) +{ + // Here we try to match as many tokens as possible while + // intersection is a non-empty bit vector of streets. Single + // tokens that are synonyms to streets are ignored. Moreover, + // each time a token that looks like a beginning of a house number + // is met, we try to use current intersection of tokens as a + // street layer and try to match BUILDINGs or POIs. + CBV streets(ctx.m_streets); + + CBV all; + all.SetFull(); + + size_t curToken = startToken; + + // This variable is used for prevention of duplicate calls to + // CreateStreetsLayerAndMatchLowerLayers() with the same + // arguments. + size_t lastToken = startToken; + + // When true, no bit vectors were intersected with |streets| at all. + bool emptyIntersection = true; + + // When true, |streets| is in the incomplete state and can't be + // used for creation of street layers. + bool incomplete = false; + + auto emit = [&]() { + if (streets.IsEmpty() || emptyIntersection || incomplete || lastToken == curToken) + return; + + CBV fs(streets); + CBV fa(all); + + ASSERT(!fs.IsFull(), ()); + ASSERT(!fa.IsFull(), ()); + + if (filter.NeedToFilter(fs)) + fs = filter.Filter(fs); + + if (fs.IsEmpty()) + return; + + if (filter.NeedToFilter(fa)) + fa = filter.Filter(fa).Union(fs); + + predictions.emplace_back(); + auto & prediction = predictions.back(); + + prediction.m_tokenRange = TokenRange(startToken, curToken); + + ASSERT_NOT_EQUAL(fs.PopCount(), 0, ()); + ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ()); + prediction.m_prob = static_cast(fs.PopCount()) / static_cast(fa.PopCount()); + + prediction.m_features = move(fs); + prediction.m_hash = prediction.m_features.Hash(); + prediction.m_withMisprints = withMisprints; + }; + + StreetTokensFilter streetsFilter( + [&](strings::UniString const & /* token */, size_t tag) { + auto buffer = streets.Intersect(ctx.m_features[tag].m_features); + if (tag < curToken) + { + // This is the case for delayed + // street synonym. Therefore, + // |streets| is temporarily in the + // incomplete state. + streets = buffer; + all = all.Intersect(ctx.m_features[tag].m_features); + emptyIntersection = false; + + incomplete = true; + return; + } + ASSERT_EQUAL(tag, curToken, ()); + + // |streets| will become empty after + // the intersection. Therefore we need + // to create streets layer right now. + if (buffer.IsEmpty()) + emit(); + + streets = buffer; + all = all.Intersect(ctx.m_features[tag].m_features); + emptyIntersection = false; + incomplete = false; + }, + withMisprints); + + for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); ++curToken) + { + auto const & token = params.GetToken(curToken).GetOriginal(); + bool const isPrefix = params.IsPrefixToken(curToken); + + if (house_numbers::LooksLikeHouseNumber(token, isPrefix)) + emit(); + + streetsFilter.Put(token, isPrefix, curToken); + } + emit(); +} } // namespace // static @@ -87,9 +193,10 @@ void StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter, // Leave the most probable and longest prediction for predictions with the same m_hash (features) // and m_withMisprints. - // We will still distinguish parses with the same m_hash (features) but different range and m_withMisprints. - // For example, for "Paramount dive" we will have two parses: - // STREET UNUSED (can be matched to poi later) + // We will still distinguish parses with the same m_hash (features) but different range and + // m_withMisprints. For example, for "Paramount dive" we will have two parses: + // + // STREET UNUSED (can be matched to poi later) // Paramount dive // // STREET STREET ("drive" with misprints) @@ -115,114 +222,8 @@ void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & if (ctx.IsTokenUsed(startToken)) continue; - auto emit = [&](CBV const & streets, CBV const & all, size_t curToken, size_t lastToken, - bool emptyIntersection, bool incomplete, bool withMisprints) { - if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken) - { - CBV fs(streets); - CBV fa(all); - - ASSERT(!fs.IsFull(), ()); - ASSERT(!fa.IsFull(), ()); - - if (filter.NeedToFilter(fs)) - fs = filter.Filter(fs); - - if (fs.IsEmpty()) - return; - - if (filter.NeedToFilter(fa)) - fa = filter.Filter(fa).Union(fs); - - predictions.emplace_back(); - auto & prediction = predictions.back(); - - prediction.m_tokenRange = TokenRange(startToken, curToken); - - ASSERT_NOT_EQUAL(fs.PopCount(), 0, ()); - ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ()); - prediction.m_prob = static_cast(fs.PopCount()) / static_cast(fa.PopCount()); - - prediction.m_features = move(fs); - prediction.m_hash = prediction.m_features.Hash(); - prediction.m_withMisprints = withMisprints; - } - }; - - auto findStreets = [&](bool withMisprints) { - // Here we try to match as many tokens as possible while - // intersection is a non-empty bit vector of streets. Single - // tokens that are synonyms to streets are ignored. Moreover, - // each time a token that looks like a beginning of a house number - // is met, we try to use current intersection of tokens as a - // street layer and try to match BUILDINGs or POIs. - CBV streets(ctx.m_streets); - - CBV all; - all.SetFull(); - - size_t curToken = startToken; - - // This variable is used for prevention of duplicate calls to - // CreateStreetsLayerAndMatchLowerLayers() with the same - // arguments. - size_t lastToken = startToken; - - // When true, no bit vectors were intersected with |streets| at all. - bool emptyIntersection = true; - - // When true, |streets| is in the incomplete state and can't be - // used for creation of street layers. - bool incomplete = false; - - StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag) - { - auto buffer = streets.Intersect(ctx.m_features[tag].m_features); - if (tag < curToken) - { - // This is the case for delayed - // street synonym. Therefore, - // |streets| is temporarily in the - // incomplete state. - streets = buffer; - all = all.Intersect(ctx.m_features[tag].m_features); - emptyIntersection = false; - - incomplete = true; - return; - } - ASSERT_EQUAL(tag, curToken, ()); - - // |streets| will become empty after - // the intersection. Therefore we need - // to create streets layer right now. - if (buffer.IsEmpty()) - emit(streets, all, curToken, lastToken, emptyIntersection, - incomplete, withMisprints); - - streets = buffer; - all = all.Intersect(ctx.m_features[tag].m_features); - emptyIntersection = false; - incomplete = false; - }, - withMisprints); - - for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); - ++curToken) - { - auto const & token = params.GetToken(curToken).GetOriginal(); - bool const isPrefix = params.IsPrefixToken(curToken); - - if (house_numbers::LooksLikeHouseNumber(token, isPrefix)) - emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints); - - filter.Put(token, isPrefix, curToken); - } - emit(streets, all, curToken, lastToken, emptyIntersection, incomplete, withMisprints); - }; - - findStreets(false /* withMisprints */); - findStreets(true /* withMisprints */); + ::search::FindStreets(ctx, filter, params, startToken, false /* withMisprints */, predictions); + ::search::FindStreets(ctx, filter, params, startToken, true /* withMisprints */, predictions); } } } // namespace search