diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index bdb66db8f3..feebcbed86 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -166,7 +166,7 @@ struct FeatureNameInserter strings::UniString const uniName = search::NormalizeAndSimplifyString(name); // split input string on tokens - buffer_vector tokens; + search::QueryTokens tokens; SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::Delimiters()); // add synonyms for input native string diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 9a651056ff..c1ecfaa816 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -5,6 +5,8 @@ #include "base/macros.hpp" #include "base/mem_trie.hpp" +#include "3party/utfcpp/source/utf8/unchecked.h" + using namespace std; using namespace strings; @@ -296,6 +298,28 @@ private: StreetsSynonymsHolder g_streets; } // namespace +void GetStringPrefix(string const & str, string & res) +{ + search::Delimiters delims; + using Iter = utf8::unchecked::iterator; + + // Find start iterator of prefix in input query. + Iter iter(str.end()); + while (iter.base() != str.begin()) + { + Iter prev = iter; + --prev; + + if (delims(*prev)) + break; + + iter = prev; + } + + // Assign the input string without prefix to result. + res.assign(str.begin(), iter.base()); +} + UniString GetStreetNameAsKey(string const & name) { if (name.empty()) diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 8d1ddcd35e..cad9a1e361 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -55,6 +55,9 @@ bool TokenizeStringAndCheckIfLastTokenIsPrefix(std::string const & s, Tokens & t return TokenizeStringAndCheckIfLastTokenIsPrefix(NormalizeAndSimplifyString(s), tokens, delims); } +// Chops off the last query token (the "prefix" one) from |str| and stores the result in |res|. 
+void GetStringPrefix(std::string const & str, std::string & res); + strings::UniString GetStreetNameAsKey(std::string const & name); // *NOTE* The argument string must be normalized and simplified. @@ -99,5 +102,4 @@ private: Callback m_callback; }; - } // namespace search diff --git a/search/CMakeLists.txt b/search/CMakeLists.txt index 286e26172c..2594b4895a 100644 --- a/search/CMakeLists.txt +++ b/search/CMakeLists.txt @@ -50,6 +50,8 @@ set( geometry_cache.hpp geometry_utils.cpp geometry_utils.hpp + highlighting.cpp + highlighting.hpp hotels_classifier.cpp hotels_classifier.hpp hotels_filter.cpp @@ -126,7 +128,7 @@ set( street_vicinity_loader.hpp streets_matcher.cpp streets_matcher.hpp - string_intersection.hpp + suggest.cpp suggest.hpp token_range.hpp token_slice.cpp diff --git a/search/common.hpp b/search/common.hpp index abd8a88c71..c735c4e0ad 100644 --- a/search/common.hpp +++ b/search/common.hpp @@ -1,7 +1,20 @@ #pragma once +#include "indexer/categories_holder.hpp" + +#include "base/buffer_vector.hpp" +#include "base/small_set.hpp" +#include "base/string_utils.hpp" + namespace search { +// The prefix is stored separately. +// todo(@m, @y) Find a way (similar to TokenSlice maybe?) to unify +// the prefix and non-prefix tokens. +using QueryTokens = buffer_vector; + +using Locales = base::SafeSmallSet; + /// Upper bound for max count of tokens for indexing and scoring. 
int constexpr MAX_TOKENS = 32; int constexpr MAX_SUGGESTS_COUNT = 5; diff --git a/search/geocoder.hpp b/search/geocoder.hpp index f99fc6fcb5..2ff77e0556 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -29,7 +29,6 @@ #include "geometry/rect2d.hpp" -#include "base/buffer_vector.hpp" #include "base/cancellable.hpp" #include "base/dfa_helpers.hpp" #include "base/levenshtein_dfa.hpp" @@ -57,7 +56,6 @@ class PreRanker; class FeaturesFilter; class FeaturesLayerMatcher; -class SearchModel; class TokenSlice; // This class is used to retrieve all features corresponding to a @@ -90,7 +88,6 @@ public: Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter, PreRanker & preRanker, VillagesCache & villagesCache, my::Cancellable const & cancellable); - ~Geocoder(); // Sets search query params. diff --git a/search/highlighting.cpp b/search/highlighting.cpp new file mode 100644 index 0000000000..bfaee0e4ab --- /dev/null +++ b/search/highlighting.cpp @@ -0,0 +1,61 @@ +#include "search/highlighting.hpp" + +using namespace std; + +namespace +{ +// Makes continuous range for tokens and prefix. 
+template +class CombinedIterator +{ + Iter m_cur; + Iter m_end; + Value const * m_val; + +public: + CombinedIterator(Iter cur, Iter end, Value const * val) : m_cur(cur), m_end(end), m_val(val) {} + + Value const & operator*() const + { + ASSERT(m_val != nullptr || m_cur != m_end, ("dereferencing of an empty iterator")); + if (m_cur != m_end) + return *m_cur; + + return *m_val; + } + + CombinedIterator & operator++() + { + if (m_cur != m_end) + ++m_cur; + else + m_val = nullptr; + return *this; + } + + bool operator==(CombinedIterator const & other) const + { + return m_val == other.m_val && m_cur == other.m_cur; + } + + bool operator!=(CombinedIterator const & other) const { return !(*this == other); } +}; +} // namespace + +namespace search +{ +// static +void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res) +{ + using Iter = QueryTokens::const_iterator; + using CombinedIter = CombinedIterator; + + CombinedIter beg(tokens.begin(), tokens.end(), prefix.empty() ? 
0 : &prefix); + CombinedIter end(tokens.end() /* cur */, tokens.end() /* end */, 0); + auto assignHighlightRange = [&](pair const & range) { + res.AddHighlightRange(range); + }; + + SearchStringTokensIntersectionRanges(res.GetString(), beg, end, assignHighlightRange); +} +} // namespace search diff --git a/search/string_intersection.hpp b/search/highlighting.hpp similarity index 66% rename from search/string_intersection.hpp rename to search/highlighting.hpp index 52b329dc3f..6580cdcf6b 100644 --- a/search/string_intersection.hpp +++ b/search/highlighting.hpp @@ -2,12 +2,18 @@ #include "indexer/search_delimiters.hpp" -#include "std/utility.hpp" +#include "search/common.hpp" +#include "search/result.hpp" + +#include "base/string_utils.hpp" + +#include +#include namespace search { template -void SearchStringTokensIntersectionRanges(string const & s, LowTokensIterType itLowBeg, +void SearchStringTokensIntersectionRanges(std::string const & s, LowTokensIterType itLowBeg, LowTokensIterType itLowEnd, F f) { // split input query by tokens and prefix @@ -31,7 +37,7 @@ void SearchStringTokensIntersectionRanges(string const & s, LowTokensIterType it strings::UniString subStr; subStr.assign(str.begin() + beg, str.begin() + pos); size_t maxCount = 0; - pair result(0, 0); + std::pair result(0, 0); for (LowTokensIterType itLow = itLowBeg; itLow != itLowEnd; ++itLow) { @@ -49,4 +55,8 @@ void SearchStringTokensIntersectionRanges(string const & s, LowTokensIterType it f(result); } } -} + +// Adds to |res| the ranges that match the query tokens and, therefore, should be highlighted. +// The query is passed in |tokens| and |prefix|. 
+void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res); +} // namespace search diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index b2591173b9..71f55a89ac 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -23,7 +23,39 @@ namespace search { -double const kDistSameStreetMeters = 5000.0; +namespace +{ +class SkipRegionInfo +{ + static size_t const m_count = 2; + uint32_t m_types[m_count]; + +public: + SkipRegionInfo() + { + char const * arr[][2] = { + {"place", "continent"}, + {"place", "country"} + }; + static_assert(m_count == ARRAY_SIZE(arr), ""); + + Classificator const & c = classif(); + for (size_t i = 0; i < m_count; ++i) + m_types[i] = c.GetTypeByPath(vector(arr[i], arr[i] + 2)); + } + + bool IsSkip(uint32_t type) const + { + for (uint32_t t : m_types) + { + if (t == type) + return true; + } + return false; + } +}; +} // namespace + char const * const kEmptyRatingSymbol = "-"; char const * const kPricingSymbol = "$"; @@ -76,13 +108,14 @@ void ProcessMetadata(FeatureType const & ft, Result::Metadata & meta) meta.m_isInitialized = true; } -PreResult1::PreResult1(FeatureID const & fID, PreRankingInfo const & info) : m_id(fID), m_info(info) +PreRankerResult::PreRankerResult(FeatureID const & fID, PreRankingInfo const & info) + : m_id(fID), m_info(info) { ASSERT(m_id.IsValid(), ()); } // static -bool PreResult1::LessRank(PreResult1 const & r1, PreResult1 const & r2) +bool PreRankerResult::LessRank(PreRankerResult const & r1, PreRankerResult const & r2) { if (r1.m_info.m_rank != r2.m_info.m_rank) return r1.m_info.m_rank > r2.m_info.m_rank; @@ -90,19 +123,20 @@ bool PreResult1::LessRank(PreResult1 const & r1, PreResult1 const & r2) } // static -bool PreResult1::LessDistance(PreResult1 const & r1, PreResult1 const & r2) +bool PreRankerResult::LessDistance(PreRankerResult const & r1, PreRankerResult const & r2) { if (r1.m_info.m_distanceToPivot != 
r2.m_info.m_distanceToPivot) return r1.m_info.m_distanceToPivot < r2.m_info.m_distanceToPivot; return r1.m_info.m_rank > r2.m_info.m_rank; } -PreResult2::PreResult2(FeatureType const & f, m2::PointD const & center, m2::PointD const & pivot, - string const & displayName, string const & fileName) +RankerResult::RankerResult(FeatureType const & f, m2::PointD const & center, + m2::PointD const & pivot, string const & displayName, + string const & fileName) : m_id(f.GetID()) , m_types(f) , m_str(displayName) - , m_resultType(ftypes::IsBuildingChecker::Instance()(m_types) ? RESULT_BUILDING : RESULT_FEATURE) + , m_resultType(ftypes::IsBuildingChecker::Instance()(m_types) ? TYPE_BUILDING : TYPE_FEATURE) , m_geomType(f.GetFeatureType()) { ASSERT(m_id.IsValid(), ()); @@ -116,47 +150,14 @@ PreResult2::PreResult2(FeatureType const & f, m2::PointD const & center, m2::Poi ProcessMetadata(f, m_metadata); } -PreResult2::PreResult2(double lat, double lon) - : m_str("(" + measurement_utils::FormatLatLon(lat, lon) + ")"), m_resultType(RESULT_LATLON) +RankerResult::RankerResult(double lat, double lon) + : m_str("(" + measurement_utils::FormatLatLon(lat, lon) + ")"), m_resultType(TYPE_LATLON) { m_region.SetParams(string(), MercatorBounds::FromLatLon(lat, lon)); } -namespace -{ - class SkipRegionInfo - { - static size_t const m_count = 2; - uint32_t m_types[m_count]; - - public: - SkipRegionInfo() - { - char const * arr[][2] = { - { "place", "continent" }, - { "place", "country" } - }; - static_assert(m_count == ARRAY_SIZE(arr), ""); - - Classificator const & c = classif(); - for (size_t i = 0; i < m_count; ++i) - m_types[i] = c.GetTypeByPath(vector(arr[i], arr[i] + 2)); - } - - bool IsSkip(uint32_t type) const - { - for (uint32_t t : m_types) - { - if (t == type) - return true; - } - return false; - } - }; -} - -string PreResult2::GetRegionName(storage::CountryInfoGetter const & infoGetter, - uint32_t fType) const +string RankerResult::GetRegionName(storage::CountryInfoGetter const & 
infoGetter, + uint32_t fType) const { static SkipRegionInfo const checker; if (checker.IsSkip(fType)) @@ -167,141 +168,29 @@ string PreResult2::GetRegionName(storage::CountryInfoGetter const & infoGetter, return info.m_name; } -namespace -{ -// TODO: Format street and house number according to local country's rules. -string FormatStreetAndHouse(ReverseGeocoder::Address const & addr) -{ - ASSERT_GREATER_OR_EQUAL(addr.GetDistance(), 0, ()); - return addr.GetStreetName() + ", " + addr.GetHouseNumber(); -} - -// TODO: Share common formatting code for search results and place page. -string FormatFullAddress(ReverseGeocoder::Address const & addr, string const & region) -{ - // TODO: Print "near" for not exact addresses. - if (addr.GetDistance() != 0) - return region; - - return FormatStreetAndHouse(addr) + (region.empty() ? "" : ", ") + region; -} -} // namespace - -Result PreResult2::GenerateFinalResult(storage::CountryInfoGetter const & infoGetter, - CategoriesHolder const * pCat, - set const * pTypes, int8_t locale, - ReverseGeocoder const * coder) const -{ - ReverseGeocoder::Address addr; - bool addrComputed = false; - - string name = m_str; - if (coder && name.empty()) - { - // Insert exact address (street and house number) instead of empty result name. - if (!addrComputed) - { - coder->GetNearbyAddress(GetCenter(), addr); - addrComputed = true; - } - if (addr.GetDistance() == 0) - name = FormatStreetAndHouse(addr); - } - - uint32_t const type = GetBestType(pTypes); - - // Format full address only for suitable results. 
- string address; - if (coder) - { - address = GetRegionName(infoGetter, type); - if (ftypes::IsAddressObjectChecker::Instance()(m_types)) - { - if (!addrComputed) - { - coder->GetNearbyAddress(GetCenter(), addr); - addrComputed = true; - } - address = FormatFullAddress(addr, address); - } - } - - switch (m_resultType) - { - case RESULT_FEATURE: - case RESULT_BUILDING: - return Result(m_id, GetCenter(), name, address, pCat->GetReadableFeatureType(type, locale), - type, m_metadata); - default: - ASSERT_EQUAL(m_resultType, RESULT_LATLON, ()); - return Result(GetCenter(), name, address); - } -} - -PreResult2::StrictEqualF::StrictEqualF(PreResult2 const & r, double const epsMeters) - : m_r(r), m_epsMeters(epsMeters) -{ -} - -bool PreResult2::StrictEqualF::operator()(PreResult2 const & r) const -{ - if (m_r.m_resultType == r.m_resultType && m_r.m_resultType == RESULT_FEATURE) - { - if (m_r.IsEqualCommon(r)) - return PointDistance(m_r.GetCenter(), r.GetCenter()) < m_epsMeters; - } - - return false; -} - -bool PreResult2::LessLinearTypesF::operator() (PreResult2 const & r1, PreResult2 const & r2) const -{ - if (r1.m_geomType != r2.m_geomType) - return (r1.m_geomType < r2.m_geomType); - - if (r1.m_str != r2.m_str) - return (r1.m_str < r2.m_str); - - uint32_t const t1 = r1.GetBestType(); - uint32_t const t2 = r2.GetBestType(); - if (t1 != t2) - return (t1 < t2); - - // Should stay the best feature, after unique, so add this criteria: - return r1.m_distance < r2.m_distance; -} - -bool PreResult2::EqualLinearTypesF::operator() (PreResult2 const & r1, PreResult2 const & r2) const -{ - // Note! Do compare for distance when filtering linear objects. - // Otherwise we will skip the results for different parts of the map. 
- return r1.m_geomType == feature::GEOM_LINE && r1.IsEqualCommon(r2) && - PointDistance(r1.GetCenter(), r2.GetCenter()) < kDistSameStreetMeters; -} - -bool PreResult2::IsEqualCommon(PreResult2 const & r) const +bool RankerResult::IsEqualCommon(RankerResult const & r) const { return m_geomType == r.m_geomType && GetBestType() == r.GetBestType() && m_str == r.m_str; } -bool PreResult2::IsStreet() const +bool RankerResult::IsStreet() const { return m_geomType == feature::GEOM_LINE && ftypes::IsStreetChecker::Instance()(m_types); } -string PreResult2::DebugPrint() const +string RankerResult::DebugPrint() const { stringstream ss; ss << "IntermediateResult [ " << "Name: " << m_str << "; Type: " << GetBestType() - << "; Ranking info: " << search::DebugPrint(m_info) + << "; " << search::DebugPrint(m_info) << "; Linear model rank: " << m_info.GetLinearModelRank() << " ]"; return ss.str(); } -uint32_t PreResult2::GetBestType(set const * pPrefferedTypes) const +uint32_t RankerResult::GetBestType(set const * pPrefferedTypes) const { if (pPrefferedTypes) { @@ -319,8 +208,8 @@ uint32_t PreResult2::GetBestType(set const * pPrefferedTypes) const return type; } -void PreResult2::RegionInfo::GetRegion(storage::CountryInfoGetter const & infoGetter, - storage::CountryInfo & info) const +void RankerResult::RegionInfo::GetRegion(storage::CountryInfoGetter const & infoGetter, + storage::CountryInfo & info) const { if (!m_file.empty()) infoGetter.GetRegionInfo(m_file, info); diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp index e7cc3e5960..c16fa1e062 100644 --- a/search/intermediate_result.hpp +++ b/search/intermediate_result.hpp @@ -22,15 +22,15 @@ namespace search { class ReverseGeocoder; -/// First pass results class. Objects are creating during search in trie. -/// Works fast without feature loading and provide ranking. -class PreResult1 +// First pass results class. Objects are created during search in trie. 
+// Works fast because it does not load features. +class PreRankerResult { public: - PreResult1(FeatureID const & fID, PreRankingInfo const & info); + PreRankerResult(FeatureID const & fID, PreRankingInfo const & info); - static bool LessRank(PreResult1 const & r1, PreResult1 const & r2); - static bool LessDistance(PreResult1 const & r1, PreResult1 const & r2); + static bool LessRank(PreRankerResult const & r1, PreRankerResult const & r2); + static bool LessDistance(PreRankerResult const & r1, PreRankerResult const & r2); inline FeatureID GetId() const { return m_id; } inline double GetDistance() const { return m_info.m_distanceToPivot; } @@ -39,32 +39,30 @@ public: inline PreRankingInfo const & GetInfo() const { return m_info; } private: - friend class PreResult2; + friend class RankerResult; FeatureID m_id; PreRankingInfo m_info; }; -/// Second result class. Objects are creating during reading of features. -/// Read and fill needed info for ranking and getting final results. -class PreResult2 +// Second result class. Objects are created during reading of features. +// Read and fill needed info for ranking and getting final results. +class RankerResult { - friend class PreResult2Maker; - public: - enum ResultType + enum Type { - RESULT_LATLON, - RESULT_FEATURE, - RESULT_BUILDING //!< Buildings are not filtered out in duplicates filter. + TYPE_LATLON, + TYPE_FEATURE, + TYPE_BUILDING //!< Buildings are not filtered out in duplicates filter. }; /// For RESULT_FEATURE and RESULT_BUILDING. - PreResult2(FeatureType const & f, m2::PointD const & center, m2::PointD const & pivot, - string const & displayName, string const & fileName); + RankerResult(FeatureType const & f, m2::PointD const & center, m2::PointD const & pivot, + string const & displayName, string const & fileName); /// For RESULT_LATLON. 
- PreResult2(double lat, double lon); + RankerResult(double lat, double lon); inline search::RankingInfo const & GetRankingInfo() const { return m_info; } @@ -74,41 +72,6 @@ public: m_info = forward(info); } - /// @param[in] infoGetter Need to get region for result. - /// @param[in] pCat Categories need to display readable type string. - /// @param[in] pTypes Set of preffered types that match input tokens by categories. - /// @param[in] lang Current system language. - /// @param[in] coder May be nullptr - no need to calculate address. - Result GenerateFinalResult(storage::CountryInfoGetter const & infoGetter, - CategoriesHolder const * pCat, set const * pTypes, - int8_t locale, ReverseGeocoder const * coder) const; - - /// Filter equal features for different mwm's. - class StrictEqualF - { - public: - StrictEqualF(PreResult2 const & r, double const epsMeters); - - bool operator()(PreResult2 const & r) const; - - private: - PreResult2 const & m_r; - double const m_epsMeters; - }; - - /// To filter equal linear objects. 
- //@{ - struct LessLinearTypesF - { - bool operator() (PreResult2 const & r1, PreResult2 const & r2) const; - }; - class EqualLinearTypesF - { - public: - bool operator() (PreResult2 const & r1, PreResult2 const & r2) const; - }; - //@} - string DebugPrint() const; bool IsStreet() const; @@ -116,17 +79,23 @@ public: inline FeatureID const & GetID() const { return m_id; } inline string const & GetName() const { return m_str; } inline feature::TypesHolder const & GetTypes() const { return m_types; } + inline Type const & GetResultType() const { return m_resultType; } inline m2::PointD GetCenter() const { return m_region.m_point; } + inline double GetDistance() const { return m_distance; } + inline feature::EGeomType GetGeomType() const { return m_geomType; } + inline Result::Metadata GetMetadata() const { return m_metadata; } -private: - bool IsEqualCommon(PreResult2 const & r) const; + inline double GetDistanceToPivot() const { return m_info.m_distanceToPivot; } + inline double GetLinearModelRank() const { return m_info.GetLinearModelRank(); } - FeatureID m_id; - feature::TypesHolder m_types; + string GetRegionName(storage::CountryInfoGetter const & infoGetter, uint32_t fType) const; + + bool IsEqualCommon(RankerResult const & r) const; uint32_t GetBestType(set const * pPrefferedTypes = 0) const; - string m_str; +private: + friend class RankerResultMaker; struct RegionInfo { @@ -141,60 +110,20 @@ private: void GetRegion(storage::CountryInfoGetter const & infoGetter, storage::CountryInfo & info) const; - } m_region; - - string GetRegionName(storage::CountryInfoGetter const & infoGetter, uint32_t fType) const; + }; + RegionInfo m_region; + FeatureID m_id; + feature::TypesHolder m_types; + string m_str; double m_distance; - ResultType m_resultType; + Type m_resultType; RankingInfo m_info; feature::EGeomType m_geomType; - Result::Metadata m_metadata; }; -inline string DebugPrint(PreResult2 const & t) -{ - return t.DebugPrint(); -} +inline string DebugPrint(RankerResult 
const & t) { return t.DebugPrint(); } void ProcessMetadata(FeatureType const & ft, Result::Metadata & meta); - -class IndexedValue -{ - /// @todo Do not use shared_ptr for optimization issues. - /// Need to rewrite std::unique algorithm. - unique_ptr m_value; - - double m_rank; - double m_distanceToPivot; - - friend string DebugPrint(IndexedValue const & value) - { - ostringstream os; - os << "IndexedValue ["; - if (value.m_value) - os << DebugPrint(*value.m_value); - os << "]"; - return os.str(); - } - -public: - explicit IndexedValue(unique_ptr value) - : m_value(move(value)), m_rank(0.0), m_distanceToPivot(numeric_limits::max()) - { - if (!m_value) - return; - - auto const & info = m_value->GetRankingInfo(); - m_rank = info.GetLinearModelRank(); - m_distanceToPivot = info.m_distanceToPivot; - } - - PreResult2 const & operator*() const { return *m_value; } - - inline double GetRank() const { return m_rank; } - - inline double GetDistanceToPivot() const { return m_distanceToPivot; } -}; } // namespace search diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp index 024a5b5f29..4b470c4cca 100644 --- a/search/pre_ranker.cpp +++ b/search/pre_ranker.cpp @@ -19,36 +19,7 @@ namespace search { namespace { -struct LessFeatureID -{ - using TValue = PreResult1; - - inline bool operator()(TValue const & lhs, TValue const & rhs) const - { - return lhs.GetId() < rhs.GetId(); - } -}; - -// Orders PreResult1 by following criterion: -// 1. Feature Id (increasing), if same... -// 2. Number of matched tokens from the query (decreasing), if same... -// 3. Index of the first matched token from the query (increasing). 
-struct ComparePreResult1 -{ - bool operator()(PreResult1 const & lhs, PreResult1 const & rhs) const - { - if (lhs.GetId() != rhs.GetId()) - return lhs.GetId() < rhs.GetId(); - - auto const & linfo = lhs.GetInfo(); - auto const & rinfo = rhs.GetInfo(); - if (linfo.GetNumTokens() != rinfo.GetNumTokens()) - return linfo.GetNumTokens() > rinfo.GetNumTokens(); - return linfo.InnermostTokenRange().Begin() < rinfo.InnermostTokenRange().Begin(); - } -}; - -void SweepNearbyResults(double eps, vector & results) +void SweepNearbyResults(double eps, vector & results) { m2::NearbyPointsSweeper sweeper(eps); for (size_t i = 0; i < results.size(); ++i) @@ -57,7 +28,7 @@ void SweepNearbyResults(double eps, vector & results) sweeper.Add(p.x, p.y, i); } - vector filtered; + vector filtered; sweeper.Sweep([&filtered, &results](size_t i) { filtered.push_back(results[i]); @@ -92,7 +63,7 @@ void PreRanker::FillMissingFieldsInPreResults() if (fillCenters) m_pivotFeatures.SetPosition(m_params.m_accuratePivotCenter, m_params.m_scale); - ForEach([&](PreResult1 & r) { + ForEach([&](PreRankerResult & r) { FeatureID const & id = r.GetId(); PreRankingInfo & info = r.GetInfo(); if (id.m_mwmId != mwmId) @@ -132,18 +103,35 @@ void PreRanker::FillMissingFieldsInPreResults() void PreRanker::Filter(bool viewportSearch) { - using TSet = set; - TSet filtered; + struct LessFeatureID + { + inline bool operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const + { + return lhs.GetId() < rhs.GetId(); + } + }; - sort(m_results.begin(), m_results.end(), ComparePreResult1()); - m_results.erase(unique(m_results.begin(), m_results.end(), my::EqualsBy(&PreResult1::GetId)), + auto comparePreRankerResults = [](PreRankerResult const & lhs, + PreRankerResult const & rhs) -> bool { + if (lhs.GetId() != rhs.GetId()) + return lhs.GetId() < rhs.GetId(); + + auto const & linfo = lhs.GetInfo(); + auto const & rinfo = rhs.GetInfo(); + if (linfo.GetNumTokens() != rinfo.GetNumTokens()) + return 
linfo.GetNumTokens() > rinfo.GetNumTokens(); + return linfo.InnermostTokenRange().Begin() < rinfo.InnermostTokenRange().Begin(); + }; + + sort(m_results.begin(), m_results.end(), comparePreRankerResults); + m_results.erase(unique(m_results.begin(), m_results.end(), my::EqualsBy(&PreRankerResult::GetId)), m_results.end()); if (m_results.size() > BatchSize()) { bool const centersLoaded = all_of(m_results.begin(), m_results.end(), - [](PreResult1 const & result) { return result.GetInfo().m_centerLoaded; }); + [](PreRankerResult const & result) { return result.GetInfo().m_centerLoaded; }); if (viewportSearch && centersLoaded) { FilterForViewportSearch(); @@ -153,7 +141,7 @@ void PreRanker::Filter(bool viewportSearch) } else { - sort(m_results.begin(), m_results.end(), &PreResult1::LessDistance); + sort(m_results.begin(), m_results.end(), &PreRankerResult::LessDistance); // Priority is some kind of distance from the viewport or // position, therefore if we have a bunch of results with the same @@ -186,12 +174,17 @@ void PreRanker::Filter(bool viewportSearch) shuffle(b, e, m_rng); } } + + using TSet = set; + TSet filtered; + filtered.insert(m_results.begin(), m_results.begin() + min(m_results.size(), BatchSize())); if (!viewportSearch) { size_t n = min(m_results.size(), BatchSize()); - nth_element(m_results.begin(), m_results.begin() + n, m_results.end(), &PreResult1::LessRank); + nth_element(m_results.begin(), m_results.begin() + n, m_results.end(), + &PreRankerResult::LessRank); filtered.insert(m_results.begin(), m_results.begin() + n); } @@ -203,7 +196,7 @@ void PreRanker::UpdateResults(bool lastUpdate) FillMissingFieldsInPreResults(); Filter(m_viewportSearch); m_numSentResults += m_results.size(); - m_ranker.SetPreResults1(move(m_results)); + m_ranker.SetPreRankerResults(move(m_results)); m_results.clear(); m_ranker.UpdateResults(lastUpdate); @@ -222,7 +215,7 @@ void PreRanker::FilterForViewportSearch() { auto const & viewport = m_params.m_viewport; - 
my::EraseIf(m_results, [&viewport](PreResult1 const & result) { + my::EraseIf(m_results, [&viewport](PreRankerResult const & result) { auto const & info = result.GetInfo(); return !viewport.IsPointInside(info.m_center); }); @@ -254,7 +247,7 @@ void PreRanker::FilterForViewportSearch() buckets[dx * kNumYSlots + dy].push_back(i); } - vector results; + vector results; double const density = static_cast(BatchSize()) / static_cast(n); for (auto & bucket : buckets) { diff --git a/search/pre_ranker.hpp b/search/pre_ranker.hpp index 1a3a51dad7..5c30542f78 100644 --- a/search/pre_ranker.hpp +++ b/search/pre_ranker.hpp @@ -87,7 +87,7 @@ private: Index const & m_index; Ranker & m_ranker; - vector m_results; + vector m_results; size_t const m_limit; Params m_params; bool m_viewportSearch = false; diff --git a/search/processor.cpp b/search/processor.cpp index da192a371f..ce6b8dc8ca 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -184,8 +184,8 @@ Processor::Processor(Index const & index, CategoriesHolder const & categories, , m_viewportSearch(false) , m_villagesCache(static_cast(*this)) , m_citiesBoundaries(index) - , m_ranker(index, m_citiesBoundaries, infoGetter, m_emitter, categories, suggests, - m_villagesCache, static_cast(*this)) + , m_ranker(index, m_citiesBoundaries, infoGetter, m_keywordsScorer, m_emitter, categories, + suggests, m_villagesCache, static_cast(*this)) , m_preRanker(index, m_ranker, kPreResultsCount) , m_geocoder(index, infoGetter, m_preRanker, m_villagesCache, static_cast(*this)) @@ -198,7 +198,7 @@ Processor::Processor(Index const & index, CategoriesHolder const & categories, {StringUtf8Multilang::kInternationalCode, StringUtf8Multilang::kEnglishCode}, {StringUtf8Multilang::kDefaultCode}}; - m_ranker.SetLanguages(langPriorities); + m_keywordsScorer.SetLanguages(langPriorities); SetPreferredLocale("en"); } @@ -229,6 +229,7 @@ void Processor::SetPreferredLocale(string const & locale) // Default initialization. 
// If you want to reset input language, call SetInputLocale before search. SetInputLocale(locale); + m_ranker.SetLocalityLanguage(code); } void Processor::SetInputLocale(string const & locale) @@ -262,7 +263,7 @@ void Processor::SetQuery(string const & query) search::Delimiters delims; { - buffer_vector subTokens; + QueryTokens subTokens; for (auto const & token : tokens) { size_t numHashes = 0; @@ -298,7 +299,7 @@ void Processor::SetQuery(string const & query) m_tokens.resize(maxTokensCount); // Assign tokens and prefix to scorer. - m_ranker.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); + m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); // Get preferred types to show in results. m_preferredTypes.clear(); @@ -323,12 +324,12 @@ void Processor::SetRankPivot(m2::PointD const & pivot) void Processor::SetLanguage(int id, int8_t lang) { - m_ranker.SetLanguage(GetLangIndex(id), lang); + m_keywordsScorer.SetLanguage(GetLangIndex(id), lang); } int8_t Processor::GetLanguage(int id) const { - return m_ranker.GetLanguage(GetLangIndex(id)); + return m_keywordsScorer.GetLanguage(GetLangIndex(id)); } m2::PointD Processor::GetPivotPoint() const @@ -518,7 +519,7 @@ void Processor::SearchCoordinates() double lat, lon; if (!MatchLatLonDegree(m_query, lat, lon)) return; - m_emitter.AddResultNoChecks(m_ranker.MakeResult(PreResult2(lat, lon))); + m_emitter.AddResultNoChecks(m_ranker.MakeResult(RankerResult(lat, lon))); m_emitter.Emit(); } diff --git a/search/processor.hpp b/search/processor.hpp index 63ece13107..c09df8b3f8 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -22,7 +22,6 @@ #include "geometry/rect2d.hpp" -#include "base/buffer_vector.hpp" #include "base/cancellable.hpp" #include "base/limited_priority_queue.hpp" #include "base/string_utils.hpp" @@ -50,14 +49,7 @@ class CountryInfoGetter; namespace search { -struct Locality; -struct Region; - -class DoFindLocality; -class FeatureLoader; class Geocoder; -class 
HouseCompFactory; -class PreResult2Maker; // todo(@m) merge with Ranker class QueryParams; class Ranker; class ReverseGeocoder; @@ -125,13 +117,6 @@ protected: friend string DebugPrint(ViewportID viewportId); - friend class BestNameFinder; - friend class DoFindLocality; - friend class FeatureLoader; - friend class HouseCompFactory; - friend class PreResult2Maker; - friend class Ranker; - using TMWMVector = vector>; using TOffsetsVector = map>; using TFHeader = feature::DataHeader; @@ -150,12 +135,21 @@ protected: void SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool forceUpdate); void ClearCache(size_t ind); + /// @name Get ranking params. + //@{ + /// @return Rect for viewport-distance calculation. + m2::RectD const & GetViewport(ViewportID vID = DEFAULT_V) const; + //@} + + void SetLanguage(int id, int8_t lang); + int8_t GetLanguage(int id) const; + CategoriesHolder const & m_categories; storage::CountryInfoGetter const & m_infoGetter; string m_region; string m_query; - buffer_vector m_tokens; + QueryTokens m_tokens; strings::UniString m_prefix; set m_preferredTypes; @@ -169,21 +163,12 @@ protected: bool m_cianMode = false; SearchParams::OnResults m_onResults; - /// @name Get ranking params. - //@{ - /// @return Rect for viewport-distance calculation. 
- m2::RectD const & GetViewport(ViewportID vID = DEFAULT_V) const; - //@} - - void SetLanguage(int id, int8_t lang); - int8_t GetLanguage(int id) const; - -protected: bool m_viewportSearch; VillagesCache m_villagesCache; CitiesBoundariesTable m_citiesBoundaries; + KeywordLangMatcher m_keywordsScorer; Emitter m_emitter; Ranker m_ranker; PreRanker m_preRanker; diff --git a/search/ranker.cpp b/search/ranker.cpp index 63e4afb33e..ae341a4b5c 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -1,11 +1,13 @@ #include "search/ranker.hpp" #include "search/emitter.hpp" -#include "search/string_intersection.hpp" +#include "search/geometry_utils.hpp" +#include "search/highlighting.hpp" #include "search/token_slice.hpp" #include "search/utils.hpp" #include "indexer/feature_algo.hpp" +#include "indexer/search_string_utils.hpp" #include "base/logging.hpp" #include "base/string_utils.hpp" @@ -65,44 +67,35 @@ NameScores GetNameScores(FeatureType const & ft, Geocoder::Params const & params return bestScores; } -void RemoveDuplicatingLinear(vector & values) +void RemoveDuplicatingLinear(vector & results) { - PreResult2::LessLinearTypesF lessCmp; - PreResult2::EqualLinearTypesF equalCmp; + double constexpr kDistSameStreetMeters = 5000.0; - sort(values.begin(), values.end(), [&lessCmp](IndexedValue const & lhs, IndexedValue const & rhs) - { - return lessCmp(*lhs, *rhs); - }); + auto lessCmp = [](RankerResult const & r1, RankerResult const & r2) -> bool { + if (r1.GetGeomType() != r2.GetGeomType()) + return r1.GetGeomType() < r2.GetGeomType(); - values.erase(unique(values.begin(), values.end(), - [&equalCmp](IndexedValue const & lhs, IndexedValue const & rhs) - { - return equalCmp(*lhs, *rhs); - }), - values.end()); -} + if (r1.GetName() != r2.GetName()) + return r1.GetName() < r2.GetName(); -// Chops off the last query token (the "prefix" one) from |str| and stores the result in |res|. 
-void GetStringPrefix(string const & str, string & res) -{ - search::Delimiters delims; - // Find start iterator of prefix in input query. - using TIter = utf8::unchecked::iterator; - TIter iter(str.end()); - while (iter.base() != str.begin()) - { - TIter prev = iter; - --prev; + uint32_t const t1 = r1.GetBestType(); + uint32_t const t2 = r2.GetBestType(); + if (t1 != t2) + return t1 < t2; - if (delims(*prev)) - break; + // Should stay the best feature, after unique, so add this criteria: + return r1.GetDistance() < r2.GetDistance(); + }; - iter = prev; - } + auto equalCmp = [](RankerResult const & r1, RankerResult const & r2) -> bool { + // Note! Do compare for distance when filtering linear objects. + // Otherwise we will skip the results for different parts of the map. + return r1.GetGeomType() == feature::GEOM_LINE && r1.IsEqualCommon(r2) && + PointDistance(r1.GetCenter(), r2.GetCenter()) < kDistSameStreetMeters; + }; - // Assign result with input string without prefix. - res.assign(str.begin(), iter.base()); + sort(results.begin(), results.end(), lessCmp); + results.erase(unique(results.begin(), results.end(), equalCmp), results.end()); } ftypes::Type GetLocalityIndex(feature::TypesHolder const & types) @@ -123,48 +116,44 @@ ftypes::Type GetLocalityIndex(feature::TypesHolder const & types) } } -/// Makes continuous range for tokens and prefix. -template -class CombinedIter +// TODO: Format street and house number according to local country's rules. +string FormatStreetAndHouse(ReverseGeocoder::Address const & addr) { - TIter m_cur; - TIter m_end; - TValue const * m_val; + ASSERT_GREATER_OR_EQUAL(addr.GetDistance(), 0, ()); + return addr.GetStreetName() + ", " + addr.GetHouseNumber(); +} -public: - CombinedIter(TIter cur, TIter end, TValue const * val) : m_cur(cur), m_end(end), m_val(val) {} +// TODO: Share common formatting code for search results and place page. 
+string FormatFullAddress(ReverseGeocoder::Address const & addr, string const & region) +{ + // TODO: Print "near" for not exact addresses. + if (addr.GetDistance() != 0) + return region; - TValue const & operator*() const - { - ASSERT(m_val != 0 || m_cur != m_end, ("dereferencing of empty iterator")); - if (m_cur != m_end) - return *m_cur; + return FormatStreetAndHouse(addr) + (region.empty() ? "" : ", ") + region; +} - return *m_val; - } +bool ResultExists(RankerResult const & p, vector const & results, + double minDistanceOnMapBetweenResults) +{ + // Filter equal features in different mwms. + auto equalCmp = [&p, &minDistanceOnMapBetweenResults](RankerResult const & r) -> bool { + if (p.GetResultType() == r.GetResultType() && + p.GetResultType() == RankerResult::Type::TYPE_FEATURE) + { + if (p.IsEqualCommon(r)) + return PointDistance(p.GetCenter(), r.GetCenter()) < minDistanceOnMapBetweenResults; + } - CombinedIter & operator++() - { - if (m_cur != m_end) - ++m_cur; - else - m_val = 0; - return *this; - } + return false; + }; - bool operator==(CombinedIter const & other) const - { - return m_val == other.m_val && m_cur == other.m_cur; - } - - bool operator!=(CombinedIter const & other) const - { - return m_val != other.m_val || m_cur != other.m_cur; - } -}; + // Do not insert duplicating results. 
+ return find_if(results.begin(), results.end(), equalCmp) != results.cend(); +} } // namespace -class PreResult2Maker +class RankerResultMaker { Ranker & m_ranker; Index const & m_index; @@ -205,8 +194,8 @@ class PreResult2Maker return true; } - void InitRankingInfo(FeatureType const & ft, m2::PointD const & center, PreResult1 const & res, - search::RankingInfo & info) + void InitRankingInfo(FeatureType const & ft, m2::PointD const & center, + PreRankerResult const & res, search::RankingInfo & info) { auto const & preInfo = res.GetInfo(); @@ -287,44 +276,43 @@ class PreResult2Maker } public: - explicit PreResult2Maker(Ranker & ranker, Index const & index, - storage::CountryInfoGetter const & infoGetter, - Geocoder::Params const & params) + explicit RankerResultMaker(Ranker & ranker, Index const & index, + storage::CountryInfoGetter const & infoGetter, + Geocoder::Params const & params) : m_ranker(ranker), m_index(index), m_params(params), m_infoGetter(infoGetter) { } - unique_ptr operator()(PreResult1 const & res1) + unique_ptr operator()(PreRankerResult const & preRankerResult) { FeatureType ft; m2::PointD center; string name; string country; - if (!LoadFeature(res1.GetId(), ft, center, name, country)) - return unique_ptr(); + if (!LoadFeature(preRankerResult.GetId(), ft, center, name, country)) + return {}; - auto res2 = make_unique(ft, center, m_ranker.m_params.m_position /* pivot */, name, - country); + auto p = make_unique(ft, center, m_ranker.m_params.m_position /* pivot */, name, + country); search::RankingInfo info; - InitRankingInfo(ft, center, res1, info); + InitRankingInfo(ft, center, preRankerResult, info); info.m_rank = NormalizeRank(info.m_rank, info.m_type, center, country); - res2->SetRankingInfo(move(info)); + p->SetRankingInfo(move(info)); - return res2; + return p; } }; -// static -size_t const Ranker::kBatchSize = 10; - Ranker::Ranker(Index const & index, CitiesBoundariesTable const & boundariesTable, - storage::CountryInfoGetter const & 
infoGetter, Emitter & emitter, - CategoriesHolder const & categories, vector const & suggests, - VillagesCache & villagesCache, my::Cancellable const & cancellable) + storage::CountryInfoGetter const & infoGetter, KeywordLangMatcher & keywordsScorer, + Emitter & emitter, CategoriesHolder const & categories, + vector const & suggests, VillagesCache & villagesCache, + my::Cancellable const & cancellable) : m_reverseGeocoder(index) , m_cancellable(cancellable) + , m_keywordsScorer(keywordsScorer) , m_localities(index, boundariesTable, villagesCache) , m_index(index) , m_infoGetter(infoGetter) @@ -338,46 +326,14 @@ void Ranker::Init(Params const & params, Geocoder::Params const & geocoderParams { m_params = params; m_geocoderParams = geocoderParams; - m_preResults1.clear(); + m_preRankerResults.clear(); m_tentativeResults.clear(); } -bool Ranker::IsResultExists(PreResult2 const & p, vector const & values) +Result Ranker::MakeResult(RankerResult const & r) const { - PreResult2::StrictEqualF equalCmp(p, m_params.m_minDistanceOnMapBetweenResults); - - // Do not insert duplicating results. 
- return values.end() != find_if(values.begin(), values.end(), [&equalCmp](IndexedValue const & iv) - { - return equalCmp(*iv); - }); -} - -void Ranker::MakePreResult2(Geocoder::Params const & geocoderParams, vector & cont) -{ - PreResult2Maker maker(*this, m_index, m_infoGetter, geocoderParams); - for (auto const & r : m_preResults1) - { - auto p = maker(r); - if (!p) - continue; - - if (geocoderParams.m_mode == Mode::Viewport && - !geocoderParams.m_pivot.IsPointInside(p->GetCenter())) - { - continue; - } - - if (!IsResultExists(*p, cont)) - cont.push_back(IndexedValue(move(p))); - }; -} - -Result Ranker::MakeResult(PreResult2 const & r) const -{ - Result res = r.GenerateFinalResult(m_infoGetter, &m_categories, &m_params.m_preferredTypes, - m_params.m_currentLocaleCode, &m_reverseGeocoder); - MakeResultHighlight(res); + Result res = GenerateFinalResult(r, true /* needAddress */); + HighlightResult(m_params.m_tokens, m_params.m_prefix, res); if (ftypes::IsLocalityChecker::Instance().GetType(r.GetTypes()) == ftypes::NONE) { m_localities.GetLocality(res.GetFeatureCenter(), [&](LocalityItem const & item) { @@ -391,67 +347,6 @@ Result Ranker::MakeResult(PreResult2 const & r) const return res; } -void Ranker::MakeResultHighlight(Result & res) const -{ - using TIter = buffer_vector::const_iterator; - using TCombinedIter = CombinedIter; - - TCombinedIter beg(m_params.m_tokens.begin(), m_params.m_tokens.end(), - m_params.m_prefix.empty() ? 0 : &m_params.m_prefix); - TCombinedIter end(m_params.m_tokens.end(), m_params.m_tokens.end(), 0); - auto assignHighlightRange = [&](pair const & range) - { - res.AddHighlightRange(range); - }; - - SearchStringTokensIntersectionRanges(res.GetString(), beg, end, assignHighlightRange); -} - -void Ranker::GetSuggestion(string const & name, string & suggest) const -{ - // Splits result's name. 
- search::Delimiters delims; - vector tokens; - SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), delims); - - // Finds tokens that are already present in the input query. - vector tokensMatched(tokens.size()); - bool prefixMatched = false; - bool fullPrefixMatched = false; - - for (size_t i = 0; i < tokens.size(); ++i) - { - auto const & token = tokens[i]; - - if (find(m_params.m_tokens.begin(), m_params.m_tokens.end(), token) != m_params.m_tokens.end()) - { - tokensMatched[i] = true; - } - else if (StartsWith(token, m_params.m_prefix)) - { - prefixMatched = true; - fullPrefixMatched = token.size() == m_params.m_prefix.size(); - } - } - - // When |name| does not match prefix or when prefix equals to some - // token of the |name| (for example, when user entered "Moscow" - // without space at the end), we should not suggest anything. - if (!prefixMatched || fullPrefixMatched) - return; - - GetStringPrefix(m_params.m_query, suggest); - - // Appends unmatched result's tokens to the suggestion. 
- for (size_t i = 0; i < tokens.size(); ++i) - { - if (tokensMatched[i]) - continue; - suggest.append(strings::ToUtf8(tokens[i])); - suggest.push_back(' '); - } -} - void Ranker::SuggestStrings() { if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled) @@ -464,22 +359,135 @@ void Ranker::SuggestStrings() MatchForSuggestions(m_params.m_prefix, locale, prologue); } -void Ranker::MatchForSuggestions(strings::UniString const & token, int8_t locale, - string const & prologue) +void Ranker::UpdateResults(bool lastUpdate) { - for (auto const & suggest : m_suggests) + BailIfCancelled(); + + MakeRankerResults(m_geocoderParams, m_tentativeResults); + RemoveDuplicatingLinear(m_tentativeResults); + if (m_tentativeResults.empty()) + return; + + if (m_params.m_viewportSearch) { - strings::UniString const & s = suggest.m_name; - if ((suggest.m_prefixLength <= token.size()) && - (token != s) && // do not push suggestion if it already equals to token - (suggest.m_locale == locale) && // push suggestions only for needed language - strings::StartsWith(s.begin(), s.end(), token.begin(), token.end())) + sort(m_tentativeResults.begin(), m_tentativeResults.end(), + my::LessBy(&RankerResult::GetDistanceToPivot)); + } + else + { + // *NOTE* GetLinearModelRank is calculated on the fly + // but the model is lightweight enough and the slowdown + // is negligible. + sort(m_tentativeResults.rbegin(), m_tentativeResults.rend(), + my::LessBy(&RankerResult::GetLinearModelRank)); + ProcessSuggestions(m_tentativeResults); + } + + // Emit feature results. 
+ size_t count = m_emitter.GetResults().GetCount(); + size_t i = 0; + for (; i < m_tentativeResults.size(); ++i) + { + if (!lastUpdate && i >= m_params.m_batchSize && !m_params.m_viewportSearch) + break; + + BailIfCancelled(); + + if (m_params.m_viewportSearch) { - string const utf8Str = strings::ToUtf8(s); - Result r(utf8Str, prologue + utf8Str + " "); - MakeResultHighlight(r); - m_emitter.AddResult(move(r)); + // Viewport results don't need calculated address. + Result res = GenerateFinalResult(m_tentativeResults[i], false /* needAddress */); + m_emitter.AddResultNoChecks(move(res)); } + else + { + if (count >= m_params.m_limit) + break; + + LOG(LDEBUG, (m_tentativeResults[i])); + + auto const & rankerResult = m_tentativeResults[i]; + if (m_emitter.AddResult(MakeResult(rankerResult))) + ++count; + } + } + m_tentativeResults.erase(m_tentativeResults.begin(), m_tentativeResults.begin() + i); + + m_preRankerResults.clear(); + + BailIfCancelled(); + m_emitter.Emit(); +} + +void Ranker::ClearCaches() { m_localities.ClearCache(); } + +void Ranker::MakeRankerResults(Geocoder::Params const & geocoderParams, + vector & results) +{ + RankerResultMaker maker(*this, m_index, m_infoGetter, geocoderParams); + for (auto const & r : m_preRankerResults) + { + auto p = maker(r); + if (!p) + continue; + + if (geocoderParams.m_mode == Mode::Viewport && + !geocoderParams.m_pivot.IsPointInside(p->GetCenter())) + { + continue; + } + + if (!ResultExists(*p, results, m_params.m_minDistanceOnMapBetweenResults)) + results.push_back(move(*p.release())); + }; +} + +Result Ranker::GenerateFinalResult(RankerResult const & rankerResult, bool needAddress) const +{ + ReverseGeocoder::Address addr; + bool addrComputed = false; + + string name = rankerResult.GetName(); + if (needAddress && name.empty()) + { + // Insert exact address (street and house number) instead of empty result name. 
+ if (!addrComputed) + { + m_reverseGeocoder.GetNearbyAddress(rankerResult.GetCenter(), addr); + addrComputed = true; + } + if (addr.GetDistance() == 0) + name = FormatStreetAndHouse(addr); + } + + uint32_t const type = rankerResult.GetBestType(&m_params.m_preferredTypes); + + // Format full address only for suitable results. + string address; + if (needAddress) + { + address = rankerResult.GetRegionName(m_infoGetter, type); + if (ftypes::IsAddressObjectChecker::Instance()(rankerResult.GetTypes())) + { + if (!addrComputed) + { + m_reverseGeocoder.GetNearbyAddress(rankerResult.GetCenter(), addr); + addrComputed = true; + } + address = FormatFullAddress(addr, address); + } + } + + switch (rankerResult.GetResultType()) + { + case RankerResult::Type::TYPE_FEATURE: + case RankerResult::Type::TYPE_BUILDING: + return Result(rankerResult.GetID(), rankerResult.GetCenter(), name, address, + m_categories.GetReadableFeatureType(type, m_params.m_currentLocaleCode), type, + rankerResult.GetMetadata()); + default: + ASSERT_EQUAL(rankerResult.GetResultType(), RankerResult::Type::TYPE_LATLON, ()); + return Result(rankerResult.GetCenter(), name, address); } } @@ -499,7 +507,26 @@ void Ranker::GetBestMatchName(FeatureType const & f, string & name) const UNUSED_VALUE(f.ForEachName(bestNameFinder)); } -void Ranker::ProcessSuggestions(vector & vec) const +void Ranker::MatchForSuggestions(strings::UniString const & token, int8_t locale, + string const & prologue) +{ + for (auto const & suggest : m_suggests) + { + strings::UniString const & s = suggest.m_name; + if (suggest.m_prefixLength <= token.size() + && token != s // do not push suggestion if it already equals to token + && suggest.m_locale == locale // push suggestions only for needed language + && strings::StartsWith(s.begin(), s.end(), token.begin(), token.end())) + { + string const utf8Str = strings::ToUtf8(s); + Result r(utf8Str, prologue + utf8Str + " "); + HighlightResult(m_params.m_tokens, m_params.m_prefix, r); + 
m_emitter.AddResult(move(r)); + } + } +} + +void Ranker::ProcessSuggestions(vector & vec) const { if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled) return; @@ -507,16 +534,17 @@ void Ranker::ProcessSuggestions(vector & vec) const int added = 0; for (auto i = vec.begin(); i != vec.end();) { - PreResult2 const & r = **i; + RankerResult const & r = *i; ftypes::Type const type = GetLocalityIndex(r.GetTypes()); - if ((type == ftypes::COUNTRY || type == ftypes::CITY) || r.IsStreet()) + if (type == ftypes::COUNTRY || type == ftypes::CITY || r.IsStreet()) { - string suggest; - GetSuggestion(r.GetName(), suggest); - if (!suggest.empty() && added < MAX_SUGGESTS_COUNT) + string suggestion; + GetSuggestion(r, m_params.m_query, m_params.m_tokens, m_params.m_prefix, suggestion); + if (!suggestion.empty() && added < MAX_SUGGESTS_COUNT) { - if (m_emitter.AddResult(Result(MakeResult(r), suggest))) + // todo(@m) RankingInfo is not set here. Should it be? + if (m_emitter.AddResult(Result(MakeResult(r), suggestion))) ++added; i = vec.erase(i); @@ -526,67 +554,4 @@ void Ranker::ProcessSuggestions(vector & vec) const ++i; } } - -void Ranker::UpdateResults(bool lastUpdate) -{ - BailIfCancelled(); - - MakePreResult2(m_geocoderParams, m_tentativeResults); - RemoveDuplicatingLinear(m_tentativeResults); - if (m_tentativeResults.empty()) - return; - - if (m_params.m_viewportSearch) - { - sort(m_tentativeResults.begin(), m_tentativeResults.end(), - my::LessBy(&IndexedValue::GetDistanceToPivot)); - } - else - { - sort(m_tentativeResults.rbegin(), m_tentativeResults.rend(), - my::LessBy(&IndexedValue::GetRank)); - ProcessSuggestions(m_tentativeResults); - } - - // Emit feature results. 
- size_t count = m_emitter.GetResults().GetCount(); - size_t i = 0; - for (; i < m_tentativeResults.size(); ++i) - { - if (!lastUpdate && i >= kBatchSize && !m_params.m_viewportSearch) - break; - BailIfCancelled(); - - if (m_params.m_viewportSearch) - { - m_emitter.AddResultNoChecks( - (*m_tentativeResults[i]) - .GenerateFinalResult(m_infoGetter, &m_categories, &m_params.m_preferredTypes, - m_params.m_currentLocaleCode, - nullptr /* Viewport results don't need calculated address */)); - } - else - { - if (count >= m_params.m_limit) - break; - - LOG(LDEBUG, (m_tentativeResults[i])); - - auto const & preResult2 = *m_tentativeResults[i]; - if (m_emitter.AddResult(MakeResult(preResult2))) - ++count; - } - } - m_tentativeResults.erase(m_tentativeResults.begin(), m_tentativeResults.begin() + i); - - m_preResults1.clear(); - - BailIfCancelled(); - m_emitter.Emit(); -} - -void Ranker::ClearCaches() -{ - m_localities.ClearCache(); -} } // namespace search diff --git a/search/ranker.hpp b/search/ranker.hpp index b369f8e777..4086d4a112 100644 --- a/search/ranker.hpp +++ b/search/ranker.hpp @@ -37,7 +37,7 @@ namespace search { class CitiesBoundariesTable; class Emitter; -class PreResult2Maker; +class RankerResultMaker; class VillagesCache; class Ranker @@ -54,7 +54,7 @@ public: bool m_viewportSearch = false; string m_query; - buffer_vector m_tokens; + QueryTokens m_tokens; // Prefix of the last token in the query. // We need it here to make suggestions. strings::UniString m_prefix; @@ -62,75 +62,59 @@ public: m2::PointD m_accuratePivotCenter = m2::PointD(0, 0); // A minimum distance between search results in meters, needed for - // filtering of indentical search results. + // filtering of identical search results. double m_minDistanceOnMapBetweenResults = 0.0; Locales m_categoryLocales; + // Default batch size. Override if needed. + size_t m_batchSize = 10; + + // The maximum total number of results to be emitted in all batches. 
size_t m_limit = 0; }; - static size_t const kBatchSize; - Ranker(Index const & index, CitiesBoundariesTable const & boundariesTable, - storage::CountryInfoGetter const & infoGetter, Emitter & emitter, - CategoriesHolder const & categories, vector const & suggests, + storage::CountryInfoGetter const & infoGetter, KeywordLangMatcher & keywordsScorer, + Emitter & emitter, CategoriesHolder const & categories, vector const & suggests, VillagesCache & villagesCache, my::Cancellable const & cancellable); virtual ~Ranker() = default; void Init(Params const & params, Geocoder::Params const & geocoderParams); - bool IsResultExists(PreResult2 const & p, vector const & values); + Result MakeResult(RankerResult const & r) const; - void MakePreResult2(Geocoder::Params const & params, vector & cont); - - Result MakeResult(PreResult2 const & r) const; - void MakeResultHighlight(Result & res) const; - - void GetSuggestion(string const & name, string & suggest) const; void SuggestStrings(); - void MatchForSuggestions(strings::UniString const & token, int8_t locale, string const & prolog); - void GetBestMatchName(FeatureType const & f, string & name) const; - void ProcessSuggestions(vector & vec) const; - virtual void SetPreResults1(vector && preResults1) { m_preResults1 = move(preResults1); } + virtual void SetPreRankerResults(vector && preRankerResults) + { + m_preRankerResults = move(preRankerResults); + } virtual void UpdateResults(bool lastUpdate); void ClearCaches(); - inline void SetLocalityLanguage(int8_t code) { m_localityLang = code; } - - inline void SetLanguage(pair const & ind, int8_t lang) - { - m_keywordsScorer.SetLanguage(ind, lang); - } - - inline int8_t GetLanguage(pair const & ind) const - { - return m_keywordsScorer.GetLanguage(ind); - } - - inline void SetLanguages(vector> const & languagePriorities) - { - m_keywordsScorer.SetLanguages(languagePriorities); - } - - inline void SetKeywords(KeywordMatcher::StringT const * keywords, size_t count, - 
KeywordMatcher::StringT const & prefix) - { - m_keywordsScorer.SetKeywords(keywords, count, prefix); - } - inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); } + inline void SetLocalityLanguage(int8_t code) { m_localityLang = code; } + private: - friend class PreResult2Maker; + friend class RankerResultMaker; + + void MakeRankerResults(Geocoder::Params const & params, vector & results); + + // todo(@m) Can we unify this and MakeResult? + Result GenerateFinalResult(RankerResult const & rankerResult, bool needAddress) const; + + void GetBestMatchName(FeatureType const & f, string & name) const; + void MatchForSuggestions(strings::UniString const & token, int8_t locale, string const & prolog); + void ProcessSuggestions(vector & vec) const; Params m_params; Geocoder::Params m_geocoderParams; ReverseGeocoder const m_reverseGeocoder; my::Cancellable const & m_cancellable; - KeywordLangMatcher m_keywordsScorer; + KeywordLangMatcher & m_keywordsScorer; mutable LocalityFinder m_localities; int8_t m_localityLang = StringUtf8Multilang::kDefaultCode; @@ -141,7 +125,7 @@ private: CategoriesHolder const & m_categories; vector const & m_suggests; - vector m_preResults1; - vector m_tentativeResults; + vector m_preRankerResults; + vector m_tentativeResults; }; } // namespace search diff --git a/search/result.hpp b/search/result.hpp index b583b5032c..3fbd313b7f 100644 --- a/search/result.hpp +++ b/search/result.hpp @@ -117,7 +117,9 @@ public: private: FeatureID m_id; m2::PointD m_center; - string m_str, m_address, m_type; + string m_str; + string m_address; + string m_type; uint32_t m_featureType; string m_suggestionStr; buffer_vector, 4> m_hightlightRanges; @@ -199,7 +201,11 @@ private: struct AddressInfo { - string m_country, m_city, m_street, m_house, m_name; + string m_country; + string m_city; + string m_street; + string m_house; + string m_name; vector m_types; double m_distanceMeters = -1.0; diff --git a/search/search.pro b/search/search.pro index 
0266988080..ca83706afa 100644 --- a/search/search.pro +++ b/search/search.pro @@ -37,6 +37,7 @@ HEADERS += \ geocoder_locality.hpp \ geometry_cache.hpp \ geometry_utils.hpp \ + highlighting.hpp \ hotels_classifier.hpp \ hotels_filter.hpp \ house_detector.hpp \ @@ -78,7 +79,6 @@ HEADERS += \ stats_cache.hpp \ street_vicinity_loader.hpp \ streets_matcher.hpp \ - string_intersection.hpp \ suggest.hpp \ token_range.hpp \ token_slice.hpp \ @@ -108,6 +108,7 @@ SOURCES += \ geocoder_locality.cpp \ geometry_cache.cpp \ geometry_utils.cpp \ + highlighting.cpp \ hotels_classifier.cpp \ hotels_filter.cpp \ house_detector.cpp \ @@ -143,6 +144,7 @@ SOURCES += \ segment_tree.cpp \ street_vicinity_loader.cpp \ streets_matcher.cpp \ + suggest.cpp \ token_slice.cpp \ types_skipper.cpp \ utils.cpp \ diff --git a/search/search_integration_tests/pre_ranker_test.cpp b/search/search_integration_tests/pre_ranker_test.cpp index 2e86d3941b..b73eee0e3b 100644 --- a/search/search_integration_tests/pre_ranker_test.cpp +++ b/search/search_integration_tests/pre_ranker_test.cpp @@ -46,10 +46,11 @@ class TestRanker : public Ranker { public: TestRanker(TestSearchEngine & engine, CitiesBoundariesTable const & boundariesTable, - Emitter & emitter, vector const & suggests, VillagesCache & villagesCache, - my::Cancellable const & cancellable, vector & results) + KeywordLangMatcher & keywordsScorer, Emitter & emitter, + vector const & suggests, VillagesCache & villagesCache, + my::Cancellable const & cancellable, vector & results) : Ranker(static_cast(engine), boundariesTable, engine.GetCountryInfoGetter(), - emitter, GetDefaultCategories(), suggests, villagesCache, cancellable) + keywordsScorer, emitter, GetDefaultCategories(), suggests, villagesCache, cancellable) , m_results(results) { } @@ -57,11 +58,11 @@ public: inline bool Finished() const { return m_finished; } // Ranker overrides: - void SetPreResults1(vector && preResults1) override + void SetPreRankerResults(vector && preRankerResults) 
override { CHECK(!Finished(), ()); - move(preResults1.begin(), preResults1.end(), back_inserter(m_results)); - preResults1.clear(); + move(preRankerResults.begin(), preRankerResults.end(), back_inserter(m_results)); + preRankerResults.clear(); } void UpdateResults(bool lastUpdate) override @@ -72,7 +73,7 @@ public: } private: - vector & m_results; + vector & m_results; bool m_finished = false; }; @@ -112,12 +113,13 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke) builder.Add(poi); }); - vector results; + vector results; Emitter emitter; CitiesBoundariesTable boundariesTable(m_engine); VillagesCache villagesCache(m_cancellable); - TestRanker ranker(m_engine, boundariesTable, emitter, m_suggests, villagesCache, m_cancellable, - results); + KeywordLangMatcher keywordsScorer; + TestRanker ranker(m_engine, boundariesTable, keywordsScorer, emitter, m_suggests, villagesCache, + m_cancellable, results); PreRanker preRanker(m_engine, ranker, pois.size()); PreRanker::Params params; diff --git a/search/search_quality/search_quality_tool/search_quality_tool.cpp b/search/search_quality/search_quality_tool/search_quality_tool.cpp index 2989bb3f7d..66ab53db41 100644 --- a/search/search_quality/search_quality_tool/search_quality_tool.cpp +++ b/search/search_quality/search_quality_tool/search_quality_tool.cpp @@ -379,6 +379,7 @@ int main(int argc, char * argv[]) params.m_locale = FLAGS_locale; params.m_numThreads = FLAGS_num_threads; TestSearchEngine engine(move(infoGetter), make_unique(), Engine::Params{}); + engine.SetLocale(FLAGS_locale); vector mwms; if (!FLAGS_mwm_list_path.empty()) diff --git a/search/search_tests/CMakeLists.txt b/search/search_tests/CMakeLists.txt index 4b7db35308..7b34a3d12f 100644 --- a/search/search_tests/CMakeLists.txt +++ b/search/search_tests/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories(${OMIM_ROOT}/3party/jansson/src) set( SRC algos_tests.cpp + highlighting_tests.cpp house_detector_tests.cpp house_numbers_matcher_test.cpp interval_set_test.cpp @@ -19,7 
+20,6 @@ set( query_saver_tests.cpp ranking_tests.cpp segment_tree_tests.cpp - string_intersection_test.cpp string_match_test.cpp ) diff --git a/search/search_tests/string_intersection_test.cpp b/search/search_tests/highlighting_tests.cpp similarity index 61% rename from search/search_tests/string_intersection_test.cpp rename to search/search_tests/highlighting_tests.cpp index d8401e904d..24090f11f7 100644 --- a/search/search_tests/string_intersection_test.cpp +++ b/search/search_tests/highlighting_tests.cpp @@ -1,6 +1,6 @@ #include "testing/testing.hpp" -#include "search/string_intersection.hpp" +#include "search/highlighting.hpp" #include "indexer/feature_covering.hpp" @@ -8,7 +8,6 @@ #include "std/cstdarg.hpp" - namespace { typedef pair TestResult; @@ -21,7 +20,7 @@ struct TestData TokensVector m_lowTokens; TestResultVector m_results; - TestData(char const * inp, char const **lToks, size_t lowTokCount, size_t resCount, ...) + TestData(char const * inp, char const ** lToks, size_t lowTokCount, size_t resCount, ...) 
{ m_input = inp; for (size_t i = 0; i < lowTokCount; ++i) @@ -38,11 +37,7 @@ struct TestData va_end(ap); } - void AddResult(uint16_t pos, uint16_t len) - { - m_results.push_back(TestResult(pos, len)); - } - + void AddResult(uint16_t pos, uint16_t len) { m_results.push_back(TestResult(pos, len)); } }; typedef vector TestVector; @@ -50,31 +45,23 @@ class CheckRange { size_t m_idx; TestResultVector const & m_results; + public: - CheckRange(TestResultVector const & results) - : m_idx(0) - , m_results(results) - { - } + CheckRange(TestResultVector const & results) : m_idx(0), m_results(results) {} - ~CheckRange() - { - TEST_EQUAL(m_idx, m_results.size(), ()); - } + ~CheckRange() { TEST_EQUAL(m_idx, m_results.size(), ()); } - void operator() (pair const & range) + void operator()(pair const & range) { ASSERT(m_idx < m_results.size(), ()); TEST_EQUAL(range, m_results[m_idx], ()); ++m_idx; } }; - } UNIT_TEST(SearchStringTokensIntersectionRange) { - char const * str0 = "улица Карла Маркса"; char const * str1 = "ул. 
Карла Маркса"; char const * str2 = "Карлов Мост"; @@ -101,29 +88,29 @@ UNIT_TEST(SearchStringTokensIntersectionRange) TestVector tests; // fill test data - tests.push_back(TestData(str0, lowTokens0, 2, 2, 6,5, 12,6)); - tests.push_back(TestData(str1, lowTokens0, 2, 2, 4,5, 10,6)); + tests.push_back(TestData(str0, lowTokens0, 2, 2, 6, 5, 12, 6)); + tests.push_back(TestData(str1, lowTokens0, 2, 2, 4, 5, 10, 6)); tests.push_back(TestData(str2, lowTokens0, 2, 0)); - tests.push_back(TestData(str10, lowTokens8, 2, 2, 0,2, 6,3)); - tests.push_back(TestData(str4, lowTokens1, 2, 2, 9,5, 15,4)); + tests.push_back(TestData(str10, lowTokens8, 2, 2, 0, 2, 6, 3)); + tests.push_back(TestData(str4, lowTokens1, 2, 2, 9, 5, 15, 4)); tests.push_back(TestData(str0, lowTokens2, 1, 0)); tests.push_back(TestData(str2, lowTokens2, 1, 0)); - tests.push_back(TestData(str0, lowTokens3, 2, 2, 6,5, 12,1)); - tests.push_back(TestData(str1, lowTokens3, 2, 2, 4,5, 10,1)); - tests.push_back(TestData(str0, lowTokens4, 2, 2, 6,3, 12,3)); + tests.push_back(TestData(str0, lowTokens3, 2, 2, 6, 5, 12, 1)); + tests.push_back(TestData(str1, lowTokens3, 2, 2, 4, 5, 10, 1)); + tests.push_back(TestData(str0, lowTokens4, 2, 2, 6, 3, 12, 3)); - tests.push_back(TestData(str3, lowTokens1, 2, 1, 0,4)); - tests.push_back(TestData(str5, lowTokens5, 2, 2, 10,4, 16,5)); + tests.push_back(TestData(str3, lowTokens1, 2, 1, 0, 4)); + tests.push_back(TestData(str5, lowTokens5, 2, 2, 10, 4, 16, 5)); tests.push_back(TestData(str6, lowTokens6, 1, 0)); tests.push_back(TestData(str6, lowTokens7, 2, 0)); - tests.push_back(TestData(str5, lowTokens7, 2, 1, 10,4)); + tests.push_back(TestData(str5, lowTokens7, 2, 1, 10, 4)); - tests.push_back(TestData(str8, lowTokens3, 2, 2, 2,5, 9,1)); - tests.push_back(TestData(str7, lowTokens0, 2, 2, 2,5, 8,6)); - tests.push_back(TestData(str0, lowTokens8, 2, 2, 0,2, 6,3)); - tests.push_back(TestData(str9, lowTokens8, 2, 2, 0,2, 6,3)); - tests.push_back(TestData(str11, lowTokens9, 2, 2, 0,2, 
14,3)); + tests.push_back(TestData(str8, lowTokens3, 2, 2, 2, 5, 9, 1)); + tests.push_back(TestData(str7, lowTokens0, 2, 2, 2, 5, 8, 6)); + tests.push_back(TestData(str0, lowTokens8, 2, 2, 0, 2, 6, 3)); + tests.push_back(TestData(str9, lowTokens8, 2, 2, 0, 2, 6, 3)); + tests.push_back(TestData(str11, lowTokens9, 2, 2, 0, 2, 14, 3)); // run tests size_t count = 0; @@ -131,9 +118,7 @@ UNIT_TEST(SearchStringTokensIntersectionRange) { TestData const & data = *it; - search::SearchStringTokensIntersectionRanges(data.m_input, - data.m_lowTokens.begin(), - data.m_lowTokens.end(), - CheckRange(data.m_results)); + search::SearchStringTokensIntersectionRanges( + data.m_input, data.m_lowTokens.begin(), data.m_lowTokens.end(), CheckRange(data.m_results)); } } diff --git a/search/search_tests/search_tests.pro b/search/search_tests/search_tests.pro index 54277f39e5..be5e248c06 100644 --- a/search/search_tests/search_tests.pro +++ b/search/search_tests/search_tests.pro @@ -25,6 +25,7 @@ win32*|linux* { SOURCES += \ ../../testing/testingmain.cpp \ algos_tests.cpp \ + highlighting_tests.cpp \ hotels_filter_test.cpp \ house_detector_tests.cpp \ house_numbers_matcher_test.cpp \ @@ -39,7 +40,6 @@ SOURCES += \ query_saver_tests.cpp \ ranking_tests.cpp \ segment_tree_tests.cpp \ - string_intersection_test.cpp \ string_match_test.cpp \ HEADERS += \ diff --git a/search/suggest.cpp b/search/suggest.cpp new file mode 100644 index 0000000000..a26ca57342 --- /dev/null +++ b/search/suggest.cpp @@ -0,0 +1,61 @@ +#include "search/suggest.hpp" + +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "search/common.hpp" + +#include "base/stl_add.hpp" + +#include + +using namespace std; + +namespace search +{ +void GetSuggestion(RankerResult const & res, string const & query, QueryTokens const & paramTokens, + strings::UniString const & prefix, string & suggest) +{ + // Splits result's name. 
+ search::Delimiters delims; + vector tokens; + SplitUniString(NormalizeAndSimplifyString(res.GetName()), MakeBackInsertFunctor(tokens), delims); + + // Finds tokens that are already present in the input query. + vector tokensMatched(tokens.size()); + bool prefixMatched = false; + bool fullPrefixMatched = false; + + for (size_t i = 0; i < tokens.size(); ++i) + { + auto const & token = tokens[i]; + + if (find(paramTokens.begin(), paramTokens.end(), token) != paramTokens.end()) + { + tokensMatched[i] = true; + } + else if (StartsWith(token, prefix)) + { + prefixMatched = true; + fullPrefixMatched = token.size() == prefix.size(); + } + } + + // When |name| does not match prefix or when prefix equals to some + // token of the |name| (for example, when user entered "Moscow" + // without space at the end), we should not suggest anything. + if (!prefixMatched || fullPrefixMatched) + return; + + GetStringPrefix(query, suggest); + + // Appends unmatched result's tokens to the suggestion. + for (size_t i = 0; i < tokens.size(); ++i) + { + if (tokensMatched[i]) + continue; + suggest.append(strings::ToUtf8(tokens[i])); + suggest.push_back(' '); + } +} +} // namespace search diff --git a/search/suggest.hpp b/search/suggest.hpp index 92c9208b0e..0908f42067 100644 --- a/search/suggest.hpp +++ b/search/suggest.hpp @@ -1,8 +1,12 @@ #pragma once +#include "search/common.hpp" +#include "search/intermediate_result.hpp" + #include "base/string_utils.hpp" -#include "std/cstdint.hpp" +#include +#include namespace search { @@ -17,4 +21,7 @@ struct Suggest uint8_t m_prefixLength; int8_t m_locale; }; + +void GetSuggestion(RankerResult const & res, string const & query, QueryTokens const & paramTokens, + strings::UniString const & prefix, std::string & suggest); } // namespace search diff --git a/search/utils.hpp b/search/utils.hpp index 1f67187b87..171f3730a1 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -1,5 +1,6 @@ #pragma once +#include "search/common.hpp" #include 
"search/token_slice.hpp" #include "indexer/categories_holder.hpp" @@ -7,9 +8,7 @@ #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" -#include "base/buffer_vector.hpp" #include "base/levenshtein_dfa.hpp" -#include "base/small_set.hpp" #include "base/stl_helpers.hpp" #include "base/string_utils.hpp" @@ -24,8 +23,6 @@ class MwmInfo; namespace search { -using Locales = base::SafeSmallSet; - // todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp. template bool MatchInTrie(TrieIt const & trieStartIt, DFA const & dfa, ToDo && toDo) diff --git a/xcode/search/search.xcodeproj/project.pbxproj b/xcode/search/search.xcodeproj/project.pbxproj index 8754432c00..3364d668ec 100644 --- a/xcode/search/search.xcodeproj/project.pbxproj +++ b/xcode/search/search.xcodeproj/project.pbxproj @@ -70,8 +70,14 @@ 34F558451DBF2E7600A4FC11 /* libopening_hours.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 34F558441DBF2E7600A4FC11 /* libopening_hours.a */; }; 34F558471DBF2E8100A4FC11 /* libsuccinct.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 34F558461DBF2E8100A4FC11 /* libsuccinct.a */; }; 34F558491DBF2EC700A4FC11 /* libz.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 34F558481DBF2EC700A4FC11 /* libz.tbd */; }; + 3913DA511F9FCC88004AA681 /* suggest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3913DA501F9FCC88004AA681 /* suggest.cpp */; }; 397AFE061D6C9AC700F583E7 /* downloader_search_callback.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 397AFE041D6C9AC700F583E7 /* downloader_search_callback.cpp */; }; 397AFE071D6C9AC700F583E7 /* downloader_search_callback.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 397AFE051D6C9AC700F583E7 /* downloader_search_callback.hpp */; }; + 39BBC13B1F9FD65C009D1687 /* highlighting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 39BBC1391F9FD65C009D1687 /* highlighting.cpp */; }; + 39BBC13C1F9FD65C009D1687 /* highlighting.hpp in Headers */ = {isa = PBXBuildFile; 
fileRef = 39BBC13A1F9FD65C009D1687 /* highlighting.hpp */; }; + 39BBC13E1F9FD679009D1687 /* segment_tree_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 39BBC13D1F9FD679009D1687 /* segment_tree_tests.cpp */; }; + 39BBC1401F9FD683009D1687 /* point_rect_matcher_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 39BBC13F1F9FD683009D1687 /* point_rect_matcher_tests.cpp */; }; + 39BBC1421F9FD68C009D1687 /* highlighting_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 39BBC1411F9FD68C009D1687 /* highlighting_tests.cpp */; }; 3DF37FAA1EA11B380012CB31 /* everywhere_search_callback.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3DF37FA81EA11B380012CB31 /* everywhere_search_callback.cpp */; }; 3DF37FAB1EA11B380012CB31 /* everywhere_search_callback.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 3DF37FA91EA11B380012CB31 /* everywhere_search_callback.hpp */; }; 3DFEBF761EF2D55800317D5C /* city_finder.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 3DFEBF751EF2D55800317D5C /* city_finder.hpp */; }; @@ -89,7 +95,6 @@ 671C621F1AE9227C00076BD0 /* keyword_matcher_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C62161AE9227C00076BD0 /* keyword_matcher_test.cpp */; }; 671C62201AE9227C00076BD0 /* latlon_match_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C62171AE9227C00076BD0 /* latlon_match_test.cpp */; }; 671C62211AE9227C00076BD0 /* locality_finder_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C62181AE9227C00076BD0 /* locality_finder_test.cpp */; }; - 671C62221AE9227C00076BD0 /* string_intersection_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C621A1AE9227C00076BD0 /* string_intersection_test.cpp */; }; 671C62231AE9227C00076BD0 /* string_match_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C621B1AE9227C00076BD0 /* string_match_test.cpp */; }; 671C62251AE9229A00076BD0 /* testingmain.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671C62241AE9229A00076BD0 /* testingmain.cpp */; }; 
671C62261AE9232900076BD0 /* libsearch.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 675346B01A4055CF00A0A8C3 /* libsearch.a */; }; @@ -170,7 +175,6 @@ F652D90A1CFDE21900FC29A0 /* stats_cache.hpp in Headers */ = {isa = PBXBuildFile; fileRef = F652D8E31CFDE21900FC29A0 /* stats_cache.hpp */; }; F652D90B1CFDE21900FC29A0 /* street_vicinity_loader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F652D8E41CFDE21900FC29A0 /* street_vicinity_loader.cpp */; }; F652D90C1CFDE21900FC29A0 /* street_vicinity_loader.hpp in Headers */ = {isa = PBXBuildFile; fileRef = F652D8E51CFDE21900FC29A0 /* street_vicinity_loader.hpp */; }; - F652D90D1CFDE21900FC29A0 /* string_intersection.hpp in Headers */ = {isa = PBXBuildFile; fileRef = F652D8E61CFDE21900FC29A0 /* string_intersection.hpp */; }; F652D90E1CFDE21900FC29A0 /* token_slice.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F652D8E71CFDE21900FC29A0 /* token_slice.cpp */; }; F652D90F1CFDE21900FC29A0 /* token_slice.hpp in Headers */ = {isa = PBXBuildFile; fileRef = F652D8E81CFDE21900FC29A0 /* token_slice.hpp */; }; F659FC6D1CF4A30B000A06B1 /* pre_ranker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F659FC6B1CF4A30B000A06B1 /* pre_ranker.cpp */; }; @@ -257,8 +261,14 @@ 34F558441DBF2E7600A4FC11 /* libopening_hours.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libopening_hours.a; path = "../../../omim-xcode-build/Debug/libopening_hours.a"; sourceTree = ""; }; 34F558461DBF2E8100A4FC11 /* libsuccinct.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libsuccinct.a; path = "../../../omim-xcode-build/Debug/libsuccinct.a"; sourceTree = ""; }; 34F558481DBF2EC700A4FC11 /* libz.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libz.tbd; path = usr/lib/libz.tbd; sourceTree = SDKROOT; }; + 3913DA501F9FCC88004AA681 /* suggest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suggest.cpp; 
sourceTree = ""; }; 397AFE041D6C9AC700F583E7 /* downloader_search_callback.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = downloader_search_callback.cpp; sourceTree = ""; }; 397AFE051D6C9AC700F583E7 /* downloader_search_callback.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = downloader_search_callback.hpp; sourceTree = ""; }; + 39BBC1391F9FD65C009D1687 /* highlighting.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = highlighting.cpp; sourceTree = ""; }; + 39BBC13A1F9FD65C009D1687 /* highlighting.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = highlighting.hpp; sourceTree = ""; }; + 39BBC13D1F9FD679009D1687 /* segment_tree_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = segment_tree_tests.cpp; sourceTree = ""; }; + 39BBC13F1F9FD683009D1687 /* point_rect_matcher_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = point_rect_matcher_tests.cpp; sourceTree = ""; }; + 39BBC1411F9FD68C009D1687 /* highlighting_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = highlighting_tests.cpp; sourceTree = ""; }; 3DF37FA81EA11B380012CB31 /* everywhere_search_callback.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = everywhere_search_callback.cpp; sourceTree = ""; }; 3DF37FA91EA11B380012CB31 /* everywhere_search_callback.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = everywhere_search_callback.hpp; sourceTree = ""; }; 3DFEBF751EF2D55800317D5C /* city_finder.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = city_finder.hpp; sourceTree = ""; }; @@ -278,7 +288,6 @@ 
671C62171AE9227C00076BD0 /* latlon_match_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = latlon_match_test.cpp; sourceTree = ""; }; 671C62181AE9227C00076BD0 /* locality_finder_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = locality_finder_test.cpp; sourceTree = ""; }; 671C62191AE9227C00076BD0 /* match_cost_mock.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = match_cost_mock.hpp; sourceTree = ""; }; - 671C621A1AE9227C00076BD0 /* string_intersection_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = string_intersection_test.cpp; sourceTree = ""; }; 671C621B1AE9227C00076BD0 /* string_match_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = string_match_test.cpp; sourceTree = ""; }; 671C62241AE9229A00076BD0 /* testingmain.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testingmain.cpp; path = ../../testing/testingmain.cpp; sourceTree = ""; }; 671C62271AE9233200076BD0 /* libindexer.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libindexer.a; path = "../../../omim-xcode-build/Debug-iphonesimulator/libindexer.a"; sourceTree = ""; }; @@ -363,7 +372,6 @@ F652D8E31CFDE21900FC29A0 /* stats_cache.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = stats_cache.hpp; sourceTree = ""; }; F652D8E41CFDE21900FC29A0 /* street_vicinity_loader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = street_vicinity_loader.cpp; sourceTree = ""; }; F652D8E51CFDE21900FC29A0 /* street_vicinity_loader.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = street_vicinity_loader.hpp; sourceTree = ""; }; - F652D8E61CFDE21900FC29A0 
/* string_intersection.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = string_intersection.hpp; sourceTree = ""; }; F652D8E71CFDE21900FC29A0 /* token_slice.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = token_slice.cpp; sourceTree = ""; }; F652D8E81CFDE21900FC29A0 /* token_slice.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = token_slice.hpp; sourceTree = ""; }; F659FC6B1CF4A30B000A06B1 /* pre_ranker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pre_ranker.cpp; sourceTree = ""; }; @@ -446,6 +454,9 @@ 671C620D1AE9225100076BD0 /* search_tests */ = { isa = PBXGroup; children = ( + 39BBC1411F9FD68C009D1687 /* highlighting_tests.cpp */, + 39BBC13F1F9FD683009D1687 /* point_rect_matcher_tests.cpp */, + 39BBC13D1F9FD679009D1687 /* segment_tree_tests.cpp */, 34586B821DCB1E8300CF7FC9 /* hotels_filter_test.cpp */, 34586B831DCB1E8300CF7FC9 /* house_numbers_matcher_test.cpp */, 34586B841DCB1E8300CF7FC9 /* interval_set_test.cpp */, @@ -462,7 +473,6 @@ 671C62171AE9227C00076BD0 /* latlon_match_test.cpp */, 671C62181AE9227C00076BD0 /* locality_finder_test.cpp */, 671C62191AE9227C00076BD0 /* match_cost_mock.hpp */, - 671C621A1AE9227C00076BD0 /* string_intersection_test.cpp */, 671C621B1AE9227C00076BD0 /* string_match_test.cpp */, ); name = search_tests; @@ -495,6 +505,9 @@ 675346B21A4055CF00A0A8C3 /* search */ = { isa = PBXGroup; children = ( + 39BBC1391F9FD65C009D1687 /* highlighting.cpp */, + 39BBC13A1F9FD65C009D1687 /* highlighting.hpp */, + 3913DA501F9FCC88004AA681 /* suggest.cpp */, 456E1B411F9A3CF5009C32E1 /* localities_source.cpp */, 456E1B401F9A3CF5009C32E1 /* localities_source.hpp */, 456E1B3C1F9A3C8D009C32E1 /* cities_boundaries_table.cpp */, @@ -616,7 +629,6 @@ F652D8E51CFDE21900FC29A0 /* street_vicinity_loader.hpp */, 345C8DAD1D2D15A50037E3A6 /* streets_matcher.cpp */, 
345C8DAE1D2D15A50037E3A6 /* streets_matcher.hpp */, - F652D8E61CFDE21900FC29A0 /* string_intersection.hpp */, 347F33151C4540A8009758CC /* suggest.hpp */, 34EEAD6F1E55AE4300E95575 /* token_range.hpp */, F652D8E71CFDE21900FC29A0 /* token_slice.cpp */, @@ -684,6 +696,7 @@ 56D5456F1C74A48C00E3719C /* mode.hpp in Headers */, 0810EC371D6D9D2E00ABFEE7 /* displayed_categories.hpp in Headers */, 347F332A1C4540A8009758CC /* search_index_values.hpp in Headers */, + 39BBC13C1F9FD65C009D1687 /* highlighting.hpp in Headers */, 347F33161C4540A8009758CC /* cancel_exception.hpp in Headers */, 342D83351D5233B3000D8AEA /* hotels_classifier.hpp in Headers */, 3441CE4F1CFC1D7000CF30D4 /* processor_factory.hpp in Headers */, @@ -708,7 +721,6 @@ 347F331B1C4540A8009758CC /* interval_set.hpp in Headers */, F652D9051CFDE21900FC29A0 /* rank_table_cache.hpp in Headers */, 3469FAD31D6C5D9C00F35A88 /* everywhere_search_params.hpp in Headers */, - F652D90D1CFDE21900FC29A0 /* string_intersection.hpp in Headers */, 3465B2861D5DE71A0021E14D /* viewport_search_params.hpp in Headers */, F652D8C01CFDE1E800FC29A0 /* engine.hpp in Headers */, 675346DF1A40560D00A0A8C3 /* feature_offset_match.hpp in Headers */, @@ -841,7 +853,6 @@ buildActionMask = 2147483647; files = ( 671C62231AE9227C00076BD0 /* string_match_test.cpp in Sources */, - 671C62221AE9227C00076BD0 /* string_intersection_test.cpp in Sources */, 671C62201AE9227C00076BD0 /* latlon_match_test.cpp in Sources */, 671C621F1AE9227C00076BD0 /* keyword_matcher_test.cpp in Sources */, 671C62251AE9229A00076BD0 /* testingmain.cpp in Sources */, @@ -859,12 +870,14 @@ F652D8FA1CFDE21900FC29A0 /* locality_scorer.cpp in Sources */, 349B65891D4F21E5001798E2 /* lazy_centers_table.cpp in Sources */, F652D8FE1CFDE21900FC29A0 /* mwm_context.cpp in Sources */, + 39BBC13E1F9FD679009D1687 /* segment_tree_tests.cpp in Sources */, F652D8F01CFDE21900FC29A0 /* geocoder.cpp in Sources */, F652D8F21CFDE21900FC29A0 /* geometry_cache.cpp in Sources */, 
34586B8C1DCB1E8300CF7FC9 /* locality_scorer_test.cpp in Sources */, 34EEAD721E55AE5C00E95575 /* utils.cpp in Sources */, 345C8DB11D2D15A50037E3A6 /* geocoder_context.cpp in Sources */, 3461C9A31D79949600E6E6F5 /* editor_delegate.cpp in Sources */, + 39BBC13B1F9FD65C009D1687 /* highlighting.cpp in Sources */, F652D8BF1CFDE1E800FC29A0 /* engine.cpp in Sources */, 675346DD1A40560D00A0A8C3 /* approximate_string_match.cpp in Sources */, 34586B8B1DCB1E8300CF7FC9 /* interval_set_test.cpp in Sources */, @@ -900,9 +913,11 @@ 34586B8A1DCB1E8300CF7FC9 /* house_numbers_matcher_test.cpp in Sources */, F659FC6D1CF4A30B000A06B1 /* pre_ranker.cpp in Sources */, 34586B8F1DCB1E8300CF7FC9 /* ranking_tests.cpp in Sources */, + 39BBC1401F9FD683009D1687 /* point_rect_matcher_tests.cpp in Sources */, F652D9081CFDE21900FC29A0 /* ranking_utils.cpp in Sources */, 3465B2821D5DE71A0021E14D /* search_params.cpp in Sources */, 347F33241C4540A8009758CC /* retrieval.cpp in Sources */, + 3913DA511F9FCC88004AA681 /* suggest.cpp in Sources */, A1347D551B8758E9009050FF /* query_saver_tests.cpp in Sources */, F652D90B1CFDE21900FC29A0 /* street_vicinity_loader.cpp in Sources */, 3459A7A71E4C4D0200ED235F /* geocoder_locality.cpp in Sources */, @@ -921,6 +936,7 @@ 3453BD5A1DAF91C100380ECB /* hotels_filter.cpp in Sources */, 345C8DB31D2D15A50037E3A6 /* streets_matcher.cpp in Sources */, 456E1B3E1F9A3C8E009C32E1 /* cities_boundaries_table.cpp in Sources */, + 39BBC1421F9FD68C009D1687 /* highlighting_tests.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; };