From 2929b89ced4dbc23279a6fc568b7a9ec5d1c0d7f Mon Sep 17 00:00:00 2001
From: Yury Melnichek
Date: Sun, 29 May 2011 16:35:06 +0200
Subject: [PATCH] [search] Factor out SplitAndNormalizeAndSimplifyString().

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):

* What the patch does: the "tokenize by delimiters, lower-case each token"
  loop that existed in both KeywordMatcher::ProcessName() and the Query
  constructor is moved into the template helper
  SplitAndNormalizeAndSimplifyString() in string_match.hpp, which calls a
  functor once per token. ProcessName() forwards each token to the new
  ProcessNameToken(); Query collects every token into m_keywords and then
  moves the last one into m_prefix when the query does not end with a
  delimiter.
* In this patch the helper only lower-cases tokens
  (strings::MakeLowerCase); no other normalization is performed.
* NOTE(review): the string_match.hpp hunk inserts the helper right after
  "namespace impl {", while keyword_matcher.cpp and query.cpp call it
  unqualified - confirm the call sites can see it.
* NOTE(review): this copy of the patch had lost its line breaks and every
  angle-bracketed token. Line breaks were restored from the hunk line
  counts; indentation is a best-effort reconstruction. The template
  parameter list of the helper and the TokenizeIterator template arguments
  were re-inserted from the surrounding declarations - verify against the
  upstream commit before applying.

 search/keyword_matcher.cpp | 52 ++++++++++++++++++++------------------
 search/keyword_matcher.hpp |  1 +
 search/query.cpp           | 10 ++++----
 search/string_match.hpp    |  8 +++++-
 4 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/search/keyword_matcher.cpp b/search/keyword_matcher.cpp
index 711516abda..37e0be3fc0 100644
--- a/search/keyword_matcher.cpp
+++ b/search/keyword_matcher.cpp
@@ -1,6 +1,8 @@
 #include "keyword_matcher.hpp"
 #include "delimiters.hpp"
+#include "string_match.hpp"
 #include "../base/string_utils.hpp"
+#include "../std/bind.hpp"
 #include "../std/numeric.hpp"
 
 namespace search
@@ -25,36 +27,36 @@ KeywordMatcher::KeywordMatcher(strings::UniString * pKeywords,
 
 void KeywordMatcher::ProcessName(string const & name)
 {
-  search::Delimiters delims;
-  for (strings::TokenizeIterator<search::Delimiters> iter(name, delims); iter; ++iter)
+  SplitAndNormalizeAndSimplifyString(
+      name, bind(&KeywordMatcher::ProcessNameToken, this, cref(name), _1), Delimiters());
+}
+
+void KeywordMatcher::ProcessNameToken(string const & name, strings::UniString const & s)
+{
+  for (size_t i = 0; i < m_minKeywordMatchCost.size(); ++i)
   {
-    strings::UniString const s = strings::MakeLowerCase(iter.GetUniString());
+    m_minKeywordMatchCost[i] = min(m_minKeywordMatchCost[i],
+                                   m_keywordMatchFn(&m_pKewords[i][0], m_pKewords[i].size(),
+                                                    &s[0], s.size(),
+                                                    m_minKeywordMatchCost[i]));
+  }
 
-    for (size_t i = 0; i < m_minKeywordMatchCost.size(); ++i)
+  if (!m_prefix.empty())
+  {
+    uint32_t const matchCost = m_prefixMatchFn(&m_prefix[0], m_prefix.size(),
+                                               &s[0], s.size(), m_minPrefixMatchCost);
+    if (matchCost < m_minPrefixMatchCost)
     {
-      m_minKeywordMatchCost[i] = min(m_minKeywordMatchCost[i],
-                                     m_keywordMatchFn(&m_pKewords[i][0], m_pKewords[i].size(),
-                                                      &s[0], s.size(),
-                                                      m_minKeywordMatchCost[i]));
+      m_bestPrefixMatch = name;
+      m_minPrefixMatchCost = matchCost;
     }
-
-    if (!m_prefix.empty())
+  }
+  else
+  {
+    if (m_bestPrefixMatch.empty())
     {
-      uint32_t const matchCost = m_prefixMatchFn(&m_prefix[0], m_prefix.size(),
-                                                 &s[0], s.size(), m_minPrefixMatchCost);
-      if (matchCost < m_minPrefixMatchCost)
-      {
-        m_bestPrefixMatch = name;
-        m_minPrefixMatchCost = matchCost;
-      }
-    }
-    else
-    {
-      if (m_bestPrefixMatch.empty())
-      {
-        m_bestPrefixMatch = name;
-        m_minPrefixMatchCost = 0;
-      }
+      m_bestPrefixMatch = name;
+      m_minPrefixMatchCost = 0;
     }
   }
 }
diff --git a/search/keyword_matcher.hpp b/search/keyword_matcher.hpp
index 0d9e2ee549..7bb0457dd9 100644
--- a/search/keyword_matcher.hpp
+++ b/search/keyword_matcher.hpp
@@ -33,6 +33,7 @@ public:
                  StringMatchFn keywordMatchFn, StringMatchFn prefixMatchFn);
 
   void ProcessName(string const & name);
+  void ProcessNameToken(string const & name, strings::UniString const & token);
 
   // Useful for FeatureType.ForEachName(), calls ProcessName() and always returns true.
   bool operator () (int /*lang*/, string const & name)
diff --git a/search/query.cpp b/search/query.cpp
index ca3a76a7eb..6390f0917a 100644
--- a/search/query.cpp
+++ b/search/query.cpp
@@ -2,6 +2,7 @@
 #include "delimiters.hpp"
 #include "keyword_matcher.hpp"
 #include "string_match.hpp"
+#include "../base/stl_add.hpp"
 
 namespace search
 {
@@ -43,12 +44,11 @@ Query::Query(string const & query, m2::RectD const & rect, IndexType const * pIn
   : m_queryText(query), m_rect(rect), m_pIndex(pIndex)
 {
   search::Delimiters delims;
-  for (strings::TokenizeIterator<search::Delimiters> iter(query, delims); iter; ++iter)
+  SplitAndNormalizeAndSimplifyString(query, MakeBackInsertFunctor(m_keywords), delims);
+  if (!m_keywords.empty() && !delims(strings::LastUniChar(query)))
   {
-    if (iter.IsLast() && !delims(strings::LastUniChar(query)))
-      m_prefix = strings::MakeLowerCase(iter.GetUniString());
-    else
-      m_keywords.push_back(strings::MakeLowerCase(iter.GetUniString()));
+    m_prefix.swap(m_keywords.back());
+    m_keywords.pop_back();
   }
 }
 
diff --git a/search/string_match.hpp b/search/string_match.hpp
index 4e780383b7..97e31f808b 100644
--- a/search/string_match.hpp
+++ b/search/string_match.hpp
@@ -1,5 +1,4 @@
 #pragma once
-
 #include "../base/base.hpp"
 #include "../base/buffer_vector.hpp"
 #include "../base/string_utils.hpp"
@@ -13,6 +12,13 @@ using strings::UniChar;
 namespace impl
 {
 
+template <class DelimsT, typename F>
+void SplitAndNormalizeAndSimplifyString(string const & s, F f, DelimsT const & delims)
+{
+  for (strings::TokenizeIterator<DelimsT> iter(s, delims); iter; ++iter)
+    f(strings::MakeLowerCase(iter.GetUniString()));
+}
+
 struct MatchCostData
 {
   uint32_t m_A, m_B;