From 4154928d448cbe47b2f2057cdb761d8ff0fdcd70 Mon Sep 17 00:00:00 2001 From: vng Date: Sun, 4 Mar 2012 01:54:45 +0300 Subject: [PATCH] [search] Check for full match with tokens, Closed #703. --- search/feature_offset_match.hpp | 77 +++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 0e5f473395..5fd2ea09d1 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -4,6 +4,7 @@ #include "../indexer/search_trie.hpp" #include "../base/string_utils.hpp" +//#include "../base/logging.hpp" #include "../std/algorithm.hpp" #include "../std/scoped_ptr.hpp" @@ -29,10 +30,14 @@ size_t CalcEqualLength(SrcIterT b, SrcIterT e, CompIterT bC, CompIterT eC) TrieIterator * MoveTrieIteratorToString(TrieIterator const & trieRoot, strings::UniString const & queryS, - size_t & symbolsMatched) + size_t & symbolsMatched, + bool & bFullEdgeMatched) { - scoped_ptr pIter(trieRoot.Clone()); symbolsMatched = 0; + bFullEdgeMatched = false; + + scoped_ptr pIter(trieRoot.Clone()); + size_t const szQuery = queryS.size(); while (symbolsMatched < szQuery) { @@ -42,14 +47,17 @@ TrieIterator * MoveTrieIteratorToString(TrieIterator const & trieRoot, { size_t const szEdge = pIter->m_edge[i].m_str.size(); - size_t const count = CalcEqualLength(pIter->m_edge[i].m_str.begin(), - pIter->m_edge[i].m_str.end(), - queryS.begin() + symbolsMatched, - queryS.end()); + size_t const count = CalcEqualLength( + pIter->m_edge[i].m_str.begin(), + pIter->m_edge[i].m_str.end(), + queryS.begin() + symbolsMatched, + queryS.end()); if ((count > 0) && (count == szEdge || szQuery == count + symbolsMatched)) { scoped_ptr(pIter->GoToEdge(i)).swap(pIter); + + bFullEdgeMatched = (count == szEdge); symbolsMatched += count; bMatched = true; break; @@ -62,6 +70,25 @@ TrieIterator * MoveTrieIteratorToString(TrieIterator const & trieRoot, return pIter->Clone(); } +namespace +{ + bool 
CheckMatchString(strings::UniChar const * rootPrefix, + size_t rootPrefixSize, + strings::UniString & s) + { + if (rootPrefixSize > 0) + { + if (s.size() < rootPrefixSize || + !StartsWith(s.begin(), s.end(), rootPrefix, rootPrefix + rootPrefixSize)) + return false; + + s = strings::UniString(s.begin() + rootPrefixSize, s.end()); + } + + return true; + } +} + template void FullMatchInTrie(TrieIterator const & trieRoot, strings::UniChar const * rootPrefix, @@ -69,18 +96,18 @@ void FullMatchInTrie(TrieIterator const & trieRoot, strings::UniString s, F & f) { - if (rootPrefixSize > 0) - { - if (s.size() < rootPrefixSize || - !StartsWith(s.begin(), s.end(), rootPrefix, rootPrefix + rootPrefixSize)) + if (!CheckMatchString(rootPrefix, rootPrefixSize, s)) return; - s = strings::UniString(s.begin() + rootPrefixSize, s.end()); - } size_t symbolsMatched = 0; - scoped_ptr pIter(MoveTrieIteratorToString(trieRoot, s, symbolsMatched)); - if (!pIter || symbolsMatched != s.size()) + bool bFullEdgeMatched; + scoped_ptr pIter( + MoveTrieIteratorToString(trieRoot, s, symbolsMatched, bFullEdgeMatched)); + + if (!pIter || !bFullEdgeMatched || symbolsMatched != s.size()) return; + + ASSERT_EQUAL ( symbolsMatched, s.size(), () ); for (size_t i = 0; i < pIter->m_value.size(); ++i) f(pIter->m_value[i]); } @@ -92,20 +119,22 @@ void PrefixMatchInTrie(TrieIterator const & trieRoot, strings::UniString s, F & f) { - if (rootPrefixSize > 0) - { - if (s.size() < rootPrefixSize || - !StartsWith(s.begin(), s.end(), rootPrefix, rootPrefix + rootPrefixSize)) + if (!CheckMatchString(rootPrefix, rootPrefixSize, s)) return; - s = strings::UniString(s.begin() + rootPrefixSize, s.end()); - } stack trieQueue; { size_t symbolsMatched = 0; - search::TrieIterator * const pRootIter = MoveTrieIteratorToString(trieRoot, s, symbolsMatched); + bool bFullEdgeMatched; + search::TrieIterator * const pRootIter = + MoveTrieIteratorToString(trieRoot, s, symbolsMatched, bFullEdgeMatched); + + 
UNUSED_VALUE(symbolsMatched); + UNUSED_VALUE(bFullEdgeMatched); + if (!pRootIter) return; + trieQueue.push(pRootIter); } @@ -113,8 +142,10 @@ void PrefixMatchInTrie(TrieIterator const & trieRoot, { scoped_ptr pIter(trieQueue.top()); trieQueue.pop(); + for (size_t i = 0; i < pIter->m_value.size(); ++i) f(pIter->m_value[i]); + for (size_t i = 0; i < pIter->m_edge.size(); ++i) trieQueue.push(pIter->GoToEdge(i)); } @@ -213,6 +244,9 @@ void MatchFeaturesInTrie(vector > const & tokens, FilterT const & filter, ToDo & toDo) { + //LOG(LDEBUG, ("Tokens: ", tokens)); + //LOG(LDEBUG, ("Prefix: ", prefixTokens)); + impl::OffsetIntersecter intersecter(filter); // Match tokens. @@ -251,5 +285,4 @@ void MatchFeaturesInTrie(vector > const & tokens, intersecter.ForEachResult(toDo); } - } // namespace search