diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp index 6e8c36a6b7..564e51fcbe 100644 --- a/base/base_tests/string_utils_test.cpp +++ b/base/base_tests/string_utils_test.cpp @@ -348,14 +348,14 @@ void TestIter(string const & s, char const * delims, vector const & toke strings::SimpleTokenizer it(s, delims); for (size_t i = 0; i < tokens.size(); ++i) { - TEST_EQUAL(true, it, (s, delims, i)); + TEST(it, (s, delims, i)); TEST_EQUAL(*it, tokens[i], (s, delims, i)); ++it; } - TEST_EQUAL(false, it, (s, delims)); + TEST(!it, (s, delims)); size_t counter = 0; - FunctorTester f = FunctorTester(counter, tokens); + FunctorTester f(counter, tokens); strings::Tokenize(s, delims, f); TEST_EQUAL(counter, tokens.size(), ()); } @@ -366,11 +366,11 @@ void TestIterWithEmptyTokens(string const & s, char const * delims, vector tokens = {""}; + vector const tokens = {""}; TestIterWithEmptyTokens(s, ",", tokens); } { - string const s = ","; - vector tokens = {"", ""}; - TestIterWithEmptyTokens(s, ",", tokens); + string const s = ";"; + vector const tokens = {"", ""}; + TestIterWithEmptyTokens(s, ";", tokens); } { - string const s = ",,"; - vector tokens = {"", "", ""}; - TestIterWithEmptyTokens(s, ",", tokens); + string const s = ";;"; + vector const tokens = {"", "", ""}; + TestIterWithEmptyTokens(s, ";", tokens); } { string const s = "Hello, World!"; - vector tokens = {s}; + vector const tokens = {s}; TestIterWithEmptyTokens(s, "", tokens); } { string const s = "Hello, World!"; - vector tokens = {"Hello", " World", ""}; + vector const tokens = {"Hello", " World", ""}; TestIterWithEmptyTokens(s, ",!", tokens); } { - string const s = ",a,b,,c,d,"; - vector tokens = {"", "a", "b", "", "c", "d", ""}; - TestIterWithEmptyTokens(s, ",", tokens); + string const s = ";a;b;;c;d;"; + vector const tokens = {"", "a", "b", "", "c", "d", ""}; + TestIterWithEmptyTokens(s, ";", tokens); } } diff --git a/base/string_utils.hpp b/base/string_utils.hpp index 
33b7af7d2d..c114748369 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -113,8 +113,8 @@ public: Move(); } - // Use default-constructed iterator for operator == to determine an - // end of a token stream. + // Use default-constructed iterator for operator == to determine the + // end of the token stream. TokenizeIterator() = default; string operator*() const @@ -137,11 +137,16 @@ public: return *this; } - // Same as operator bool() in expression it == end(...). - bool operator==(TokenizeIterator const &) { return !(*this); } + bool operator==(TokenizeIterator const & rhs) const + { + if (!*this && !rhs) + return true; + if (*this && rhs) + return m_start == rhs.m_start && m_end == rhs.m_end && m_finish == rhs.m_finish; + return false; + } - // Same as operator bool() in expression it != end(...). - bool operator!=(TokenizeIterator const &) { return (*this); } + bool operator!=(TokenizeIterator const & rhs) const { return !(*this == rhs); } private: void Move() @@ -155,9 +160,21 @@ private: ++m_end; } + // Token is defined as a pair (|m_start|, |m_end|), where: + // + // * m_start < m_end + // * m_start == begin or m_delimFn(m_start - 1) + // * m_end == m_finish or m_delimFn(m_end) + // * for all i from [m_start, m_end): !m_delimFn(i) + // + // This version of TokenizeIterator iterates over all non-empty tokens and + // keeps the invariant above. TIt m_start; TIt m_end; + + // The end of the string the iterator iterates over. TIt m_finish; + TDelimFn m_delimFn; }; @@ -179,8 +196,16 @@ public: ++m_end; } - // Use default-constructed iterator for operator == to determine an - // end of a token stream. + // *NOTE* |s| must not be a temporary: the iterator keeps iterators into it. 
+ TokenizeIterator(UniString const & s, TDelimFn const & delimFn) + : m_start(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFn(delimFn), m_finished(false) + { + while (m_end != m_finish && !m_delimFn(*m_end)) + ++m_end; + } + + // Use default-constructed iterator for operator == to determine the + // end of the token stream. TokenizeIterator() = default; string operator*() const @@ -203,11 +228,19 @@ public: return *this; } - // Same as operator bool() in expression it == end(...). - bool operator==(TokenizeIterator const &) { return !(*this); } + bool operator==(TokenizeIterator const & rhs) const + { + if (!*this && !rhs) + return true; + if (*this && rhs) + { + return m_start == rhs.m_start && m_end == rhs.m_end && m_finish == rhs.m_finish && + m_finished == rhs.m_finished; + } + return false; + } - // Same as operator bool() in expression it != end(...). - bool operator!=(TokenizeIterator const &) { return (*this); } + bool operator!=(TokenizeIterator const & rhs) const { return !(*this == rhs); } private: void Move() @@ -228,10 +261,25 @@ private: ++m_end; } + // Token is defined as a pair (|m_start|, |m_end|), where: + // + // * m_start <= m_end + // * m_start == begin or m_delimFn(m_start - 1) + // * m_end == m_finish or m_delimFn(m_end) + // * for all i from [m_start, m_end): !m_delimFn(i) + // + // This version of TokenizeIterator iterates over all tokens, including + // empty ones, and keeps the invariant above. TIt m_start; TIt m_end; + + // The end of the string the iterator iterates over. TIt m_finish; + TDelimFn m_delimFn; + + // When true, the iterator is at the end position and is no longer + // valid. bool m_finished; };