From 64e16dfb5ef2624479444a5d309a68deb6e61161 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 27 Jan 2006 00:24:30 +0000 Subject: [PATCH] ICU-4152 UnicodeSetIterator::getString() works with code points X-SVN-Rev: 19028 --- icu4c/source/common/unicode/usetiter.h | 63 ++++++++++--------- icu4c/source/test/intltest/usettest.cpp | 84 ++++++++++++++++++++++++- icu4c/source/test/intltest/usettest.h | 4 +- 3 files changed, 121 insertions(+), 30 deletions(-) diff --git a/icu4c/source/common/unicode/usetiter.h b/icu4c/source/common/unicode/usetiter.h index 01a49c1140b..defa75cd7ed 100644 --- a/icu4c/source/common/unicode/usetiter.h +++ b/icu4c/source/common/unicode/usetiter.h @@ -32,19 +32,19 @@ class UnicodeString; * or methods declared as "protected" to be private. The use of * protected in this class is an artifact of history. * - *

To iterate over code points, use a loop like this: + *

To iterate over code points and strings, use a loop like this: *

  * UnicodeSetIterator it(set);
  * while (set.next()) {
- *   if (set.isString()) {
- *     processString(set.getString());
- *   } else {
- *     processCodepoint(set.getCodepoint());
- *   }
+ *     processItem(it.getString());
  * }
  * 
+ *

Each item in the set is accessed as a string. Set elements + * consisting of single code points are returned as strings containing + * just the one code point. * - *

To iterate over code point ranges, use a loop like this: + *

To iterate over code point ranges, instead of individual code points, + * use a loop like this: *

  * UnicodeSetIterator it(set);
  * while (it.nextRange()) {
@@ -125,9 +125,14 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
      * caller can retrieve it with getString().  If this
      * method returns false, the current element is a code point or
      * code point range, depending on whether next() or
-     * nextRange() was called, and the caller can retrieve it
-     * with getCodepoint() and, for a range,
-     * getCodepointEnd().
+     * nextRange() was called.
+     * Elements of types string and codepoint can both be retrieved
+     * with the function getString().
+     * Elements of type codepoint can also be retrieved with
+     * getCodepoint().
+     * For ranges, getCodepoint() returns the starting codepoint
+     * of the range, and getCodepointEnd() returns the end
+     * of the range.
      * @stable ICU 2.4
      */
     inline UBool isString() const;
@@ -161,22 +166,25 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
     const UnicodeString& getString();
 
     /**
-     * Returns the next element in the set, either a single code point
-     * or a string.  If there are no more elements in the set, return
-     * false.  If codepoint == IS_STRING, the value is a
-     * string in the string field.  Otherwise the value is a
-     * single code point in the codepoint field.
+     * Advances the iteration position to the next element in the set, 
+     * which can be either a single code point or a string.  
+     * If there are no more elements in the set, return false.
+     *
+     * 

+ * If isString() == TRUE, the value is a + * string, otherwise the value is a + * single code point. Elements of either type can be retrieved + * with the function getString(), while elements + * consisting of a single code point can be retrieved with + * getCodepoint(). * *

The order of iteration is all code points in sorted order, - * followed by all strings sorted order. codepointEnd is - * undefined after calling this method. string is - * undefined unless codepoint == IS_STRING. Do not mix + * followed by all strings in sorted order. Do not mix * calls to next() and nextRange() without * calling reset() between them. The results of doing so * are undefined. * - * @return true if there was another element in the set and this - * object contains the element. + * @return true if there was another element in the set. * @stable ICU 2.4 */ UBool next(); @@ -184,21 +192,20 @@ class U_COMMON_API UnicodeSetIterator : public UObject { /** * Returns the next element in the set, either a code point range * or a string. If there are no more elements in the set, return - * false. If codepoint == IS_STRING, the value is a - * string in the string field. Otherwise the value is a - * range of one or more code points from codepoint to - * codepointeEnd inclusive. + * false. If isString() == TRUE, the value is a + * string and can be accessed with getString(). Otherwise the value is a + * range of one or more code points from getCodepoint() to + * getCodepointEnd() inclusive. * *

The order of iteration is all code points ranges in sorted * order, followed by all strings sorted order. Ranges are - * disjoint and non-contiguous. string is undefined - * unless codepoint == IS_STRING. Do not mix calls to + * disjoint and non-contiguous. The value returned from getString() + * is undefined unless isString() == TRUE. Do not mix calls to * next() and nextRange() without calling * reset() between them. The results of doing so are * undefined. * - * @return true if there was another element in the set and this - * object contains the element. + * @return true if there was another element in the set. * @stable ICU 2.4 */ UBool nextRange(); diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index d2ea4b488af..b05c4bd16a0 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -1,6 +1,6 @@ /* ************************************************************************************** -* Copyright (C) 1999-2005 Alan Liu ,International Business Machines Corporation and +* Copyright (C) 1999-2006 Alan Liu ,International Business Machines Corporation and * others. All Rights Reserved. 
************************************************************************************** * Date Name Description @@ -68,6 +68,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec, CASE(17,TestSymbolTable); CASE(18,TestSurrogate); CASE(19,TestPosixClasses); + CASE(20,TestIteration); default: name = ""; break; } } @@ -674,6 +675,87 @@ void UnicodeSetTest::TestAPI() { } } +void UnicodeSetTest::TestIteration() { + UErrorCode ec = U_ZERO_ERROR; + int i = 0; + int outerLoop; + + // 6 code points, 3 ranges, 2 strings, 8 total elements + // Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2" + UnicodeSet set("[zabyc\\U0001abcd{str1}{str2}]", ec); + TEST_ASSERT_SUCCESS(ec); + UnicodeSetIterator it(set); + + for (outerLoop=0; outerLoop<3; outerLoop++) { + // Run the test multiple times, to check that iterator.reset() is working. + for (i=0; i<10; i++) { + UBool nextv = it.next(); + UBool isString = it.isString(); + int32_t codePoint = it.getCodepoint(); + int32_t codePointEnd = it.getCodepointEnd(); + UnicodeString s = it.getString(); + switch (i) { + case 0: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x61); + TEST_ASSERT(s == "a"); + break; + case 1: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x62); + TEST_ASSERT(s == "b"); + break; + case 2: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x63); + TEST_ASSERT(s == "c"); + break; + case 3: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x79); + TEST_ASSERT(s == "y"); + break; + case 4: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x7a); + TEST_ASSERT(s == "z"); + break; + case 5: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == FALSE); + TEST_ASSERT(codePoint==0x1abcd); + TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd)); + break; + case 6: + TEST_ASSERT(nextv 
== TRUE); + TEST_ASSERT(isString == TRUE); + TEST_ASSERT(s == "str1"); + break; + case 7: + TEST_ASSERT(nextv == TRUE); + TEST_ASSERT(isString == TRUE); + TEST_ASSERT(s == "str2"); + break; + case 8: + TEST_ASSERT(nextv == FALSE); + break; + case 9: + TEST_ASSERT(nextv == FALSE); + break; + } + } + it.reset(); // prepare to run the iteration again. + } +} + + + + void UnicodeSetTest::TestStrings() { UErrorCode ec = U_ZERO_ERROR; diff --git a/icu4c/source/test/intltest/usettest.h b/icu4c/source/test/intltest/usettest.h index 0d496228c3b..f5c253872b0 100644 --- a/icu4c/source/test/intltest/usettest.h +++ b/icu4c/source/test/intltest/usettest.h @@ -1,7 +1,7 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2005, International Business Machines Corporation and + * Copyright (c) 1997-2006, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** ********************************************************************** @@ -47,6 +47,8 @@ private: void TestAPI(void); + void TestIteration(void); + void TestStrings(void); void TestScriptSet(void);