ICU-1036 code coverage tests and comments updated.

X-SVN-Rev: 8348
2025-04-08 06:53:45 +00:00 · 2002-04-03 00:00:00 +00:00 · 2002-04-03 00:00:00 +00:00 · 32f9d2e206
commit 32f9d2e206
parent 59572346bd
6 changed files with 207 additions and 61 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterIteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterIteratorTest.java
@ -0,0 +1,136 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/Attic/UCharacterIteratorTest.java,v $ 
+* $Date: 2002/04/03 00:00:00 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.dev.test.lang;
+
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.text.UTF16;
+
+/**
+* Testing class for UCharacterIterator
+* @author Syn Wee Quek
+* @since april 02 2002
+*/
+public final class UCharacterIteratorTest extends TestFmwk
+{ 
+	// constructor -----------------------------------------------------
+  
+	/** Constructor */
+  	public UCharacterIteratorTest()
+  	{
+  	}
+  
+  	// public methods --------------------------------------------------
+  
+  	/** Testing cloning: a clone must return the same chars as its source */
+  	public void TestClone()
+  	{
+     	 UCharacterIterator iterator = new UCharacterIterator("testing");
+     	 UCharacterIterator cloned = (UCharacterIterator)iterator.clone();
+     	 char completed = 0;
+     	 while (completed != UCharacterIterator.DONE) {
+     	 	completed = iterator.next();
+     	 	if (completed != cloned.next()) {
+     	 		errln("Cloned operation failed");
+     	 	}
+     	 }
+  	}
+  	
+  	/**
+  	 * Testing iteration: char-wise next()/previous() must agree with
+  	 * nextCodePoint()/previousCodePoint() in both directions
+  	 */
+  	public void TestIteration()
+  	{
+  		UCharacterIterator iterator  = new UCharacterIterator(
+  		                                               ITERATION_STRING_);
+  		UCharacterIterator iterator2 = new UCharacterIterator(
+  		                                               ITERATION_STRING_);
+  		if (iterator.first() != ITERATION_STRING_.charAt(0)) {
+  			errln("Iterator failed retrieving first character");
+  		}
+  		if (iterator.last() != ITERATION_STRING_.charAt(
+                                       ITERATION_STRING_.length() - 1)) {
+  			errln("Iterator failed retrieving last character");
+  		}                                               
+  		if (iterator.getBeginIndex() != 0 || 
+  		    iterator.getEndIndex() != ITERATION_STRING_.length()) {
+  		    errln("Iterator failed determining begin and end index");
+  		}  
+  		iterator2.setIndex(0);
+  		iterator.setIndex(0);
+  		int ch = 0;
+  		while (ch != UCharacterIterator.DONE_CODEPOINT) {
+  			int index = iterator2.getIndex();
+  			ch = iterator2.nextCodePoint();
+  			if (index != ITERATION_SUPPLEMENTARY_INDEX) {
+  				if (ch != (int)iterator.next() && 
+  				    ch != UCharacterIterator.DONE_CODEPOINT) {
+  					errln("Error mismatch in next() and nextCodePoint()"); 
+  				}
+  			}
+  			else {
+  				if (UTF16.getLeadSurrogate(ch) != iterator.next() ||
+  				    UTF16.getTrailSurrogate(ch) != iterator.next()) {
+  				    errln("Error mismatch in next and nextCodePoint for " +
+  				          "supplementary characters");
+  				}
+  			}
+  		}
+  		iterator.setIndex(ITERATION_STRING_.length());
+  		iterator2.setIndex(ITERATION_STRING_.length());
+  		// do-while: ch still holds DONE_CODEPOINT from the forward pass
+  		do {
+  			int index = iterator2.getIndex();
+  			ch = iterator2.previousCodePoint();
+  			// going backwards the pair is entered from the index behind it
+  			if (index != ITERATION_SUPPLEMENTARY_INDEX + 2) {
+  				if (ch != (int)iterator.previous() && 
+  				    ch != UCharacterIterator.DONE_CODEPOINT) {
+  					errln("Error mismatch in previous() and " +
+  					      "previousCodePoint()"); 
+  				}
+  			}
+  			else {
+  				// stepping backwards meets the trail surrogate before the lead
+  				if (UTF16.getTrailSurrogate(ch) != iterator.previous() || 
+  				    UTF16.getLeadSurrogate(ch) != iterator.previous()) {
+  				    errln("Error mismatch in previous and " +
+  				          "previousCodePoint for supplementary characters");
+  				}
+  			}
+  		} while (ch != UCharacterIterator.DONE_CODEPOINT);
+  	}
+    
+    public static void main(String[] arg)
+    {
+        try
+        {
+            UCharacterIteratorTest test = new UCharacterIteratorTest();
+            test.run(arg);
+        }
+        catch (Exception e)
+        {
+        	e.printStackTrace();
+        }
+    }
+    
+    // private data members ---------------------------------------------
+    
+    private static final String ITERATION_STRING_ =
+					                    "Testing 1 2 3 \ud800\udc00 456";
+	private static final int ITERATION_SUPPLEMENTARY_INDEX = 14; // lead unit
+}
+
--- a/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UCharacterIterator.java,v $ 
- * $Date: 2002/03/15 22:48:07 $ 
- * $Revision: 1.3 $
+ * $Date: 2002/04/03 00:00:00 $ 
+ * $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -97,7 +97,7 @@ public final class UCharacterIterator implements CharacterIterator
     * Returns the current codepoint
     * @return current codepoint
     */
-    public int currentCodepoint()
+    public int currentCodePoint()
    {
        if (m_index_ >= 0 && m_index_ < m_length_) {
            return m_replaceable_.char32At(m_index_);
@ -167,7 +167,7 @@ public final class UCharacterIterator implements CharacterIterator
     */
    public char next()
    {
-        if (m_index_ < m_length_ - 1) {
+        if (m_index_ < m_length_) {
        	char result = m_replaceable_.charAt(m_index_);
            m_index_ ++;
            return result;
@ -188,20 +188,23 @@ public final class UCharacterIterator implements CharacterIterator
 	 * @return next codepoint in text or DONE_CODEPOINT if the new index is off the 
 	 *         end of the text range.
 	 */	
-	public int nextCodepoint()
+	public int nextCodePoint()
 	{
-		if (m_index_ < m_length_ - 1) {
-			int ch = m_replaceable_.charAt(m_index_);
+		if (m_index_ < m_length_) {
+			char ch = m_replaceable_.charAt(m_index_);
 			m_index_ ++;
 			if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
-			    ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
-			    ch = m_replaceable_.charAt(m_index_);
-			    if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
-			    	ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+			    ch <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
+			    m_index_ < m_length_) { // only peek if a unit follows
+			    char trail = m_replaceable_.charAt(m_index_);
+			    if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
+			    	trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
 			    	m_index_ ++;
+			    	return UCharacterProperty.getRawSupplementary(ch, 
+			    	                                              trail);
 				}
 			}
-			return currentCodepoint();
+			return ch; // single code unit, including an unpaired surrogate
        }
        return DONE_CODEPOINT;
 	}
@ -235,20 +238,23 @@ public final class UCharacterIterator implements CharacterIterator
 	 * @return previous codepoint in text or DONE_CODEPOINT if the new index is 
 	 *         off the start of the text range.
     */
-    public int previousCodepoint()
+    public int previousCodePoint()
    {
        if (m_index_ > 0) {
            m_index_ --;
-            int ch = m_replaceable_.charAt(m_index_);
+            char ch = m_replaceable_.charAt(m_index_);
 			if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
-			    ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
-			    ch = m_replaceable_.charAt(m_index_);
-			    if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
-			    	ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+			    ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE &&
+			    m_index_ > 0) { // only peek if a unit precedes the trail
+			    char lead = m_replaceable_.charAt(m_index_ - 1);
+			    if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
+			    	lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
 			    	m_index_ --;
+			    	return UCharacterProperty.getRawSupplementary(lead, 
+			    	                                              ch);
 				}
 			}
-   			return currentCodepoint();
+   			return ch; // single code unit, including an unpaired surrogate
        }
        return DONE_CODEPOINT;
    }
@ -266,7 +272,7 @@ public final class UCharacterIterator implements CharacterIterator
 	public char setIndex(int index)
 	{
 		int length = m_replaceable_.length();
-		if (index < 0 || index >= length) {
+		if (index < 0 || index > length) {
 			throw new IllegalArgumentException("Index index out of bounds");
 		}
 		m_index_ = index;
--- a/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *         /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $ 
-* $Date: 2002/03/15 22:48:07 $ 
-* $Revision: 1.6 $
+* $Date: 2002/04/03 00:00:00 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -946,7 +946,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
        
        while (strIndex < limit) { 
        	ucharIter.setIndex(strIndex);
-	        int ch = ucharIter.currentCodepoint();
+	        int ch = ucharIter.currentCodePoint();
 	        
 	        toLowerCase(locale, ch, ucharIter, result);
 	        strIndex ++;
@ -1127,7 +1127,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
        
        while (strIndex < limit) { 
        	ucharIter.setIndex(strIndex);
-	        int ch = ucharIter.currentCodepoint();
+	        int ch = ucharIter.currentCodePoint();
 	        
 	        toUpperOrTitleCase(locale, ch, ucharIter, true, result);
 	        strIndex ++;
@ -1538,7 +1538,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    * @param char32 code point
    * @return the size of the codepoint
    */
-    private static int setCodepoint(char[] target, int char32)
+    private static int setCodePoint(char[] target, int char32)
    {
        // Write the UTF-16 values
        if (char32 >= UTF16.SUPPLEMENTARY_MIN_VALUE) {
@ -1575,7 +1575,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    {
    	uchariter.setIndex(offset);
    	
-    	int ch = uchariter.previousCodepoint();
+    	int ch = uchariter.previousCodePoint();
    	
        while (ch != UCharacterIterator.DONE_CODEPOINT) {
            if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ || 
@ -1591,7 +1591,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
                // intervening cc == 230
                return false; 
            }
-            ch = uchariter.previousCodepoint();
+            ch = uchariter.previousCodePoint();
        }

        return false; // not preceded by TYPE_i
@ -1610,7 +1610,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    {
    	uchariter.setIndex(offset);
    	
-    	int ch = uchariter.previousCodepoint();
+    	int ch = uchariter.previousCodePoint();
    	
        while (ch != UCharacterIterator.DONE_CODEPOINT) {
            if (ch == LATIN_CAPITAL_LETTER_I_) {
@ -1623,25 +1623,27 @@ public final class UCharacterProperty implements Trie.DataManipulate
                // intervening cc == 230
                return false; 
            }
- 			ch = uchariter.previousCodepoint();           
+ 			ch = uchariter.previousCodePoint();           
        }

        return false; // not preceded by I
    }
    
    /** 
-    * Determines if offset is not followed by a sequence consisting of
-    * an ignorable sequence and then a cased letter {Ll, Lu, Lt}.
+    * Determines if codepoint at offset is not followed by a sequence 
+    * consisting of an ignorable sequence and then a cased letter 
+    * {Ll, Lu, Lt}.
    * @param uchariter String iterator to determine
-    * @param offset offset in string to check
-    * @return false if any character after index in src is a cased letter
+    * @param offset codepoint offset in string to check
+    * @return false if any character after offset in src is a cased letter
    * @see SpecialCasing.txt
    */
    private boolean isCFINAL(UCharacterIterator uchariter, int offset) 
    {
    	// iterator should have been determined to be not null by caller
        uchariter.setIndex(offset);
-    	int ch = uchariter.nextCodepoint();
+    	uchariter.nextCodePoint(); // rid of current codepoint
+        int ch = uchariter.nextCodePoint(); // start checking
    	
    	while (ch != UCharacterIterator.DONE_CODEPOINT) {
            int cat = getType(ch);
@ -1653,17 +1655,17 @@ public final class UCharacterProperty implements Trie.DataManipulate
            if (!isIgnorable(ch, cat)) {
                return true; // not ignorable
            }
-            ch = uchariter.nextCodepoint();
+            ch = uchariter.nextCodePoint();
        }

        return true;
    }

    /**
-    * Determines if offset is not preceded by a sequence consisting of a cased 
-    * letter {Ll, Lu, Lt} and an ignorable sequence. 
+    * Determines if codepoint at offset is not preceded by a sequence 
+    * consisting of a cased letter {Ll, Lu, Lt} and an ignorable sequence. 
    * @param uchariter string iterator to determine
-    * @param offset offset in string to check
+    * @param offset codepoint offset in string to check
    * @return true if any character before index in src is a cased letter
    * @see SpecialCasing.txt
    */
@ -1671,7 +1673,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
                                         int offset) 
    {
    	uchariter.setIndex(offset);
-    	int ch = uchariter.previousCodepoint();
+    	int ch = uchariter.previousCodePoint();
    	
        while (ch != UCharacterIterator.DONE_CODEPOINT) {
            int cat = getType(ch);
@ -1683,17 +1685,17 @@ public final class UCharacterProperty implements Trie.DataManipulate
            if (!isIgnorable(ch, cat)) {
                return false; // not ignorable
            }
-			ch = uchariter.previousCodepoint();
+			ch = uchariter.previousCodePoint();
        }

        return false; 
    }

    /** 
-    * Determines if a string at offset is followed by one or more characters 
-    * of combining class = 230.
-    * @param chariter text iterator to be determined
-    * @param offset offset in string to check
+    * Determines if a codepoint at offset in string is followed by one or 
+    * more characters of combining class = 230.
+    * @param uchariter text iterator to be determined
+    * @param offset codepoint offset in string to check
    * @return true if a string at offset is followed by one or more characters 
    *         of combining class = 230.
    * @see SpecialCasing.txt
@ -1702,7 +1704,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
                                                 int offset) 
    {
        uchariter.setIndex(offset);
-        int ch = uchariter.nextCodepoint();
+        uchariter.nextCodePoint(); // rid of current codepoint
+        int ch = uchariter.nextCodePoint(); // start checking
        
        while (ch != UCharacterIterator.DONE_CODEPOINT) {
            int cc = NormalizerImpl.getCombiningClass(ch);
@ -1712,17 +1715,17 @@ public final class UCharacterProperty implements Trie.DataManipulate
            if (cc == 0) {
                return false; // next base character, no more cc==230 following
            }
-            ch = uchariter.nextCodepoint();
+            ch = uchariter.nextCodePoint();
        }

        return false; // no more cc == 230 following
    }

    /** 
-    * Determines if a string at offset is followed by a dot above 
-    * with no characters of combining class == 230 in between 
+    * Determines if a codepoint at offset in string is followed by a dot 
+    * above with no characters of combining class == 230 in between 
    * @param uchariter text iterator to be determined
-    * @param offset offset in string to check
+    * @param offset codepoint offset of the character in string to check
    * @return true if a string at offset is followed by oa dot above 
    *         with no characters of combining class == 230 in between
    * @see SpecialCasing.txt
@ -1731,7 +1734,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
                                                int offset) 
    {
        uchariter.setIndex(offset);
-        int ch = uchariter.nextCodepoint();
+        uchariter.nextCodePoint(); // rid off current character
+        int ch = uchariter.nextCodePoint(); // start checking
        
        while (ch != UCharacterIterator.DONE_CODEPOINT) {
            if (ch == COMBINING_DOT_ABOVE_) {
@ -1741,7 +1745,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
            if (cc == 0 || cc == COMBINING_MARK_ABOVE_CLASS_) {
                return false; // next base character or cc==230 in between
            }
-            ch = uchariter.nextCodepoint();
+            ch = uchariter.nextCodePoint();
        }

        return false; // no dot above following
@ -1758,7 +1762,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    private static boolean isIgnorable(int ch, int cat) 
    {
        return cat == UCharacterCategory.NON_SPACING_MARK || ch == HYPHEN_ || 
-                      ch == SOFT_HYPHEN_;
+               ch == SOFT_HYPHEN_;
    }
      
    /**
--- a/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java,v $ 
- * $Date: 2002/03/15 22:48:07 $ 
- * $Revision: 1.9 $
+ * $Date: 2002/04/03 00:00:00 $ 
+ * $Revision: 1.10 $
 *
 *****************************************************************************************
 */
@ -74,7 +74,7 @@ class LowercaseTransliterator extends Transliterator{
        
        while (textPos < limit) {
        	original.setIndex(textPos);
-            cp = original.currentCodepoint();
+            cp = original.currentCodePoint();
            oldLen = UTF16.getCharCount(cp);
            int newLen = m_charppty_.toLowerCase(loc, cp, original, buffer);
            if (newLen >= 0) {
--- a/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java
@ -3,8 +3,8 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java,v $ 
- * $Date: 2002/03/15 22:48:07 $ 
- * $Revision: 1.14 $
+ * $Date: 2002/04/02 23:59:59 $ 
+ * $Revision: 1.15 $
 */
 package com.ibm.icu.text;
 import java.util.*;
@ -104,7 +104,7 @@ class TitlecaseTransliterator extends Transliterator {
        
        while (textPos < limit) {
        	original.setIndex(textPos);
-            cp = original.currentCodepoint();
+            cp = original.currentCodePoint();
            oldLen = UTF16.getCharCount(cp);
            
            if (!SKIP.contains(cp)) {
--- a/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java,v $ 
- * $Date: 2002/03/15 22:48:07 $ 
- * $Revision: 1.8 $
+ * $Date: 2002/04/02 23:59:59 $ 
+ * $Revision: 1.9 $
 *
 *****************************************************************************************
 */
@ -70,7 +70,7 @@ class UppercaseTransliterator extends Transliterator {
        
        while (textPos < limit) {
        	original.setIndex(textPos);
-            cp = original.currentCodepoint();
+            cp = original.currentCodePoint();
            oldLen = UTF16.getCharCount(cp);
            int newLen = m_charppty_.toUpperOrTitleCase(loc, cp, original, true, buffer);
            if (newLen >= 0) {