ICU-1007 remove ComposedCharIter

X-SVN-Rev: 5600
2025-04-15 01:42:37 +00:00 · 2001-08-29 16:32:33 +00:00 · 2001-08-29 16:32:33 +00:00 · 44850e6120
commit 44850e6120
parent 6a7682e8a0
6 changed files with 2 additions and 299 deletions
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -70,7 +70,7 @@ LIBS = $(LIBICUDT) @LIBS@
 CPPFLAGS += @DATA_PACKAGING_CPPFLAGS@

 OBJECTS = compdata.o dcmpdata.o normlzr.o unorm.o bidi.o ubidi.o \
-ubidiwrt.o ubidiln.o chariter.o compitr.o cwchar.o schriter.o uchriter.o \
+ubidiwrt.o ubidiln.o chariter.o cwchar.o schriter.o uchriter.o \
 cpputils.o digitlst.o filestrm.o ushape.o umemstrm.o locid.o locmap.o uloc.o \
 mutex.o umutex.o putil.o udata.o uresbund.o uresdata.o resbund.o \
 uchar.o ucmp8.o ucmp16.o ucmp32.o ucmpe32.o uvector.o uhash.o uhash_us.o \
--- a/icu4c/source/common/common.dsp
+++ b/icu4c/source/common/common.dsp
@@ -108,10 +108,6 @@ SOURCE=.\compdata.cpp
 # End Source File
 # Begin Source File

-SOURCE=.\compitr.cpp
-# End Source File
-# Begin Source File
-
 SOURCE=.\convert.cpp
 # End Source File
 # Begin Source File
@@ -414,10 +410,6 @@ SOURCE=.\compdata.h
 # End Source File
 # Begin Source File

-SOURCE=.\compitr.h
-# End Source File
-# Begin Source File
-
 SOURCE=.\unicode\convert.h

 !IF  "$(CFG)" == "common - Win32 Release"
--- a/icu4c/source/common/compitr.cpp
+++ b/icu4c/source/common/compitr.cpp
@@ -1,133 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1996-2001, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*/
-
-#include "dcmpdata.h"
-
-#include "compitr.h"
-
-#include "unicode/normlzr.h"
-
-/**
- * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
- * all Unicode characters with canonical decompositions, including Korean
- * Hangul characters.
- */
-ComposedCharIter::ComposedCharIter()
-  : minDecomp(DecompData::MAX_COMPAT), 
-    hangul(FALSE),
-    curChar(0),
-    nextChar(ComposedCharIter::DONE)
-{
-}
-
-
-  /**
-   * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
-   * <p>
-   * @param compat    <tt>false</tt> for canonical decompositions only;
-   *                  <tt>true</tt> for both canonical and compatibility
-   *                  decompositions.
-   *
-   * @param options   Optional decomposition features.  Currently, the only
-   *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which
-   *                  causes this <tt>ComposedCharIter</tt> not to iterate
-   *                  over the Hangul characters and their corresponding
-   *                  Jamo decompositions.
-   */
-ComposedCharIter::ComposedCharIter(UBool compat, 
-                   int32_t options)
-  : minDecomp(compat ? 0 : DecompData::MAX_COMPAT),
-    hangul((options & Normalizer::IGNORE_HANGUL) == 0),
-    curChar(0),
-    nextChar(ComposedCharIter::DONE)
-{
-}
-
-/**
- * Determines whether there any precomposed Unicode characters not yet returned
- * by {@link #next}.
- */
-UBool ComposedCharIter::hasNext() const {
-    if (nextChar == DONE)  {
-        ((ComposedCharIter*)this)->findNextChar();
-    }
-    return nextChar != DONE;
-}
-
-/**
- * Returns the next precomposed Unicode character.
- * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
- * by Unicode, in ascending order.  After all precomposed characters have
- * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
- * to <tt>next</tt> will return {@link #DONE}.
- */
-UChar ComposedCharIter::next()
-{
-    if (nextChar == DONE)  {
-        findNextChar();
-    }
-    curChar = nextChar;
-    nextChar = DONE;
-    return curChar;
-}
-
-/**
- * Returns the Unicode decomposition of the current character.
- * This method returns the decomposition of the precomposed character most
- * recently returned by {@link #next}.  The resulting decomposition is
- * affected by the settings of the
- * {@link Normalizer#COMPATIBILITY COMPATIBILITY}
- * and {@link Normalizer#NO_HANGUL NO_HANGUL} options passed to the constructor.
- */
-void ComposedCharIter::getDecomposition(UnicodeString& result) const
-{
-    // We duplicate most of the implementation of Normalizer::decompose() here
-    // for efficiency.  One thing we don't duplicate is the recursive
-    // decomposition code.  If we detect a need to do recursive decomposition
-    // (which happens for only 16 characters in Unicode 3.0) then we delegate to
-    // Normalizer::decompose().  This gives us optimal performance without
-    // having a complete copy of Normalizer::decompose() here, with its extra
-    // baggage of recursion buffers, etc. - Liu
-
-    result.truncate(0);
-
-    uint16_t offset = ucmp16_getu(DecompData::offsets, curChar);
-    uint16_t index  = (uint16_t)(offset & DecompData::DECOMP_MASK);
-    if (index > minDecomp) {
-        if ((offset & DecompData::DECOMP_RECURSE) != 0) {
-            // Let Normalizer::decompose() handle recursive decomp
-            UnicodeString temp(curChar);
-            UErrorCode status = U_ZERO_ERROR;
-            Normalizer::decompose(temp, minDecomp > 0,
-                                  hangul ? Normalizer::IGNORE_HANGUL : 0,
-                                  result, status);
-        } else {
-            Normalizer::doAppend((const UChar*)DecompData::contents, index, result);
-        }
-    } 
-    else if (hangul && curChar >= Normalizer::HANGUL_BASE && curChar < Normalizer::HANGUL_LIMIT) {
-        Normalizer::hangulToJamo(curChar, result, (uint16_t)minDecomp);
-    } 
-    else {
-        result += curChar;
-    }
-}
-
-void ComposedCharIter::findNextChar()
-{
-    if (curChar != DONE) {
-        UChar ch = curChar;
-        while (++ch < 0xFFFF) {
-            UChar offset = ucmp16_getu(DecompData::offsets, ch);
-            if (offset > minDecomp
-                || (hangul && ch >= Normalizer::HANGUL_BASE && ch < Normalizer::HANGUL_LIMIT) ) {
-                nextChar = ch;
-                break;
-            }
-        }
-    }
-}
--- a/icu4c/source/common/compitr.h
+++ b/icu4c/source/common/compitr.h
@@ -1,130 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1996-2001, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*/
-
-#ifndef COMPITR_H
-#define COMPITR_H
-
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-
-
-/**
- * <tt>ComposedCharIter</tt> is an iterator class that returns all
- * of the precomposed characters defined in the Unicode standard, along
- * with their decomposed forms.  This is often useful when building
- * data tables (<i>e.g.</i> collation tables) which need to treat composed
- * and decomposed characters equivalently.
- * <p>
- * For example, imagine that you have built a collation table with ordering
- * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
- * characters used in a particular language.  When you process input text using
- * this table, the text must first be decomposed so that it matches the form
- * used in the table.  This can impose a performance penalty that may be
- * unacceptable in some situations.
- * <p>
- * You can avoid this problem by ensuring that the collation table contains
- * rules for both the decomposed <i>and</i> composed versions of each character.
- * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
- * composed characters in Unicode.  If the decomposition for that character
- * consists solely of characters that are listed in your ruleset, you can
- * add a new rule for the composed character that makes it equivalent to
- * its decomposition sequence.
- * <p>
- * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
- * of the composed characters in Unicode.  If you want to iterate over the
- * composed characters in a particular string, use {@link Normalizer} instead.
- * <p>
- * When constructing a <tt>ComposedCharIter</tt> there is one
- * optional feature that you can enable or disable:
- * <ul>
- *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
- *          characters and their corresponding Jamo decompositions.
- *          This option is off by default (<i>i.e.</i> Hangul processing is enabled)
- *          since the Unicode standard specifies that Hangul to Jamo 
- *          is a canonical decomposition.
- * </ul>
- * <p>
- * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
- * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
- * It will be updated as later versions of Unicode are released.
- */
-class U_COMMON_API ComposedCharIter 
-{
- public:
-  /**
-   * Constant that indicates the iteration has completed.
-   * {@link #next} returns this value when there are no more composed
-   * characters over which to iterate.
-   * This value is equal to <code>Normalizer::DONE</tt>.
-   */
-  enum { DONE = 0xffff };
-    
-  /**
-   * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
-   * all Unicode characters with canonical decompositions, including Korean
-   * Hangul characters.
-   */
-  ComposedCharIter();
-    
-  
-  /**
-   * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
-   * <p>
-   * @param compat    <tt>false</tt> for canonical decompositions only;
-   *                  <tt>true</tt> for both canonical and compatibility
-   *                  decompositions.
-   *
-   * @param options   Optional decomposition features.  Currently, the only
-   *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which
-   *                  causes this <tt>ComposedCharIter</tt> not to iterate
-   *                  over the Hangul characters and their corresponding
-   *                  Jamo decompositions.
-   */
-  ComposedCharIter(UBool compat, int32_t options);
-  
-  /**
-   * Determines whether there any precomposed Unicode characters not yet returned
-   * by {@link #next}.
-   */
-  UBool hasNext(void) const;
-  
-  /**
-   * Returns the next precomposed Unicode character.
-   * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
-   * by Unicode, in ascending order.  After all precomposed characters have
-   * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
-   * to <tt>next</tt> will return {@link #DONE}.
-   */
-  UChar next(void);
-  
-  /**
-   * Returns the Unicode decomposition of the current character.
-   * This method returns the decomposition of the precomposed character most
-   * recently returned by {@link #next}.  The resulting decomposition is
-   * affected by the settings of the options passed to the constructor.
-   * {@link Normalizer#COMPATIBILITY COMPATIBILITY}
-   * and {@link Normalizer#NO_HANGUL NO_HANGUL} options passed to the constructor.
-   */
-  void getDecomposition(UnicodeString& result) const;
-  
- private:
-  void    findNextChar(void);
-  
-  int32_t    minDecomp;
-  UBool    hangul;
-  
-  UChar    curChar;
-  UChar    nextChar;
-};
-
-#endif // _COMPITR
-
-
-
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@@ -5,7 +5,6 @@
 ********************************************************************/

 #include "tstnorm.h"
-#include "compitr.h"

 #define ARRAY_LENGTH(array) ((int32_t)(sizeof (array) / sizeof (*array)))

@@ -33,8 +32,7 @@ void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec,
        CASE(7,TestTibetan);
        CASE(8,TestCompositionExclusion);
        CASE(9,TestZeroIndex);
-        CASE(10,TestComposedCharIter);
-        CASE(11,TestVerisign);
+        CASE(10,TestVerisign);
        default: name = ""; break;
    }
 }
@@ -359,29 +357,6 @@ void BasicNormalizerTest::TestZeroIndex(void) {
    }
 }

-/**
- * Test ComposedCharIter.
- */
-void BasicNormalizerTest::TestComposedCharIter(void) {
-    ComposedCharIter iter;
-    UnicodeString decompose;
-    UnicodeString temp;
-    UnicodeString buffer;
-    UErrorCode status = U_ZERO_ERROR;
-    while (iter.hasNext()) {
-        UChar c = iter.next();
-        temp.remove(0);
-        temp.append(c);
-        iter.getDecomposition(decompose);
-        Normalizer::decompose(temp, TRUE, 0, buffer, status);
-        if (buffer != decompose) {
-            errln((UnicodeString)"FAIL: " +
-                  hex(c) + " -> ComposedCharIter:" + hex(decompose) +
-                  " vs. Normalizer:" + hex(buffer));
-        }
-    }
-}
-
 /**
 * Run a few specific cases that are failing for Verisign.
 */
--- a/icu4c/source/test/intltest/tstnorm.h
+++ b/icu4c/source/test/intltest/tstnorm.h
@@ -44,7 +44,6 @@ public:
    void TestTibetan(void);
    void TestCompositionExclusion(void);
    void TestZeroIndex(void);
-    void TestComposedCharIter(void);
    void TestVerisign(void);

 private: