ICU-45 new builder for RBBI rules, remove obsolete RBBI files

X-SVN-Rev: 8941
2025-04-08 06:53:45 +00:00 · 2002-06-25 18:53:10 +00:00 · 2002-06-25 18:53:10 +00:00 · 70621f8923
commit 70621f8923
parent 631cd39ece
20 changed files with 0 additions and 3034 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -48,21 +48,7 @@ README text !eol
 *.spp -text
 *.tri2 -text

-icu4c/source/data/brkitr/charBE.brk -text
-icu4c/source/data/brkitr/charLE.brk -text
-icu4c/source/data/brkitr/lineBE.brk -text
-icu4c/source/data/brkitr/lineLE.brk -text
-icu4c/source/data/brkitr/line_thBE.brk -text
-icu4c/source/data/brkitr/line_thLE.brk -text
-icu4c/source/data/brkitr/sentBE.brk -text
-icu4c/source/data/brkitr/sentLE.brk -text
 icu4c/source/data/brkitr/thaidict.brk -text
-icu4c/source/data/brkitr/titleBE.brk -text
-icu4c/source/data/brkitr/titleLE.brk -text
-icu4c/source/data/brkitr/wordBE.brk -text
-icu4c/source/data/brkitr/wordLE.brk -text
-icu4c/source/data/brkitr/word_thBE.brk -text
-icu4c/source/data/brkitr/word_thLE.brk -text
 icu4c/source/data/unidata/UCARules.txt -text
 icu4c/source/samples/ucnv/data02.bin -text
 icu4c/source/test/testdata/importtest.bin -text
--- a/icu4c/source/common/rbbi_tbl.cpp
+++ b/icu4c/source/common/rbbi_tbl.cpp
@ -1,246 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999 IBM Corp. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*   11/11/99    rgillam     Complete port from Java.
-**********************************************************************
-*/
-
-#include "ucmp8.h"
-#include "cmemory.h"
-#include "rbbi_tbl.h"
-#include "unicode/unistr.h"
-#ifdef RBBI_DEBUG
-#include <stdio.h>
-#endif
-
-U_NAMESPACE_BEGIN
-
-//=======================================================================
-// constructor
-//=======================================================================
-
-/**
- * Constructs a tables object on top of a pre-built memory image.  Nothing
- * is copied: the table members alias the image, and ownTables is FALSE.
- * If memory is null, or udata_getMemory() yields no image for it, every
- * table pointer is left null and fMemory is null.  In the "no image" case
- * the UDataMemory is closed here.
- *
- * @param memory The data to build the tables from; may be null.
- */
-RuleBasedBreakIteratorTables::RuleBasedBreakIteratorTables(UDataMemory* memory)
-: refCount(0),
-  ownTables(FALSE),
-  charCategoryTable(0),
-  stateTable(0),
-  backwardsStateTable(0),
-  endStates(0),
-  lookaheadStates(0),
-  numCategories(0),
-  fMemory(memory)
-{
-  if(fMemory == 0) {
-    return;
-  }
-
-  const void* image = udata_getMemory(fMemory);
-  if(image == 0) {
-    // No image to alias.  Close the data here and forget the handle, so that
-    // the destructor does not close it a second time.
-    udata_close(fMemory);
-    fMemory = 0;
-    return;
-  }
-
-  const int32_t* im = (const int32_t*)(image);
-  const int8_t*  base = (const int8_t*)(image);
-
-  // the memory image begins with an index that gives the offsets into the
-  // image for each of the fields in the BreakIteratorTables object--
-  // use those to initialize the tables object (it will end up pointing
-  // into the memory image for everything)
-  numCategories = (int32_t)im[0];
-  description = UnicodeString(TRUE, (UChar*)((int32_t)im[1] + base), -1);
-  charCategoryTable = ucmp8_openAlias((uint16_t*)((int32_t)im[2] + base),
-                  (int8_t*)((int32_t)im[3] + base), 0);
-  stateTable = (int16_t*)((int32_t)im[4] + base);
-  backwardsStateTable = (int16_t*)((int32_t)im[5] + base);
-  endStates = (int8_t*)((int32_t)im[6] + base);
-  lookaheadStates = (int8_t*)((int32_t)im[7] + base);
-}
-
-/**
- * Constructs an empty tables object that owns its tables (ownTables is
- * TRUE) and has no backing memory image.
- */
-RuleBasedBreakIteratorTables::RuleBasedBreakIteratorTables()
-: refCount(0),
-  ownTables(TRUE),
-  charCategoryTable(0),
-  stateTable(0),
-  backwardsStateTable(0),
-  endStates(0),
-  lookaheadStates(0),
-  numCategories(0),
-  fMemory(0)
-{
-    // Every table member is explicitly null/zero-initialized above, so the
-    // destructor never sees an indeterminate pointer.  This constructor
-    // depends on a RuleBasedBreakIteratorBuilder filling in all the members.
-}
-
-//=======================================================================
-// boilerplate
-//=======================================================================
-
-/**
- * Destructor.  What gets released depends on ownTables: aliased tables
- * only give back the CompactByteArray pointer and close fMemory (when
- * set); owned tables are deleted array by array.
- */
-RuleBasedBreakIteratorTables::~RuleBasedBreakIteratorTables() {
-    if (!ownTables) {
-        // Tables alias a memory image: release the category-table pointer
-        // and close the underlying data, if any.
-        uprv_free(charCategoryTable);
-        if (fMemory != 0) {
-            udata_close(fMemory);
-        }
-        return;
-    }
-
-    // Tables belong to this object.
-    delete [] stateTable;
-    delete [] backwardsStateTable;
-    delete [] endStates;
-    delete [] lookaheadStates;
-    ucmp8_close(charCategoryTable);
-}
-
-/**
- * Equality operator.  Two tables objects compare equal when their rule
- * descriptions compare equal; no other member is examined here.
- */
-UBool
-RuleBasedBreakIteratorTables::operator==(const RuleBasedBreakIteratorTables& that) const {
-    const UnicodeString& otherDescription = that.description;
-    return description == otherDescription;
-}
-
-/**
- * Computes a hash code for these tables.
- * @return The hash code of the rule description
- */
-int32_t
-RuleBasedBreakIteratorTables::hashCode() const {
-    const int32_t descriptionHash = this->description.hashCode();
-    return descriptionHash;
-}
-
-//=======================================================================
-// implementation
-//=======================================================================
-/**
- * Returns the break category of a character (its category for breaking
- * purposes, not its Unicode category), read from charCategoryTable.
- * The BreakIterator parameter is unused here; it exists for derived
- * implementations.
- */
-int32_t
-RuleBasedBreakIteratorTables::lookupCategory(UChar c, BreakIterator* /*ignored*/) const {
-    const int32_t category = ucmp8_get(charCategoryTable, c);
-    return category;
-}
-
-/**
- * Returns the next state for the given current state and character
- * category, read from the forward state table.  The table is indexed as
- * numCategories cells per state.
- */
-int32_t
-RuleBasedBreakIteratorTables::lookupState(int32_t state, int32_t category) const {
-    const int32_t cell = state * numCategories + category;
-    return stateTable[cell];
-}
-
-/**
- * Returns the next state for the given current state and character
- * category, read from the backwards state table.  The table is indexed
- * as numCategories cells per state.
- */
-int32_t
-RuleBasedBreakIteratorTables::lookupBackwardState(int32_t state, int32_t category) const {
-    const int32_t cell = state * numCategories + category;
-    return backwardsStateTable[cell];
-}
-
-/**
- * Returns the endStates flag for the specified state (nonzero marks an
- * accepting state).
- */
-UBool
-RuleBasedBreakIteratorTables::isEndState(int32_t state) const {
-    const int8_t flag = endStates[state];
-    return flag;
-}
-
-/**
- * Returns the lookaheadStates flag for the specified state (nonzero marks
- * a lookahead state).
- */
-UBool
-RuleBasedBreakIteratorTables::isLookaheadState(int32_t state) const {
-    const int8_t flag = lookaheadStates[state];
-    return flag;
-}
-
-
-#ifdef RBBI_DEBUG
-//
-//   debugDumpTables
-//
-//   Debugging aid: prints, to stdout, the character ranges belonging to each
-//   character category, followed by the forward state table.  End states
-//   are flagged with '*' and lookahead states with '%'.
-//
-void RuleBasedBreakIteratorTables::debugDumpTables() const {
-    // With no categories there is nothing to print, and the modulo
-    // arithmetic on numCategories below would divide by zero.
-    if (numCategories <= 0) {
-        return;
-    }
-
-    printf("Character Classes:\n");
-    const int NO_CLASS = 257;         // sentinel: no range has been started yet
-    const int LAST_CHAR = 0xffff;     // last UChar value to examine
-    int currentCharClass = NO_CLASS;
-    int startCurrentRange = 0;
-    int initialStringLength = 0;
-    char  buf[80];
-
-    UnicodeString *charClassRanges = new UnicodeString[numCategories];
-
-    // Walk every UChar value, LAST_CHAR included, starting a new range each
-    // time the category changes.
-    for (int ch = 0; ch <= LAST_CHAR; ch++) {
-        int charClass = ucmp8_get(charCategoryTable, ch);
-        if (charClass == currentCharClass) {
-            continue;
-        }
-
-        if (currentCharClass != NO_CLASS) {
-            // Complete the output of the previous range.
-            if (ch != startCurrentRange+1) {
-                sprintf(buf, "-%x", ch-1);
-                charClassRanges[currentCharClass].append(buf);
-            }
-            if (charClassRanges[currentCharClass].length() % 72 < initialStringLength % 72) {
-                charClassRanges[currentCharClass].append("\n     ");
-            }
-        }
-
-        // Output the start of the new range.
-        currentCharClass = charClass;
-        startCurrentRange = ch;
-        initialStringLength = charClassRanges[currentCharClass].length();
-        if (charClassRanges[currentCharClass].length() > 0)
-            charClassRanges[currentCharClass].append(", ");
-        sprintf(buf, "%x", ch);
-        charClassRanges[currentCharClass].append(buf);
-    }
-
-    // The loop only closes a range when the next one starts, so the range
-    // that runs to LAST_CHAR still needs its end value written out.
-    if (currentCharClass != NO_CLASS && startCurrentRange != LAST_CHAR) {
-        sprintf(buf, "-%x", LAST_CHAR);
-        charClassRanges[currentCharClass].append(buf);
-    }
-
-    for (int cat = 0; cat < numCategories; cat++) {
-        printf("%d:   ", cat);
-        // Write out the chars in the UnicodeStrings.
-        //    We know we didn't put anything into them except for plain ascii chars.
-        for (int j = 0; j < charClassRanges[cat].length(); j++) {
-            putchar(charClassRanges[cat].charAt(j));
-        }
-        putchar('\n');
-    }
-
-    delete [] charClassRanges;
-
-
-    // State table length might be too big by one, because the only indication
-    //   we have is the  pointer to the start of the next item in the memory
-    //   image, the backwardsStateTable, which is 4 byte aligned.
-    //
-    int   stateTableLength = backwardsStateTable - stateTable;
-    if ((stateTableLength % numCategories) == 1) {
-        stateTableLength -= 1;
-    }
-
-    printf("\n\nState Table.   *: end state     %%: look ahead state\n");
-    printf("C:\t");
-    for (int col = 0; col < numCategories; col++) {
-        printf("%d\t", col);
-    }
-    printf("\n=================================================");
-
-    for (int cell = 0; cell < stateTableLength; cell++) {
-        if (cell % numCategories == 0) {
-            // First cell of a row: print the row header (flags + state number).
-            const int state = cell / numCategories;
-            putchar('\n');
-            putchar(endStates[state] ? '*' : ' ');
-            putchar(lookaheadStates[state] ? '%' : ' ');
-            printf("%d:\t", state);
-        }
-        if (stateTable[cell] == 0) {
-            printf(".\t");
-        } else {
-            printf("%d\t", stateTable[cell]);
-        }
-    }
-    printf("\n\n\n");
-}
-#endif // RBBI_DEBUG
-
-U_NAMESPACE_END
-
--- a/icu4c/source/common/rbbi_tbl.h
+++ b/icu4c/source/common/rbbi_tbl.h
@ -1,235 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999 IBM Corp. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*   11/11/99    rgillam     Complete port from Java.
-**********************************************************************
-*/
-
-#ifndef RBBI_TBL_H
-#define RBBI_TBL_H
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/brkiter.h"
-#include "unicode/udata.h"
-#include "filestrm.h"
-
-U_CDECL_BEGIN
-#ifndef UCMP8_H
-typedef struct _CompactByteArray CompactByteArray;
-#endif
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-/* forward declarations */
-class RuleBasedBreakIterator;
-class DictionaryBasedBreakIterator;
-
-/**
- * This class contains the internal static tables that are used by the
- * RuleBasedBreakIterator.  Once created, these tables are immutable,
- * so they can be shared among all break iterators using a particular
- * set of rules.  This class uses a reference-counting scheme to
- * manage the sharing.
- *
- * Almost everything here is private or protected; access is granted to
- * RuleBasedBreakIterator and DictionaryBasedBreakIterator through the
- * friend declarations at the end of the class.
- *
- * @author Richard Gillam
- */
-class RuleBasedBreakIteratorTables {
-
-private:
-    /**
-     * The number of RuleBasedBreakIterators using this object.
-     */
-    int16_t refCount;
-
-protected:
-    /**
-     * Whether or not we own the storage for the tables (the tables may be
-     * stored in a memory-mapped file)
-     */
-    UBool ownTables;
-
-private:
-    /**
-     * The textual description that was used to create these tables
-     */
-    UnicodeString description;
-
-    /**
-     * A table that indexes from character values to character category numbers
-     */
-    CompactByteArray* charCategoryTable;
-
-    /**
-     * The table of state transitions used for forward iteration
-     */
-    int16_t* stateTable;
-
-    /**
-     * The table of state transitions used to sync up the iterator with the
-     * text in backwards and random-access iteration
-     */
-    int16_t* backwardsStateTable;
-
-    /**
-     * A list of flags indicating which states in the state table are accepting
-     * ("end") states
-     */
-    int8_t* endStates;
-
-    /**
-     * A list of flags indicating which states in the state table are
-     * lookahead states (states which turn lookahead on and off)
-     */
-    int8_t* lookaheadStates;
-
-    /**
-     * The number of character categories (and, thus, the number of columns in
-     * the state tables)
-     */
-    int32_t numCategories;
-
-    //=======================================================================
-    // constructor
-    //=======================================================================
-
-protected:
-    /**
-     * Default constructor.  Takes no tables; per the implementation's own
-     * comment, a RuleBasedBreakIteratorBuilder is expected to fill in the
-     * members afterwards.
-     */
-    RuleBasedBreakIteratorTables();
-    
-    /**
-     * Creates a tables object whose tables are read out of the memory image
-     * belonging to the given UDataMemory.
-     */
-    RuleBasedBreakIteratorTables(UDataMemory* memory);
-
-    /**
-     * The UDataMemory the tables were created from, or 0 if there is none.
-     * The destructor closes it when ownTables is FALSE.
-     */
-    UDataMemory *fMemory;
-
-private:
-    /**
-     * The copy constructor is declared private and is a no-op.
-     * THIS CLASS MAY NOT BE COPIED.
-     */
-    RuleBasedBreakIteratorTables(const RuleBasedBreakIteratorTables& that);
-
-    //=======================================================================
-    // boilerplate
-    //=======================================================================
-
-protected:
-    /**
-     * Destructor
-     */
-    virtual ~RuleBasedBreakIteratorTables();
-
-private:
-    /**
-     * The assignment operator is declared private and is a no-op.
-     * THIS CLASS MAY NOT BE COPIED.
-     */
-    RuleBasedBreakIteratorTables& operator=(const RuleBasedBreakIteratorTables& that);
-
-    /**
-     * Equality operator.  The implementation in this class returns TRUE
-     * when the two objects' rule descriptions compare equal; it looks at
-     * nothing else.
-     */
-    virtual UBool operator==(const RuleBasedBreakIteratorTables& that) const;
-
-    /**
-     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
-     * and vice versa.
-     */
-    UBool operator!=(const RuleBasedBreakIteratorTables& that) const;
-
-    /**
-     * Compute a hash code for these tables
-     * @return A hash code
-     */
-    virtual int32_t hashCode(void) const;
-
-    /**
-     * Returns the description used to create these tables
-     */
-    const UnicodeString& getRules(void) const;
-
-    //=======================================================================
-    // reference counting
-    //=======================================================================
-    
-    /**
-     * increments the reference count.
-     */
-    void addReference(void);
-
-    /**
-     * decrements the reference count and deletes the object if it reaches zero
-     */
-    void removeReference(void);
-
-protected:
-    //=======================================================================
-    // implementation
-    //=======================================================================
-    /**
-     * Looks up a character's category (i.e., its category for breaking purposes,
-     * not its Unicode category)
-     * @param c  The character to look up
-     * @param bi Not used by the implementation in this class; it is there
-     *           for derived implementations.
-     */
-    virtual int32_t lookupCategory(UChar c, BreakIterator* bi) const;
-
-    /**
-     * Given a current state and a character category, looks up the
-     * next state to transition to in the state table.
-     */
-    virtual int32_t lookupState(int32_t state, int32_t category) const;
-
-    /**
-     * Given a current state and a character category, looks up the
-     * next state to transition to in the backwards state table.
-     */
-    virtual int32_t lookupBackwardState(int32_t state, int32_t category) const;
-
-    /**
-     * Returns true if the specified state is an accepting state.
-     */
-    virtual UBool isEndState(int32_t state) const;
-
-    /**
-     * Returns true if the specified state is a lookahead state.
-     */
-    virtual UBool isLookaheadState(int32_t state) const;
-
-#ifdef RBBI_DEBUG
-    //
-    // Print out state table and character classes.
-    //    For debugging only.
-    //
-    void debugDumpTables() const;
-#endif
-
-    friend class RuleBasedBreakIterator;
-    friend class DictionaryBasedBreakIterator;
-};
-
-// Not-equal operator: the logical negation of operator==.
-inline UBool
-RuleBasedBreakIteratorTables::operator!=(const RuleBasedBreakIteratorTables& that) const {
-    return !(*this == that);
-}
-
-// Returns a reference to the stored rule description.
-inline const UnicodeString&
-RuleBasedBreakIteratorTables::getRules(void) const {
-    return this->description;
-}
-
-// Adds one to the reference count.
-inline void
-RuleBasedBreakIteratorTables::addReference(void) {
-    refCount++;
-}
-
-// Subtracts one from the reference count and deletes this object once the
-// count is zero or less.
-inline void
-RuleBasedBreakIteratorTables::removeReference(void) {
-    --refCount;
-    if (refCount <= 0) {
-        delete this;
-    }
-}
-U_NAMESPACE_END
-
-#endif
--- a/icu4c/source/data/brkitr/charBE.brk
+++ b/icu4c/source/data/brkitr/charBE.brk
--- a/icu4c/source/data/brkitr/charLE.brk
+++ b/icu4c/source/data/brkitr/charLE.brk
--- a/icu4c/source/data/brkitr/lineBE.brk
+++ b/icu4c/source/data/brkitr/lineBE.brk
--- a/icu4c/source/data/brkitr/lineLE.brk
+++ b/icu4c/source/data/brkitr/lineLE.brk
--- a/icu4c/source/data/brkitr/line_thBE.brk
+++ b/icu4c/source/data/brkitr/line_thBE.brk
--- a/icu4c/source/data/brkitr/line_thLE.brk
+++ b/icu4c/source/data/brkitr/line_thLE.brk
--- a/icu4c/source/data/brkitr/sentBE.brk
+++ b/icu4c/source/data/brkitr/sentBE.brk
--- a/icu4c/source/data/brkitr/sentLE.brk
+++ b/icu4c/source/data/brkitr/sentLE.brk
--- a/icu4c/source/data/brkitr/titleBE.brk
+++ b/icu4c/source/data/brkitr/titleBE.brk
--- a/icu4c/source/data/brkitr/titleLE.brk
+++ b/icu4c/source/data/brkitr/titleLE.brk
--- a/icu4c/source/data/brkitr/wordBE.brk
+++ b/icu4c/source/data/brkitr/wordBE.brk
--- a/icu4c/source/data/brkitr/wordLE.brk
+++ b/icu4c/source/data/brkitr/wordLE.brk
--- a/icu4c/source/data/brkitr/word_thBE.brk
+++ b/icu4c/source/data/brkitr/word_thBE.brk
--- a/icu4c/source/data/brkitr/word_thLE.brk
+++ b/icu4c/source/data/brkitr/word_thLE.brk
--- a/icu4c/source/i18n/rbbi_bld.cpp
+++ b/icu4c/source/i18n/rbbi_bld.cpp
--- a/icu4c/source/i18n/rbbi_bld.h
+++ b/icu4c/source/i18n/rbbi_bld.h
@ -1,358 +0,0 @@
-/*
-* Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   12/15/99    rgillam     Port from Java.
-**********************************************************************
-*/
-
-#ifndef RBBI_BLD_H
-#define RBBI_BLD_H
-
-#include "rbbi.h"
-#include "rbbi_tbl.h"
-#include "unicode/uniset.h"
-#include "uvector.h"
-
-class ExpressionList;
-
-//=======================================================================
-// RuleBasedBreakIterator.Builder
-//=======================================================================
-/**
- * The Builder class has the job of constructing a RuleBasedBreakIterator from a
- * textual description.  A Builder is constructed by RuleBasedBreakIterator's
- * constructor, which uses it to construct the iterator itself and then throws it
- * away.
- * <p>The construction logic is separated out into its own class for two primary
- * reasons:
- * <ul><li>The construction logic is quite complicated and large.  Separating it
- * out into its own class means the code must only be loaded into memory while a
- * RuleBasedBreakIterator is being constructed, and can be purged after that.
- * <li>There is a fair amount of state that must be maintained throughout the
- * construction process that is not needed by the iterator after construction.
- * Separating this state out into another class prevents all of the functions that
- * construct the iterator from having to have really long parameter lists,
- * (hopefully) contributing to readability and maintainability.</ul>
- * <p>NOTE: the class comment inherited from the Java original described Builder
- * as an inner class of RuleBasedBreakIterator.  In this header it is declared as
- * a separate, free-standing class.
- * <p>The class is meant to be subclassed (most members are virtual and several
- * are documented as hooks for subclasses), so its destructor is virtual.
- */
-class RuleBasedBreakIteratorBuilder {
-
-protected:
-    /**
-     * The iterator we're constructing.
-     */
-    RuleBasedBreakIterator& iterator;
-
-    /**
-     * The tables object for the iterator we're constructing.
-     */
-    RuleBasedBreakIteratorTables* tables;
-
-    /**
-     * A temporary place to hold the rules as they're being processed.
-     */
-    UVector tempRuleList;
-
-    /**
-     * A temporary holding place used for calculating the character categories.
-     * This object contains UnicodeSet objects.
-     */
-    UVector categories;
-
-    /**
-     * The number of categories (and thus the number of columns in the finished state tables)
-     */
-    int32_t numCategories;
-
-    /**
-     * A table used to map parts of regexp text to lists of character categories,
-     * rather than having to figure them out from scratch each time
-     */
-    ExpressionList* expressions;
-
-    /**
-     * A temporary holding place for the list of ignore characters
-     */
-    UnicodeSet ignoreChars;
-
-    /**
-     * A temporary holding place where the forward state table is built
-     */
-    UVector tempStateTable;
-
-    /**
-     * A list of all the states that have to be filled in with transitions to the
-     * next state that is created.  Used when building the state table from the
-     * regular expressions.
-     */
-    UVector decisionPointList;
-
-    /**
-     * A UStack for holding decision point lists.  This is used to handle nested
-     * parentheses and braces in regexps.
-     */
-    UStack decisionPointStack;
-
-    /**
-     * A list of states that loop back on themselves.  Used to handle .*?
-     */
-    UVector loopingStates;
-
-    /**
-     * Looping states actually have to be backfilled later in the process
-     * than everything else.  This is where the list of states to backfill
-     * is accumulated.  This is also used to handle .*?
-     */
-    UVector statesToBackfill;
-
-    /**
-     * A list mapping pairs of state numbers for states that are to be combined
-     * to the state number of the state representing their combination.  Used
-     * in the process of making the state table deterministic to prevent
-     * infinite recursion.
-     */
-    UVector mergeList;
-
-    /**
-     * A flag that is used to indicate when the list of looping states can
-     * be reset.
-     */
-    UBool clearLoopingStates;
-
-    /**
-     * A place where an error message can be stored if we get a parse error.
-     * The error message is never displayed anywhere, so this is useful pretty
-     * much only in conjunction with a debugger.
-     */
-    UnicodeString errorMessage;
-
-    /**
-     * A bit mask used to indicate a bit in the table's flags column that marks a
-     * state as an accepting state.
-     */
-    static const int32_t END_STATE_FLAG /*= 0x8000*/;
-
-    /**
-     * A bit mask used to indicate a bit in the table's flags column that marks a
-     * state as one the builder shouldn't loop to any looping states
-     */
-    static const int32_t DONT_LOOP_FLAG /*= 0x4000*/;
-
-    /**
-     * A bit mask used to indicate a bit in the table's flags column that marks a
-     * state as a lookahead state.
-     */
-    static const int32_t LOOKAHEAD_STATE_FLAG /*= 0x2000*/;
-
-    /**
-     * A bit mask representing the union of the mask values listed above.
-     * Used for clearing or masking off the flag bits.
-     */
-    static const int32_t ALL_FLAGS /*= END_STATE_FLAG | LOOKAHEAD_STATE_FLAG
-            | DONT_LOOP_FLAG*/;
-
-public:
-
-    /**
-     * The Builder class contains a reference to the iterator it's supposed to build.
-     */
-    RuleBasedBreakIteratorBuilder(RuleBasedBreakIterator& iteratorToBuild);
-
-    /**
-     * Destructor.  Virtual, because this class has virtual functions and is
-     * designed to be subclassed; deleting a subclass object through a pointer
-     * to this class would otherwise be undefined behavior.
-     */
-    virtual ~RuleBasedBreakIteratorBuilder();
-
-    /**
-     * This is the main function for setting up the BreakIterator's tables.  It
-     * just vectors different parts of the job off to other functions.
-     * @param description The textual rule description to build from
-     * @param err Error code in/out parameter
-     */
-    virtual void buildBreakIterator(const UnicodeString&    description,
-                                    UErrorCode& err);
-
-private:
-
-    /**
-     * This function has three main purposes:
-     * <ul><li>Perform general syntax checking on the description, so the rest of the
-     * build code can assume that it's parsing a legal description.
-     * <li>Split the description into separate rules
-     * <li>Perform variable-name substitutions (so that no one else sees variable names)
-     * </ul>
-     */
-    virtual void buildRuleList(UnicodeString& description,
-                               UErrorCode& err);
-
-protected:
-
-    /**
-     * This function performs variable-name substitutions.  First it does syntax
-     * checking on the variable-name definition.  If it's syntactically valid, it
-     * then goes through the remainder of the description and does a simple
-     * find-and-replace of the variable name with its text.  (The variable text
-     * must be enclosed in either [] or () for this to work.)
-     */
-    virtual void processSubstitution(UnicodeString& description,
-                                     int32_t ruleStart,
-                                     int32_t ruleEnd,
-                                     int32_t startPos,
-                                     UErrorCode& err);
-
-    /**
-     * This function defines a protocol for handling substitution names that
-     * are "special," i.e., that have some property beyond just being
-     * substitutions.  At the RuleBasedBreakIterator level, we have one
-     * special substitution name, "<ignore>".  Subclasses can override this
-     * function to add more.  Any special processing that has to go on beyond
-     * that which is done by the normal substitution-processing code is done
-     * here.
-     */
-    virtual void handleSpecialSubstitution(const UnicodeString& replace,
-                                           const UnicodeString& replaceWith,
-                                           int32_t startPos,
-                                           const UnicodeString& description,
-                                           UErrorCode& err);
-
-    /**
-     * This function provides a hook for subclasses to mess with the character
-     * category table.
-     */
-    virtual void mungeExpressionList();
-
-    /**
-     * This function builds the character category table.  On entry,
-     * tempRuleList is a UVector of break rules that has had variable names substituted.
-     * On exit, the charCategoryTable data member has been initialized to hold the
-     * character category table, and tempRuleList's rules have been munged to contain
-     * character category numbers everywhere a literal character or a [] expression
-     * originally occurred.
-     */
-    virtual void buildCharCategories(UErrorCode& err);
-
-private:
-
-    /**
-     * This is the function that builds the forward state table.  Most of the real
-     * work is done in parseRule(), which is called once for each rule in the
-     * description.
-     */
-    virtual void buildStateTable(UErrorCode& err);
-
-    /**
-     * This is where most of the work really happens.  This routine parses a single
-     * rule in the rule description, adding and modifying states in the state
-     * table according to the new expression.  The state table is kept deterministic
-     * throughout the whole operation, although some ugly postprocessing is needed
-     * to handle the *? token.
-     * @param rule The text of the rule to parse
-     * @param forward True if we're working on the forward state table
-     */
-    virtual void parseRule(const UnicodeString& rule,
-                           UBool               forward);
-
-    /**
-     * Update entries in the state table, and merge states when necessary to keep
-     * the table deterministic.
-     * @param rows The list of rows that need updating (the decision point list)
-     * @param pendingChars A character category list, encoded in a String.  This is the
-     * list of the columns that need updating.
-     * @param newValue Update the cells specified above to contain this value
-     */
-    virtual void updateStateTable(const UVector&       rows,
-                                  const UnicodeString& pendingChars,
-                                  int16_t              newValue);
-
-    /**
-     * The real work of making the state table deterministic happens here.  This function
-     * merges a state in the state table (specified by rowNum) with a state that is
-     * passed in (newValues).  The basic process is to copy the nonzero cells in newValues
-     * into the state in the state table (we'll call that oldValues).  If there's a
-     * collision (i.e., if the same cell has a nonzero value in both states, and it's
-     * not the SAME value), then we have to reconcile the collision.  We do this by
-     * creating a new state, adding it to the end of the state table, and using this
-     * function recursively to merge the original two states into a single, combined
-     * state.  This process may happen recursively (i.e., each successive level may
-     * involve collisions).  To prevent infinite recursion, we keep a log of merge
-     * operations.  Any time we're merging two states we've merged before, we can just
-     * supply the row number for the result of that merge operation rather than creating
-     * a new state just like it.
-     * @param rowNum The row number in the state table of the state to be updated
-     * @param newValues The state to merge it with.
-     * @param rowsBeingUpdated A copy of the list of rows passed to updateStateTable()
-     * (itself a copy of the decision point list from parseRule()).  Newly-created
-     * states get added to the decision point list if their "parents" were on it.
-     */
-    virtual void mergeStates(int32_t  rowNum,
-                             int16_t* newValues,
-                             const UVector& rowsBeingUpdated);
-
-    /**
-     * The merge list is a list of pairs of rows that have been merged somewhere in
-     * the process of building this state table, along with the row number of the
-     * row containing the merged state.  This function looks up a pair of row numbers
-     * and returns the row number of the row they combine into.  (It returns 0 if
-     * this pair of rows isn't in the merge list.)
-     */
-    virtual int32_t searchMergeList(int32_t a, int32_t b);
-
-    /**
-     * This function is used to update the list of current looping states (i.e.,
-     * states that are controlled by a *? construct).  It backfills values from
-     * the looping states into unpopulated cells of the states that are currently
-     * marked for backfilling, and then updates the list of looping states to be
-     * the new list
-     * @param newLoopingStates The list of new looping states
-     * @param endStates The list of states to treat as end states (states that
-     * can exit the loop).
-     */
-    virtual void setLoopingStates(const UVector* newLoopingStates,
-                                  const UVector& endStates);
-
-    /**
-     * This removes "ending states" and states reachable from them from the
-     * list of states to backfill.
-     * @param baseState The row number of the state to remove from the backfill list
-     */
-    virtual void eliminateBackfillStates(int32_t baseState);
-
-    /**
-     * This function completes the backfilling process by actually doing the
-     * backfilling on the states that are marked for it
-     */
-    virtual void backfillLoopingStates(void);
-
-    /**
-     * This function completes the state-table-building process by doing several
-     * postprocessing steps and copying everything into its final resting place
-     * in the iterator itself
-     * @param forward True if we're working on the forward state table
-     */
-    virtual void finishBuildingStateTable(UBool forward);
-
-    /**
-     * This function builds the backward state table from the forward state
-     * table and any additional rules (identified by the ! on the front)
-     * supplied in the description
-     */
-    virtual void buildBackwardsStateTable(UErrorCode& err);
-
-protected:
-
-    /**
-     * Sets up the error message for a syntax error in the rule description.
-     * NOTE(review): the comment carried over from the Java original said this
-     * "throws an IllegalArgumentException"; the C++ function returns void, and
-     * the message presumably ends up in the errorMessage member -- confirm
-     * against the implementation.
-     * @param message A message describing the problem
-     * @param position The position in the description where the problem was
-     * discovered
-     * @param context The string containing the error
-     */
-    virtual void setUpErrorMessage(const UnicodeString& message,
-                                   int32_t position,
-                                   const UnicodeString& context);
-};
-
-#endif
--- a/icu4c/source/i18n/unicode/parseerr.h
+++ b/icu4c/source/i18n/unicode/parseerr.h
@ -1,88 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2000, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   03/14/00    aliu        Creation.
-*   06/27/00    aliu        Change from C++ class to C struct
-**********************************************************************
-*/
-#ifndef PARSEERR_H
-#define PARSEERR_H
-
-#include "unicode/utypes.h"
-
-
-/**
- * The capacity, in UChars, of the context strings in UParseError
- * (the preContext and postContext arrays).
- * @draft ICU 2.0
- */
-enum { U_PARSE_CONTEXT_LEN = 16 };
-
-/**
- * A UParseError struct is used to return detailed information about
- * parsing errors.  It is used by ICU parsing engines that parse long
- * rules, patterns, or programs, where the text being parsed is long
- * enough that more information than a UErrorCode is needed to
- * localize the error.
- *
- * <p>The code field is an integer error code specific to each parsing
- * engine, but globally unique.  See the engine header file for
- * possible values.  (Note: the code field is currently commented out
- * of the struct below.)  The line, offset, and context fields are
- * optional; parsing engines may choose not to use them.
- *
- * <p>Examples of engines which use UParseError (or may use it in the
- * future) are RuleBasedTransliterator and RuleBasedBreakIterator.
- *
- * @draft ICU 2.0
- */
-typedef struct _UParseError {
-
-    /**
-     * An integer indicating the type of error.  If no error was
-     * encountered, the parse engine sets this to zero, and the
-     * other fields' values should be ignored.
-     *
-     * <p>Each parse engine should use a range of codes from
-     * 0xNNNN0001 to 0xNNNNFFFF, where NNNN is a 16-bit integer
-     * between 0x0001 and 0xFFFF unique to each parse engine.
-     * Parse engines should define the enum PARSE_ERROR_BASE
-     * to be 0xNNNN0000.
-     *
-     * <p>This field is disabled: its declaration below is commented out.
-     */
-    /*int32_t        code; */
-
-    /**
-     * The line on which the error occurred.  If the parse engine
-     * is not using this field, it should set it to zero.  Otherwise
-     * it should be a positive integer.  The default value of this field
-     * is -1.  It will be set to 0 if the code populating this struct is not
-     * using line numbers.
-     */
-    int32_t        line;
-
-    /**
-     * The character offset to the error.  If the line field is
-     * being used, then this offset is from the start of the line.
-     * If the line field is not being used, then this offset is from
-     * the start of the text.  The default value of this field
-     * is -1.  It will be set to an appropriate value by the code that
-     * populates the struct.
-     */
-    int32_t    offset;
-
-    /**
-     * Textual context before the error.  Null-terminated.
-     * May be the empty string if not implemented by parser.
-     */
-    UChar          preContext[U_PARSE_CONTEXT_LEN];
-
-    /**
-     * Textual context after the error.  Null-terminated.
-     * May be the empty string if not implemented by parser.
-     */
-    UChar          postContext[U_PARSE_CONTEXT_LEN];
-
-} UParseError;
-
-#endif