ICU-3295 rbbi rt port to Java. Stubs for new classes.

X-SVN-Rev: 14935
2025-04-16 02:07:15 +00:00 · 2004-04-12 22:08:32 +00:00 · 2004-04-12 22:08:32 +00:00 · a7fafffd6e
commit a7fafffd6e
parent 6a69915b58
6 changed files with 3633 additions and 3088 deletions
--- a/icu4j/src/com/ibm/icu/dev/tool/rbbi/WriteTablesToFiles.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/rbbi/WriteTablesToFiles.java
@ -20,32 +20,32 @@ public class WriteTablesToFiles {
        String suffix = (littleEndian ? "LE" : "BE");
        
        bi = BreakIterator.getCharacterInstance();
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "char" + suffix + ".brk"), littleEndian);
        
        bi = BreakIterator.getWordInstance();
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "word" + suffix + ".brk"), littleEndian);
        
        bi = BreakIterator.getLineInstance();
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "line" + suffix + ".brk"), littleEndian);
        
        bi = BreakIterator.getSentenceInstance();
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "sent" + suffix + ".brk"), littleEndian);

        bi = BreakIterator.getTitleInstance();
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "title" + suffix + ".brk"), littleEndian);

        java.util.Locale thai = new java.util.Locale("th", "", "");
        bi = BreakIterator.getWordInstance(thai);
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "word_th" + suffix + ".brk"), littleEndian);

        bi = BreakIterator.getLineInstance(thai);
-        ((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
+        ((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
                    "line_th" + suffix + ".brk"), littleEndian);
    }
 }
--- a/icu4j/src/com/ibm/icu/text/BreakIteratorFactory.java
+++ b/icu4j/src/com/ibm/icu/text/BreakIteratorFactory.java
@ -97,7 +97,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
        String[] classNames = bundle.getStringArray("BreakIteratorClasses");
        String rules = bundle.getString(rulesName);
        if (classNames[kind].equals("RuleBasedBreakIterator")) {
-            iter = new RuleBasedBreakIterator(rules);
+            iter = new RuleBasedBreakIterator_Old(rules);
        }
        else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
            try {
@ -121,7 +121,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
 	    // in our current tests.
 	    ///CLOVER:OFF
            if (iter == null) {
-                iter = new RuleBasedBreakIterator(rules);
+                iter = new RuleBasedBreakIterator_Old(rules);
            }
 	    ///CLOVER:ON
        }
--- a/icu4j/src/com/ibm/icu/text/DictionaryBasedBreakIterator.java
+++ b/icu4j/src/com/ibm/icu/text/DictionaryBasedBreakIterator.java
@ -17,16 +17,16 @@ import java.io.IOException;
 import java.io.*;

 /**
- * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
+ * A subclass of RuleBasedBreakIterator_Old that adds the ability to use a dictionary
 * to further subdivide ranges of text beyond what is possible using just the
 * state-table-based algorithm.  This is necessary, for example, to handle
 * word and line breaking in Thai, which doesn't use spaces between words.  The
- * state-table-based algorithm used by RuleBasedBreakIterator is used to divide
+ * state-table-based algorithm used by RuleBasedBreakIterator_Old is used to divide
 * up text as far as possible, and then contiguous ranges of letters are
 * repeatedly compared against a list of known words (i.e., the dictionary)
 * to divide them up into words.
 *
- * DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator,
+ * DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator_Old,
 * but adds one more special substitution name: _dictionary_.  This substitution
 * name is used to identify characters in words in the dictionary.  The idea is that
 * if the iterator passes over a chunk of text that includes two or more characters
@ -41,7 +41,7 @@ import java.io.*;
 *
 * @stable ICU 2.0
 */
-public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
+public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator_Old {

    /**
     * a list of known words that is used to divide up contiguous ranges of letters,
@ -83,9 +83,9 @@ public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {

    /**
     * Constructs a DictionaryBasedBreakIterator.
-     * @param description Same as the description parameter on RuleBasedBreakIterator,
+     * @param description Same as the description parameter on RuleBasedBreakIterator_Old,
     * except for the special meaning of DICTIONARY_VAR.  This parameter is just
-     * passed through to RuleBasedBreakIterator's constructor.
+     * passed through to RuleBasedBreakIterator_Old's constructor.
     * @param dictionaryStream the stream containing the dictionary data
     * @stable ICU 2.0
     */
@ -97,11 +97,11 @@ public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {

    /**
     * Returns a Builder that is customized to build a DictionaryBasedBreakIterator.
-     * This is the same as RuleBasedBreakIterator.Builder, except for the extra code
+     * This is the same as RuleBasedBreakIterator_Old.Builder, except for the extra code
     * to handle the DICTIONARY_VAR tag.
     * @internal
     */
-    protected RuleBasedBreakIterator.Builder makeBuilder() {
+    protected RuleBasedBreakIterator_Old.Builder makeBuilder() {
        return new Builder();
    }

@ -313,7 +313,7 @@ switch (categoryFlags.length % 4) {
        // categories represented in the dictionary.  If it is, bump the dictionary-
        // character count.
        int result = super.lookupCategory(c);
-        if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) {
+        if (result != RuleBasedBreakIterator_Old.IGNORE && categoryFlags[result]) {
            ++dictionaryCharCount;
        }
        return result;
@ -514,11 +514,11 @@ switch (categoryFlags.length % 4) {

    /**
     * The Builder class for DictionaryBasedBreakIterator inherits almost all of
-     * its functionality from the Builder class for RuleBasedBreakIterator, but
+     * its functionality from the Builder class for RuleBasedBreakIterator_Old, but
     * extends it with extra logic to handle the DICTIONARY_VAR token
     * @internal
     */
-    protected class Builder extends RuleBasedBreakIterator.Builder {
+    protected class Builder extends RuleBasedBreakIterator_Old.Builder {

        /**
         * A UnicodeSet that contains all the characters represented in the dictionary
--- a/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator.java
--- a/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_New.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_New.java
@ -0,0 +1,234 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004 International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+
+/**
+ * @author andy
+ *
+ * Stub for the new rule-based break iterator implementation.
+ * Every method currently returns a placeholder value; see the TODO markers.
+ */
+public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
+    //=======================================================================
+    // boilerplate
+    //=======================================================================
+    /**
+     * Clones this iterator.
+     * @return A newly-constructed RuleBasedBreakIterator with the same
+     * behavior as this one.
+     * @stable ICU 2.0
+     */
+    public Object clone()
+    {
+    	RuleBasedBreakIterator_New result = (RuleBasedBreakIterator_New) super.clone();
+        // TODO: real clone code
+        return result;
+    }
+
+    /**
+     * Returns true if both BreakIterators are of the same class, have the same
+     * rules, and iterate over the same text.
+     * @stable ICU 2.0
+     */
+    public boolean equals(Object that) {
+    	return false;  // TODO:
+    }
+
+    /**
+     * Returns the description used to create this iterator
+     * @stable ICU 2.0
+     */
+    public String toString() {
+        return "";      // TODO:
+    }
+
+    /**
+     * Compute a hashcode for this BreakIterator
+     * @return A hash code
+     * @stable ICU 2.0
+     */
+    public int hashCode()
+    {
+        return 0;        // TODO
+    }
+
+    //=======================================================================
+    // BreakIterator overrides
+    //=======================================================================
+
+    /**
+     * Sets the current iteration position to the beginning of the text.
+     * (i.e., the CharacterIterator's starting offset).
+     * @return The offset of the beginning of the text.
+     * @stable ICU 2.0
+     */
+	public int first() {
+		return 0;             // TODO;
+	}
+    /**
+     * Sets the current iteration position to the end of the text.
+     * (i.e., the CharacterIterator's ending offset).
+     * @return The text's past-the-end offset.
+     * @stable ICU 2.0
+     */
+	public int last() {
+		return 0;             // TODO:
+	}
+    /**
+     * Advances the iterator either forward or backward the specified number of steps.
+     * Negative values move backward, and positive values move forward.  This is
+     * equivalent to repeatedly calling next() or previous().
+     * @param n The number of steps to move.  The sign indicates the direction
+     * (negative is backwards, and positive is forwards).
+     * @return The character offset of the boundary position n boundaries away from
+     * the current one.
+     * @stable ICU 2.0
+     */
+	public int next(int n) {
+		return  0;             // TODO:
+	}
+    /**
+     * Advances the iterator to the next boundary position.
+     * @return The position of the first boundary after this one.
+     * @stable ICU 2.0
+     */
+	public int next() {
+		return  0;             // TODO:
+	}
+    /**
+     * Advances the iterator backwards, to the last boundary preceding this one.
+     * @return The position of the last boundary position preceding this one.
+     * @stable ICU 2.0
+     */
+	public int previous() {
+		return  0;             // TODO:
+	}
+    /**
+     * Sets the iterator to refer to the first boundary position following
+     * the specified position.
+     * @param offset The position from which to begin searching for a break position.
+     * @return The position of the first break after the current position.
+     * @stable ICU 2.0
+     */
+	public int following(int offset) {
+		return  0;             // TODO:
+	}
+    /**
+     * Sets the iterator to refer to the last boundary position before the
+     * specified position.
+     * @param offset The position to begin searching for a break from.
+     * @return The position of the last boundary before the starting position.
+     * @stable ICU 2.0
+     */
+    public int preceding(int offset) {
+    	return  0;             // TODO:
+    }
+
+/**
+ * Returns true if the specified position is a boundary position.  As a side
+ * effect, leaves the iterator pointing to the first boundary position at
+ * or after "offset".
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+public boolean isBoundary(int offset) {
+	return true;    // TODO:
+}
+
+/**
+ * Returns the current iteration position.
+ * @return The current iteration position.
+ * @stable ICU 2.0
+ */
+public int current() {
+		return 0;             // TODO:
+	}
+
+
+
+/**
+ * Return the status tag from the break rule that determined the most recently
+ * returned break position.  The values appear in the rule source
+ * within brackets, {123}, for example.  For rules that do not specify a
+ * status, a default value of 0 is returned.  If more than one rule applies,
+ * the numerically largest of the possible status values is returned.
+ * <p>
+ * Of the standard types of ICU break iterators, only the word break
+ * iterator provides status values.  The values are defined in
+ * <code>enum UWordBreak</code>, and allow distinguishing between words
+ * that contain alphabetic letters, "words" that appear to be numbers,
+ * punctuation and spaces, words containing ideographic characters, and
+ * more.  Call <code>getRuleStatus</code> after obtaining a boundary
+ * position from <code>next()</code>, <code>previous()</code>, or
+ * any other break iterator function that returns a boundary position.
+ * <p>
+ * @return the status from the break rule that determined the most recently
+ * returned break position.
+ *
+ * @draft ICU 3.0
+ */
+public int  getRuleStatus() {
+	return  0;             // TODO:
+}
+
+
+
+/**
+ * Get the status (tag) values from the break rule(s) that determined the most 
+ * recently returned break position.  The values appear in the rule source
+ * within brackets, {123}, for example.  The default status value for rules
+ * that do not explicitly provide one is zero.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * <p>
+ * If the size  of the output array is insufficient to hold the data,
+ *  the output will be truncated to the available length.  No exception
+ *  will be thrown.
+ *
+ * @param fillInArray an array to be filled in with the status values.  
+ * @return          The number of rule status values from rules that determined 
+ *                  the most recent boundary returned by the break iterator.
+ *                  In the event that the array is too small, the return value
+ *                  is the total number of status values that were available,
+ *                  not the reduced number that were actually returned.
+ * @draft ICU 3.0
+ */
+public int getRuleStatusVec(int[] fillInArray) {
+    if (fillInArray != null && fillInArray.length >= 1) {    // TODO:
+        fillInArray[0] = 0;
+    }
+    return 1;
+ }
+
+
+/**
+ * Return a CharacterIterator over the text being analyzed.  This version
+ * of this method returns the actual CharacterIterator we're using internally.
+ * Changing the state of this iterator can have undefined consequences.  If
+ * you need to change it, clone it first.
+ * @return An iterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+	public CharacterIterator getText() {
+		return new StringCharacterIterator("");
+	}
+
+
+    /**
+     * Set the iterator to analyze a new piece of text.  This function resets
+     * the current iteration position to the beginning of the text.
+     * @param newText An iterator over the text to analyze.
+     * @stable ICU 2.0
+     */
+	public void setText(CharacterIterator newText) {
+	}
+
+}
--- a/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_Old.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_Old.java