ICU-3295 rbbi rt port to Java.

X-SVN-Rev: 15241
2025-04-14 09:21:03 +00:00 · 2004-05-10 22:51:38 +00:00 · 2004-05-10 22:51:38 +00:00 · f4f77062d8
commit f4f77062d8
parent 51f4d6a8a2
3 changed files with 64 additions and 49 deletions
--- a/icu4j/src/com/ibm/icu/text/RBBIDataWrapper.java
+++ b/icu4j/src/com/ibm/icu/text/RBBIDataWrapper.java
@ -21,23 +21,22 @@ import com.ibm.icu.impl.CharTrie;
 * <p>Internal class used for Rule Based Break Iterators</p>
 * <p>This class provides access to the compiled break rule data, as
 * it is stored in a .brk file.  
+* @internal
 * 
 */
-
-
-public class RBBIDataWrapper {
+class RBBIDataWrapper {
    //
    // These fields are the ready-to-use compiled rule data, as
    //   read from the file.
    //
-    public RBBIDataHeader fHeader;
-    public short          fFTable[];
-    public short          fRTable[];
-    public short          fSFTable[];
-    public short          fSRTable[];
-    public CharTrie       fTrie;
-    public String         fRuleSource;
-    public int            fStatusTable[];
+    RBBIDataHeader fHeader;
+    short          fFTable[];
+    short          fRTable[];
+    short          fSFTable[];
+    short          fSRTable[];
+    CharTrie       fTrie;
+    String         fRuleSource;
+    int            fStatusTable[];
    
    // Index offsets to the fields in a state table row.
    //    Corresponds to struct RBBIStateTableRow in the C version.
@ -148,7 +147,7 @@ public class RBBIDataWrapper {
        RBBIDataWrapper This = new RBBIDataWrapper();
        
        // Seek past the ICU data header.
-        //   TODO:  verify that it looks good.
+        //   TODO:  verify that the header looks good.
        dis.skip(0x80);
        
        // Read in the RBBI data header...
@ -295,7 +294,9 @@ public class RBBIDataWrapper {
    
    
    
-    /** Debug function to display the break iterator data.  */
+    /** Debug function to display the break iterator data.  
+     *  @internal
+     */
    void dump() {
        System.out.println("RBBI Data Wrapper dump ...");
        System.out.println();
@ -314,7 +315,6 @@ public class RBBIDataWrapper {
    }
    
    /** Fixed width int-to-string conversion.   
-     *  TODO:  there must be an easy built-in way to do this  
     *  @internal
     * 
     */
@ -328,7 +328,6 @@ public class RBBIDataWrapper {
    }
    
    /** Fixed width int-to-string conversion.   
-     *  TODO:  there must be an easy built-in way to do this  
     *  @internal
     * 
     */
--- a/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator.java
@ -13,6 +13,7 @@ import java.io.IOException;

 /**
 * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
+ * @stable ICU 2.0
 */

 public class RuleBasedBreakIterator extends BreakIterator {
@ -57,10 +58,10 @@ public class RuleBasedBreakIterator extends BreakIterator {
     * produced by the ICU4C tool "genbrk".
     * @return A RuleBasedBreakIterator based on the supplied break rules.
     * @throws IOException
+     * @draft ICU 3.0
     */
    public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
-     // TODO:
-        return null;      
+        return RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);      
    }
    
    
@ -114,24 +115,33 @@ public class RuleBasedBreakIterator extends BreakIterator {
        /** Tag value for "words" that do not fit into any of other categories. 
         *  Includes spaces and most punctuation. */
        public static final int WORD_NONE           = 0;
-        /** Upper bound for tags for uncategorized words. */
+        /** Upper bound for tags for uncategorized words. 
+         *  @draft ICU 3.0 */
        public static final int WORD_NONE_LIMIT     = 100;
-        /** Tag value for words that appear to be numbers, lower limit. */
+        /** Tag value for words that appear to be numbers, lower limit. 
+        *  @draft ICU 3.0 */
        public static final int WORD_NUMBER         = 100;
-        /** Tag value for words that appear to be numbers, upper limit. */
+        /** Tag value for words that appear to be numbers, upper limit.
+        *  @draft ICU 3.0 */
        public static final int WORD_NUMBER_LIMIT   = 200;
        /** Tag value for words that contain letters, excluding
-         *  hiragana, katakana or ideographic characters, lower limit. */
+         *  hiragana, katakana or ideographic characters, lower limit. 
+        *  @draft ICU 3.0 */
        public static final int WORD_LETTER         = 200;
-        /** Tag value for words containing letters, upper limit  */
+        /** Tag value for words containing letters, upper limit 
+        *  @draft ICU 3.0 */
        public static final int WORD_LETTER_LIMIT   = 300;
-        /** Tag value for words containing kana characters, lower limit */
+        /** Tag value for words containing kana characters, lower limit
+        *  @draft ICU 3.0 */
        public static final int WORD_KANA           = 300;
-        /** Tag value for words containing kana characters, upper limit */
+        /** Tag value for words containing kana characters, upper limit
+        *  @draft ICU 3.0 */
        public static final int WORD_KANA_LIMIT     = 400;
-        /** Tag value for words containing ideographic characters, lower limit */
+        /** Tag value for words containing ideographic characters, lower limit
+        *  @draft ICU 3.0 */
        public static final int WORD_IDEO           = 400;
-        /** Tag value for words containing ideographic characters, upper limit */
+        /** Tag value for words containing ideographic characters, upper limit
+        *  @draft ICU 3.0 */
        public static final int WORD_IDEO_LIMIT     = 500;

    //=======================================================================
--- a/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_New.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedBreakIterator_New.java
@ -13,6 +13,7 @@ import java.io.InputStream;

 /**
 * Rule Based Break Iterator implementation.
+ * @internal
 */
 public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
    
@ -133,6 +134,11 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
    //=======================================================================
    // Constructors & Factories
    //=======================================================================
+    
+    /**
+     * Create a break iterator from a precompiled set of rules.
+     * @internal
+     */
    public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
        RuleBasedBreakIterator_New  This = new RuleBasedBreakIterator_New();
        This.fRData = RBBIDataWrapper.get(is);
@ -506,28 +512,6 @@ public int current() {



-/**
- * Return the status tag from the break rule that determined the most recently
- * returned break position.  The values appear in the rule source
- * within brackets, {123}, for example.  For rules that do not specify a
- * status, a default value of 0 is returned.  If more than one rule applies,
- * the numerically largest of the possible status values is returned.
- * <p>
- * Of the standard types of ICU break iterators, only the word break
- * iterator provides status values.  The values are defined in
- * <code>enum UWordBreak</code>, and allow distinguishing between words
- * that contain alphabetic letters, "words" that appear to be numbers,
- * punctuation and spaces, words containing ideographic characters, and
- * more.  Call <code>getRuleStatus</code> after obtaining a boundary
- * position from <code>next()<code>, <code>previous()</code>, or 
- * any other break iterator functions that returns a boundary position.
- * <p>
- * @return the status from the break rule that determined the most recently
- * returned break position.
- *
- * @draft ICU 3.0
- */
-
 private void makeRuleStatusValid() {
    if (fLastStatusIndexValid == false) {
        //  No cached status is available.
@ -553,6 +537,28 @@ private void makeRuleStatusValid() {



+/**
+ * Return the status tag from the break rule that determined the most recently
+ * returned break position.  The values appear in the rule source
+ * within brackets, {123}, for example.  For rules that do not specify a
+ * status, a default value of 0 is returned.  If more than one rule applies,
+ * the numerically largest of the possible status values is returned.
+ * <p>
+ * Of the standard types of ICU break iterators, only the word break
+ * iterator provides status values.  The values are defined in
+ * <code>enum UWordBreak</code>, and allow distinguishing between words
+ * that contain alphabetic letters, "words" that appear to be numbers,
+ * punctuation and spaces, words containing ideographic characters, and
+ * more.  Call <code>getRuleStatus</code> after obtaining a boundary
+ * position from <code>next()</code>, <code>previous()</code>, or 
+ * any other break iterator function that returns a boundary position.
+ * <p>
+ * @return the status from the break rule that determined the most recently
+ * returned break position.
+ *
+ * @draft ICU 3.0
+ */
+
 public int  getRuleStatus() {
    makeRuleStatusValid();
    //   Status records have this form:
@ -943,7 +949,7 @@ public int getRuleStatusVec(int[] fillInArray) {
                        if (stateTable[row + RBBIDataWrapper.ACCEPTING] != lookaheadStatus) {  
                            // TODO:  handle this case of overlapping lookahead matches.
                            //        With correctly written rules, we won't get here.
-                            System.out.println("Trouble in handlePrevious()");  // comment out 
+                            // System.out.println("Trouble in handlePrevious()"); 
                        }
                        result               = lookaheadResult;
                        fLastRuleStatusIndex = lookaheadTagIdx;