ICU-3295 rbbi rt port to Java. Stubs for new classes.

X-SVN-Rev: 14935
This commit is contained in:
Andy Heninger 2004-04-12 22:08:32 +00:00
parent 6a69915b58
commit a7fafffd6e
6 changed files with 3633 additions and 3088 deletions

View file

@ -20,32 +20,32 @@ public class WriteTablesToFiles {
String suffix = (littleEndian ? "LE" : "BE");
bi = BreakIterator.getCharacterInstance();
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"char" + suffix + ".brk"), littleEndian);
bi = BreakIterator.getWordInstance();
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"word" + suffix + ".brk"), littleEndian);
bi = BreakIterator.getLineInstance();
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"line" + suffix + ".brk"), littleEndian);
bi = BreakIterator.getSentenceInstance();
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"sent" + suffix + ".brk"), littleEndian);
bi = BreakIterator.getTitleInstance();
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"title" + suffix + ".brk"), littleEndian);
java.util.Locale thai = new java.util.Locale("th", "", "");
bi = BreakIterator.getWordInstance(thai);
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"word_th" + suffix + ".brk"), littleEndian);
bi = BreakIterator.getLineInstance(thai);
((RuleBasedBreakIterator)bi).writeTablesToFile(new FileOutputStream(
((RuleBasedBreakIterator_Old)bi).writeTablesToFile(new FileOutputStream(
"line_th" + suffix + ".brk"), littleEndian);
}
}

View file

@ -97,7 +97,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
String rules = bundle.getString(rulesName);
if (classNames[kind].equals("RuleBasedBreakIterator")) {
iter = new RuleBasedBreakIterator(rules);
iter = new RuleBasedBreakIterator_Old(rules);
}
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
try {
@ -121,7 +121,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
// in our current tests.
///CLOVER:OFF
if (iter == null) {
iter = new RuleBasedBreakIterator(rules);
iter = new RuleBasedBreakIterator_Old(rules);
}
///CLOVER:ON
}

View file

@ -17,16 +17,16 @@ import java.io.IOException;
import java.io.*;
/**
* A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
* A subclass of RuleBasedBreakIterator_Old that adds the ability to use a dictionary
* to further subdivide ranges of text beyond what is possible using just the
* state-table-based algorithm. This is necessary, for example, to handle
* word and line breaking in Thai, which doesn't use spaces between words. The
* state-table-based algorithm used by RuleBasedBreakIterator is used to divide
* state-table-based algorithm used by RuleBasedBreakIterator_Old is used to divide
* up text as far as possible, and then contiguous ranges of letters are
* repeatedly compared against a list of known words (i.e., the dictionary)
* to divide them up into words.
*
* DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator,
* DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator_Old,
* but adds one more special substitution name: _dictionary_. This substitution
* name is used to identify characters in words in the dictionary. The idea is that
* if the iterator passes over a chunk of text that includes two or more characters
@ -41,7 +41,7 @@ import java.io.*;
*
* @stable ICU 2.0
*/
public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator_Old {
/**
* a list of known words that is used to divide up contiguous ranges of letters,
@ -83,9 +83,9 @@ public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
/**
* Constructs a DictionaryBasedBreakIterator.
* @param description Same as the description parameter on RuleBasedBreakIterator,
* @param description Same as the description parameter on RuleBasedBreakIterator_Old,
* except for the special meaning of DICTIONARY_VAR. This parameter is just
* passed through to RuleBasedBreakIterator's constructor.
* passed through to RuleBasedBreakIterator_Old's constructor.
* @param dictionaryStream the stream containing the dictionary data
* @stable ICU 2.0
*/
@ -97,11 +97,11 @@ public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
/**
* Returns a Builder that is customized to build a DictionaryBasedBreakIterator.
* This is the same as RuleBasedBreakIterator.Builder, except for the extra code
* This is the same as RuleBasedBreakIterator_Old.Builder, except for the extra code
* to handle the DICTIONARY_VAR tag.
* @internal
*/
protected RuleBasedBreakIterator.Builder makeBuilder() {
protected RuleBasedBreakIterator_Old.Builder makeBuilder() {
return new Builder();
}
@ -313,7 +313,7 @@ switch (categoryFlags.length % 4) {
// categories represented in the dictionary. If it is, bump the dictionary-
// character count.
int result = super.lookupCategory(c);
if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) {
if (result != RuleBasedBreakIterator_Old.IGNORE && categoryFlags[result]) {
++dictionaryCharCount;
}
return result;
@ -514,11 +514,11 @@ switch (categoryFlags.length % 4) {
/**
* The Builder class for DictionaryBasedBreakIterator inherits almost all of
* its functionality from the Builder class for RuleBasedBreakIterator, but
* its functionality from the Builder class for RuleBasedBreakIterator_Old, but
* extends it with extra logic to handle the DICTIONARY_VAR token
* @internal
*/
protected class Builder extends RuleBasedBreakIterator.Builder {
protected class Builder extends RuleBasedBreakIterator_Old.Builder {
/**
* A UnicodeSet that contains all the characters represented in the dictionary

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,234 @@
/*
*******************************************************************************
* Copyright (C) 2004 International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
/**
 * Skeleton for the new rule-based break iterator implementation.
 * <p>
 * This class is currently a stub: the iteration methods do not analyze any
 * text and simply return fixed placeholder values (see the note on each
 * method).  The Javadoc on each method describes the intended contract that
 * the finished implementation is expected to honor.
 *
 * @author andy
 */
public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {

    //=======================================================================
    // boilerplate
    //=======================================================================

    /**
     * Clones this iterator.
     * <p>
     * Stub: only the superclass clone is performed; no state specific to
     * this class is copied yet.
     * @return A newly-constructed RuleBasedBreakIterator with the same
     * behavior as this one.
     * @stable ICU 2.0
     */
    public Object clone()
    {
        RuleBasedBreakIterator_New result = (RuleBasedBreakIterator_New) super.clone();
        // TODO: real clone code
        return result;
    }

    /**
     * Returns true if both BreakIterators are of the same class, have the same
     * rules, and iterate over the same text.
     * <p>
     * Stub: rules and text are not compared yet, so only an iterator compared
     * with itself is reported as equal.  This keeps the method reflexive, as
     * required by the general contract of <code>Object.equals</code>, and
     * consistent with {@link #hashCode()}.
     * @param that the object to compare with
     * @return true if <code>that</code> is this very object.
     * @stable ICU 2.0
     */
    public boolean equals(Object that) {
        // TODO: compare class, rules and text once they exist.
        return this == that;
    }

    /**
     * Returns the description used to create this iterator
     * <p>
     * Stub: always returns the empty string.
     * @return the rule description (currently always "").
     * @stable ICU 2.0
     */
    public String toString() {
        return "";   // TODO:
    }

    /**
     * Compute a hashcode for this BreakIterator
     * <p>
     * Stub: always returns 0, which is a legal (if poorly distributed) hash
     * for any definition of equality.
     * @return A hash code
     * @stable ICU 2.0
     */
    public int hashCode()
    {
        return 0;    // TODO
    }

    //=======================================================================
    // BreakIterator overrides
    //=======================================================================

    /**
     * Sets the current iteration position to the beginning of the text.
     * (i.e., the CharacterIterator's starting offset).
     * <p>
     * Stub: always returns 0.
     * @return The offset of the beginning of the text.
     * @stable ICU 2.0
     */
    public int first() {
        return 0;    // TODO;
    }

    /**
     * Sets the current iteration position to the end of the text.
     * (i.e., the CharacterIterator's ending offset).
     * <p>
     * Stub: always returns 0.
     * @return The text's past-the-end offset.
     * @stable ICU 2.0
     */
    public int last() {
        return 0;    // TODO:
    }

    /**
     * Advances the iterator either forward or backward the specified number of steps.
     * Negative values move backward, and positive values move forward.  This is
     * equivalent to repeatedly calling next() or previous().
     * <p>
     * Stub: ignores <code>n</code> and always returns 0.
     * @param n The number of steps to move.  The sign indicates the direction
     * (negative is backwards, and positive is forwards).
     * @return The character offset of the boundary position n boundaries away from
     * the current one.
     * @stable ICU 2.0
     */
    public int next(int n) {
        return 0;    // TODO:
    }

    /**
     * Advances the iterator to the next boundary position.
     * <p>
     * Stub: always returns 0.
     * @return The position of the first boundary after this one.
     * @stable ICU 2.0
     */
    public int next() {
        return 0;    // TODO:
    }

    /**
     * Advances the iterator backwards, to the last boundary preceding this one.
     * <p>
     * Stub: always returns 0.
     * @return The position of the last boundary position preceding this one.
     * @stable ICU 2.0
     */
    public int previous() {
        return 0;    // TODO:
    }

    /**
     * Sets the iterator to refer to the first boundary position following
     * the specified position.
     * <p>
     * Stub: ignores <code>offset</code> and always returns 0.
     * @param offset The position from which to begin searching for a break position.
     * @return The position of the first break after the current position.
     * @stable ICU 2.0
     */
    public int following(int offset) {
        return 0;    // TODO:
    }

    /**
     * Sets the iterator to refer to the last boundary position before the
     * specified position.
     * <p>
     * Stub: ignores <code>offset</code> and always returns 0.
     * @param offset The position to begin searching for a break from.
     * @return The position of the last boundary before the starting position.
     * @stable ICU 2.0
     */
    public int preceding(int offset) {
        return 0;    // TODO:
    }

    /**
     * Returns true if the specified position is a boundary position.  As a side
     * effect, leaves the iterator pointing to the first boundary position at
     * or after "offset".
     * <p>
     * Stub: ignores <code>offset</code>, always returns true, and has no side
     * effect on the iteration position.
     * @param offset the offset to check.
     * @return True if "offset" is a boundary position.
     * @stable ICU 2.0
     */
    public boolean isBoundary(int offset) {
        return true;    // TODO:
    }

    /**
     * Returns the current iteration position.
     * <p>
     * Stub: always returns 0.
     * @return The current iteration position.
     * @stable ICU 2.0
     */
    public int current() {
        return 0;    // TODO:
    }

    /**
     * Return the status tag from the break rule that determined the most recently
     * returned break position.  The values appear in the rule source
     * within brackets, {123}, for example.  For rules that do not specify a
     * status, a default value of 0 is returned.  If more than one rule applies,
     * the numerically largest of the possible status values is returned.
     * <p>
     * Of the standard types of ICU break iterators, only the word break
     * iterator provides status values.  The values are defined in
     * <code>enum UWordBreak</code>, and allow distinguishing between words
     * that contain alphabetic letters, "words" that appear to be numbers,
     * punctuation and spaces, words containing ideographic characters, and
     * more.  Call <code>getRuleStatus</code> after obtaining a boundary
     * position from <code>next()</code>, <code>previous()</code>, or
     * any other break iterator functions that returns a boundary position.
     * <p>
     * Stub: always returns the default status, 0.
     * @return the status from the break rule that determined the most recently
     * returned break position.
     *
     * @draft ICU 3.0
     */
    public int getRuleStatus() {
        return 0;    // TODO:
    }

    /**
     * Get the status (tag) values from the break rule(s) that determined the most
     * recently returned break position.  The values appear in the rule source
     * within brackets, {123}, for example.  The default status value for rules
     * that do not explicitly provide one is zero.
     * <p>
     * For word break iterators, the possible values are defined in enum UWordBreak.
     * <p>
     * If the size of the output array is insufficient to hold the data,
     * the output will be truncated to the available length.  No exception
     * will be thrown.
     * <p>
     * Stub: reports exactly one status value, the default 0.  A null or
     * zero-length array is tolerated and left untouched.
     *
     * @param fillInArray an array to be filled in with the status values.
     * @return The number of rule status values from rules that determined
     * the most recent boundary returned by the break iterator.
     * In the event that the array is too small, the return value
     * is the total number of status values that were available,
     * not the reduced number that were actually returned.
     * @draft ICU 3.0
     */
    public int getRuleStatusVec(int[] fillInArray) {
        if (fillInArray != null && fillInArray.length >= 1) {  // TODO:
            fillInArray[0] = 0;
        }
        return 1;
    }

    /**
     * Return a CharacterIterator over the text being analyzed.  This version
     * of this method returns the actual CharacterIterator we're using internally.
     * Changing the state of this iterator can have undefined consequences.  If
     * you need to change it, clone it first.
     * <p>
     * Stub: no text is retained by this class yet, so a fresh iterator over
     * the empty string is returned on every call.
     * @return An iterator over the text being analyzed.
     * @stable ICU 2.0
     */
    public CharacterIterator getText() {
        // TODO: return the iterator installed by setText().
        return new StringCharacterIterator("");
    }

    /**
     * Set the iterator to analyze a new piece of text.  This function resets
     * the current iteration position to the beginning of the text.
     * <p>
     * Stub: the argument is currently ignored.
     * @param newText An iterator over the text to analyze.
     * @stable ICU 2.0
     */
    public void setText(CharacterIterator newText) {
        // TODO: remember newText and reset the iteration position.
    }
}

File diff suppressed because it is too large Load diff