mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 10:17:23 +00:00
ICU-5901 RBBI.getRuleStatus(), hoist to BreakIterator from RuleBasedBreakIterator.
X-SVN-Rev: 33829
This commit is contained in:
parent
2f731899ca
commit
83ad0ae901
2 changed files with 82 additions and 32 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -372,6 +372,52 @@ public abstract class BreakIterator implements Cloneable
|
|||
*/
|
||||
public abstract int current();
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, return the status tag from the
|
||||
* break rule that determined the most recently
|
||||
* returned break position.
|
||||
* <p>
|
||||
* For break iterator types that do not support a rule status,
|
||||
* a default value of 0 is returned.
|
||||
* <p>
|
||||
* @return The status from the break rule that determined the most recently
|
||||
* returned break position.
|
||||
*
|
||||
* @draft ICU 52
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
|
||||
public int getRuleStatus() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
|
||||
* that determined the most recently returned break position.
|
||||
* <p>
|
||||
* For break iterator types that do not support rule status,
|
||||
* no values are returned.
|
||||
* <p>
|
||||
* If the size of the output array is insufficient to hold the data,
|
||||
* the output will be truncated to the available length. No exception
|
||||
* will be thrown.
|
||||
*
|
||||
* @param fillInArray an array to be filled in with the status values.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* In the event that the array is too small, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
* @draft ICU 52
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
public int getRuleStatusVec(int[] fillInArray) {
|
||||
if (fillInArray != null && fillInArray.length > 0) {
|
||||
fillInArray[0] = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a CharacterIterator over the text being analyzed.
|
||||
* For at least some subclasses of BreakIterator, this is a reference
|
||||
|
@ -422,17 +468,17 @@ public abstract class BreakIterator implements Cloneable
|
|||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int KIND_WORD = 1;
|
||||
/**
|
||||
/**
|
||||
* {@icu}
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int KIND_LINE = 2;
|
||||
/**
|
||||
/**
|
||||
* {@icu}
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int KIND_SENTENCE = 3;
|
||||
/**
|
||||
/**
|
||||
* {@icu}
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Created on May 5, 2004
|
||||
*
|
||||
*
|
||||
* Copyright (C) 2004-2013 International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
|
@ -29,13 +29,13 @@ import com.ibm.icu.util.ULocale;
|
|||
*
|
||||
*/
|
||||
public class RBBITestExtended extends TestFmwk {
|
||||
|
||||
|
||||
public static void main(String[] args)throws Exception {
|
||||
new RBBITestExtended().run(args);
|
||||
}
|
||||
|
||||
|
||||
public RBBITestExtended() {
|
||||
|
||||
|
||||
/** Creates the test object; performs no initialization of its own. */
public RBBITestExtended() {
    super();
}
|
||||
|
||||
|
||||
|
@ -65,7 +65,7 @@ public void TestExtended() {
|
|||
errln("Could not open test data file rbbitst.txt");
|
||||
return;
|
||||
}
|
||||
isr = new InputStreamReader(is, "UTF-8");
|
||||
isr = new InputStreamReader(is, "UTF-8");
|
||||
int c;
|
||||
int count = 0;
|
||||
for (;;) {
|
||||
|
@ -78,17 +78,17 @@ public void TestExtended() {
|
|||
// BOM in the test data file. Discard it.
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
UTF16.append(testFileBuf, c);
|
||||
}
|
||||
|
||||
|
||||
} catch (IOException e) {
|
||||
errln(e.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
String testString = testFileBuf.toString();
|
||||
|
||||
|
||||
|
||||
final int PARSE_COMMENT = 1;
|
||||
final int PARSE_TAG = 2;
|
||||
|
@ -239,7 +239,7 @@ public void TestExtended() {
|
|||
if (c == -1) {
|
||||
errln("Error in named character in test file at line " + lineNum +
|
||||
", col " + column);
|
||||
} else {
|
||||
} else {
|
||||
// Named code point was recognized. Insert it
|
||||
// into the test data.
|
||||
UTF16.append(tp.dataToBreak, c);
|
||||
|
@ -247,7 +247,7 @@ public void TestExtended() {
|
|||
tp.srcLine[i] = lineNum;
|
||||
tp.srcCol[i] = column;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (nameEndIdx > charIdx) {
|
||||
charIdx = nameEndIdx+1;
|
||||
|
@ -279,7 +279,7 @@ public void TestExtended() {
|
|||
if (c == CH_BACKSLASH) {
|
||||
// Check for \ at end of line, a line continuation.
|
||||
// Advance over (discard) the newline
|
||||
int cp = UTF16.charAt(testString, charIdx);
|
||||
int cp = UTF16.charAt(testString, charIdx);
|
||||
if (cp == CH_CR && charIdx<len && UTF16.charAt(testString, charIdx+1) == CH_LF) {
|
||||
// We have a CR LF
|
||||
// Need an extra increment of the input ptr to move over both of them
|
||||
|
@ -306,7 +306,7 @@ public void TestExtended() {
|
|||
tp.srcLine[i] = lineNum;
|
||||
tp.srcCol[i] = column;
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -320,7 +320,7 @@ public void TestExtended() {
|
|||
|
||||
// Normal, non-escaped data char.
|
||||
UTF16.append(tp.dataToBreak, c);
|
||||
|
||||
|
||||
// Save the mapping from offset in the data to line/column numbers in
|
||||
// the original input file. Will be used for better error messages only.
|
||||
// If there's an expected break before this char, the slot in the mapping
|
||||
|
@ -360,13 +360,13 @@ public void TestExtended() {
|
|||
|
||||
errln("Syntax Error in test file at line "+ lineNum +", col %d" + column);
|
||||
return;
|
||||
|
||||
|
||||
// parseState = PARSE_COMMENT; // TODO: unreachable. Don't stop on errors.
|
||||
// break;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -378,7 +378,7 @@ void executeTest(TestParams t) {
|
|||
if (t.bi == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
t.bi.setText(t.dataToBreak.toString());
|
||||
//
|
||||
// Run the iterator forward
|
||||
|
@ -396,14 +396,14 @@ void executeTest(TestParams t) {
|
|||
// and this one.
|
||||
for (i=prevBP+1; i<bp; i++) {
|
||||
if (t.expectedBreaks[i] != 0) {
|
||||
errln("Forward Iteration, break expected, but not found. Pos=" + i +
|
||||
errln("Forward Iteration, break expected, but not found. Pos=" + i +
|
||||
" File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the break we did find was expected
|
||||
if (t.expectedBreaks[bp] == 0) {
|
||||
errln("Forward Iteration, break found, but not expected. Pos=" + bp +
|
||||
errln("Forward Iteration, break found, but not expected. Pos=" + bp +
|
||||
" File line,col= " + t.srcLine[bp] + ", " + t.srcCol[bp]);
|
||||
} else {
|
||||
// The break was expected.
|
||||
|
@ -413,12 +413,16 @@ void executeTest(TestParams t) {
|
|||
expectedTagVal = 0;
|
||||
}
|
||||
int line = t.srcLine[bp];
|
||||
int rs = ((RuleBasedBreakIterator)t.bi).getRuleStatus();
|
||||
int rs = t.bi.getRuleStatus();
|
||||
if (rs != expectedTagVal) {
|
||||
errln("Incorrect status for forward break. Pos = " + bp +
|
||||
". File line,col = " + line + ", " + t.srcCol[bp] + "\n" +
|
||||
" Actual, Expected status = " + rs + ", " + expectedTagVal);
|
||||
}
|
||||
int[] fillInArray = new int[4];
|
||||
int numStatusVals = t.bi.getRuleStatusVec(fillInArray);
|
||||
assertTrue("", numStatusVals >= 1);
|
||||
assertEquals("", expectedTagVal, fillInArray[0]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -428,12 +432,12 @@ void executeTest(TestParams t) {
|
|||
// Verify that there were no missed expected breaks after the last one found
|
||||
for (i=prevBP+1; i<t.dataToBreak.length()+1; i++) {
|
||||
if (t.expectedBreaks[i] != 0) {
|
||||
errln("Forward Iteration, break expected, but not found. Pos=" + i +
|
||||
errln("Forward Iteration, break expected, but not found. Pos=" + i +
|
||||
" File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Run the iterator backwards, verify that the same breaks are found.
|
||||
//
|
||||
|
@ -450,14 +454,14 @@ void executeTest(TestParams t) {
|
|||
// and this one. (UVector returns zeros for index out of bounds.)
|
||||
for (i=prevBP-1; i>bp; i--) {
|
||||
if (t.expectedBreaks[i] != 0) {
|
||||
errln("Reverse Itertion, break expected, but not found. Pos=" + i +
|
||||
errln("Reverse Itertion, break expected, but not found. Pos=" + i +
|
||||
" File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the break we did find was expected
|
||||
if (t.expectedBreaks[bp] == 0) {
|
||||
errln("Reverse Itertion, break found, but not expected. Pos=" + bp +
|
||||
errln("Reverse Itertion, break found, but not expected. Pos=" + bp +
|
||||
" File line,col= " + t.srcLine[bp] + ", " + t.srcCol[bp]);
|
||||
} else {
|
||||
// The break was expected.
|
||||
|
@ -467,12 +471,12 @@ void executeTest(TestParams t) {
|
|||
expectedTagVal = 0;
|
||||
}
|
||||
int line = t.srcLine[bp];
|
||||
int rs = ((RuleBasedBreakIterator)t.bi).getRuleStatus();
|
||||
int rs = t.bi.getRuleStatus();
|
||||
if (rs != expectedTagVal) {
|
||||
errln("Incorrect status for reverse break. Pos= " + bp +
|
||||
"File line,col= " + line + ", " + t.srcCol[bp] + "\n" +
|
||||
" Actual, Expected status = " + rs + ", " + expectedTagVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prevBP = bp;
|
||||
|
@ -490,7 +494,7 @@ void executeTest(TestParams t) {
|
|||
boolean boundaryExpected = (t.expectedBreaks[i] != 0);
|
||||
boolean boundaryFound = t.bi.isBoundary(i);
|
||||
if (boundaryExpected != boundaryFound) {
|
||||
errln("isBoundary(" + i + ") incorrect.\n" +
|
||||
errln("isBoundary(" + i + ") incorrect.\n" +
|
||||
" File line,col= " + t.srcLine[i] + ", " + t.srcCol[i] +
|
||||
" Expected, Actual= " + boundaryExpected + ", " + boundaryFound);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue