ICU-3295 rbbi rt port to Java, bugfixes ported over from C++

X-SVN-Rev: 15441
This commit is contained in:
Andy Heninger 2004-05-20 21:27:44 +00:00
parent 4b51dcf477
commit 8b6db59694
6 changed files with 90 additions and 55 deletions

View file

@ -800,9 +800,7 @@ public class BreakIteratorTest extends TestFmwk
public void TestBug4097920() {
Vector lineSelectionData = new Vector();
lineSelectionData.addElement("dog,");
lineSelectionData.addElement("cat,");
lineSelectionData.addElement("mouse ");
lineSelectionData.addElement("dog,cat,mouse ");
lineSelectionData.addElement("(one)");
lineSelectionData.addElement("(two)\n");
generalIteratorTest(lineBreak, lineSelectionData);
@ -964,8 +962,9 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
lineSelectionData.addElement("\u0e40\u0e03\u0e35\u0e22\u0e07");
lineSelectionData.addElement("\u0e43\u0e2b\u0e21\u0e48");
generalIteratorTest(BreakIterator.getLineInstance(new Locale("th", "", "")),
lineSelectionData);
Locale loc = new Locale("th", "", "");
BreakIterator bi = BreakIterator.getLineInstance(loc);
generalIteratorTest(bi, lineSelectionData);
}
/**

View file

@ -510,9 +510,7 @@ public class RBBITest extends TestFmwk
// linedata.addElement("foo\ufeffbar");
// to test for bug #4097920
linedata.addElement("dog,");
linedata.addElement("cat,");
linedata.addElement("mouse ");
linedata.addElement("dog,cat,mouse ");
linedata.addElement("(one)");
linedata.addElement("(two)\n");

View file

@ -379,8 +379,6 @@ void executeTest(TestParams t) {
// and this one.
for (i=prevBP+1; i<bp; i++) {
if (t.expectedBreaks[i] != 0) {
int expected[] = {0, i};
printStringBreaks(t.dataToBreak, expected, 2);
errln("Forward Iteration, break expected, but not found. Pos=" + i +
" File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
}
@ -388,8 +386,6 @@ void executeTest(TestParams t) {
// Check that the break we did find was expected
if (t.expectedBreaks[bp] == 0) {
int expected[] = {0, bp};
printStringBreaks(t.dataToBreak, expected, 2);
errln("Forward Iteration, break found, but not expected. Pos=" + bp +
" File line,col= " + t.srcLine[bp] + ", " + t.srcCol[bp]);
} else {
@ -472,35 +468,6 @@ void executeTest(TestParams t) {
}
/**
 * Debugging aid: writes a table to System.out with one row per code point of
 * the given text, and a dashed marker line before every position listed in
 * <code>expected</code>.
 *
 * Each row carries the columns named in the header line: the code point in hex,
 * whether it is alphabetic, whether it has the Grapheme_Extend property, whether
 * it is a letter or digit, its general category, its line break class, and its
 * character name.
 *
 * @param ustr          the text to dump
 * @param expected      positions (UTF-16 indexes into ustr) to flag with a marker line
 * @param expectedcount number of leading entries of <code>expected</code> to use
 */
void printStringBreaks(StringBuffer ustr, int expected[],
int expectedcount)
{
    System.out.println("code alpha extend alphanum type line name");
    for (int j = 0; j < ustr.length(); j++) {
        // Flag this position if it is one of the requested ones.  Bounding by the
        // array length as well keeps an oversized count from running off the end.
        for (int k = 0; k < expectedcount && k < expected.length; k++) {
            if (j == expected[k]) {
                System.out.println("------------------------------------------------ " + j);
            }
        }
        int c = UTF16.charAt(ustr, j);
        if (c > 0xffff) {
            // Supplementary code point: step over its second UTF-16 code unit.
            j++;
        }
        String type = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY,
                UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY),
                UProperty.NameChoice.SHORT);
        String line = UCharacter.getPropertyValueName(UProperty.LINE_BREAK,
                UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK),
                UProperty.NameChoice.SHORT);
        String name = UCharacter.getName(c);
        // Emit every column the header announces, in header order.
        System.out.println(Integer.toHexString(c) + " " +
                UCharacter.isUAlphabetic(c) + " " +
                UCharacter.hasBinaryProperty(c, UProperty.GRAPHEME_EXTEND) + " " +
                UCharacter.isLetterOrDigit(c) + " " +
                type + " " +
                line + " " +
                name);
    }
}
}

View file

@ -1,8 +1,10 @@
/*
* Created on Apr 23, 2004
*
*******************************************************************************
* Copyright (C) 2003-2004 International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.rbbi;
package com.ibm.icu.dev.test.rbbi;
// Monkey testing of RuleBasedBreakIterator
@ -22,6 +24,10 @@ import java.util.Locale;
* Monkey tests for RBBI. These tests have independent implementations of
* the Unicode TR boundary rules, and compare results between these and ICU's
* implementation, using random data.
*
* Tests cover Grapheme Cluster (char), Word and Line breaks
*
* Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
*
*/
public class RBBITestMonkey extends TestFmwk {
@ -685,7 +691,7 @@ public class RBBITestMonkey extends TestFmwk {
if (!fNU.contains(prevChar) && fCL.contains(thisChar) ||
fEX.contains(thisChar) ||
!fNU.contains(prevChar) && fIS.contains(thisChar) ||
fSY.contains(thisChar)) {
!fNU.contains(prevChar) && fSY.contains(thisChar)) {
continue;
}
@ -793,7 +799,12 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
}
if (fPR.contains(prevChar) && fAL.contains(thisChar)) {
continue;
}
if (fPR.contains(prevChar) && fID.contains(thisChar)) {
continue;
}
// LB 18b
if (fHY.contains(prevChar) || fBB.contains(thisChar)) {
break;
@ -803,6 +814,11 @@ public class RBBITestMonkey extends TestFmwk {
if (fAL.contains(prevChar) && fAL.contains(thisChar)) {
continue;
}
// LB 19b
if (fIS.contains(prevChar) && fAL.contains(thisChar)) {
continue;
}
// LB 20 Break everywhere else
break;
@ -927,8 +943,8 @@ public class RBBITestMonkey extends TestFmwk {
// Match the following regular expression in the input text.
// (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS) CM*) * (CL CM*)? (PO CM*)?
// 0 1 3 3 3 7 7 7 7 9 9 11 11 (match states)
// (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)? (PO CM*)?
// 0 1 3 3 3 7 7 7 7 7 9 9 11 11 (match states)
// retVals array [0] index of the start of the match, or -1 if no match
// [1] index of first char following the match.
private int[] LBNumberCheck(StringBuffer s, int startIdx, int[] retVals) {
@ -992,8 +1008,8 @@ public class RBBITestMonkey extends TestFmwk {
break;
}
break matchLoop; /* No Match */
// (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS) CM*) * (CL CM*)? (PO CM*)?
// 0 1 3 3 4 7 7 7 7 7 9 9 11 11 (match states)
// (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)? (PO CM*)?
// 0 1 3 3 4 7 7 7 7 7 7 9 9 11 11 (match states)
case 7:
if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
@ -1008,6 +1024,10 @@ public class RBBITestMonkey extends TestFmwk {
matchState = 7;
break;
}
if (cLBType == UCharacter.LineBreak.BREAK_SYMBOLS) {
matchState = 7;
break;
}
if (cLBType == UCharacter.LineBreak.CLOSE_PUNCTUATION) {
matchState = 9;
break;
@ -1121,7 +1141,9 @@ public class RBBITestMonkey extends TestFmwk {
}
//
// The following UnicodeSets are used in matching a Grapheme Cluster
//
private static UnicodeSet GC_Control =
new UnicodeSet("[[:Zl:][:Zp:][:Cc:][:Cf:]-[\\u000d\\u000a]-[:Grapheme_Extend:]]");
@ -1308,6 +1330,8 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
boolean[] forwardBreaks = new boolean[TESTSTRINGLEN*2 + 1];
boolean[] reverseBreaks = new boolean[TESTSTRINGLEN*2 + 1];
boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1];
boolean[] followingBreaks = new boolean[TESTSTRINGLEN*2 + 1];
boolean[] precedingBreaks = new boolean[TESTSTRINGLEN*2 + 1];
int i;
int loopCount = 0;
boolean printTestData = false;
@ -1388,6 +1412,8 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
Arrays.fill(forwardBreaks, false);
Arrays.fill(reverseBreaks, false);
Arrays.fill(isBoundaryBreaks, false);
Arrays.fill(followingBreaks, false);
Arrays.fill(precedingBreaks, false);
// Calculate the expected results for this test string.
mk.setText(testText);
@ -1446,6 +1472,47 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
isBoundaryBreaks[i] = bi.isBoundary(i);
}
// Find the break positions using the following() function.
// A result is rejected when it is not strictly after the probe index, moves
// backwards relative to the last boundary found, lies beyond the end of the
// text, or skips past a previously found boundary that is still ahead of the
// probe index.
lastBreakPos = 0;
followingBreaks[0] = true;
for (i=0; i<testText.length(); i++) {
    breakPos = bi.following(i);
    if (breakPos <= i ||
        breakPos < lastBreakPos ||
        breakPos > testText.length() ||
        (breakPos > lastBreakPos && lastBreakPos > i)) {
        errln(name + " break monkey test: " +
            "Out of range value returned by BreakIterator::following().\n" +
            "index=" + i + " following returned=" + breakPos +
            " lastBreak=" + lastBreakPos);
        // Forces an error.  The mismatch must be planted in followingBreaks so
        // the failure is attributed to following(); later iterations only
        // write slots beyond i, so the planted value survives.
        followingBreaks[i] = !expectedBreaks[i];
    } else {
        followingBreaks[breakPos] = true;
        lastBreakPos = breakPos;
    }
}
// Find the break positions using the preceding() function.
// Mirror image of the loop above: a result is rejected when it is not strictly
// before the probe index, moves forwards relative to the last boundary found,
// is negative, or skips past a previously found boundary that is still behind
// the probe index.
lastBreakPos = testText.length();
precedingBreaks[testText.length()] = true;
for (i=testText.length(); i>0; i--) {
    breakPos = bi.preceding(i);
    if (breakPos >= i ||
        breakPos > lastBreakPos ||
        breakPos < 0 ||
        (breakPos < lastBreakPos && lastBreakPos < i)) {
        errln(name + " break monkey test: " +
            "Out of range value returned by BreakIterator::preceding().\n" +
            "index=" + i + " preceding returned=" + breakPos +
            " lastBreak=" + lastBreakPos);
        precedingBreaks[i] = !expectedBreaks[i]; // Forces an error.
    } else {
        precedingBreaks[breakPos] = true;
        lastBreakPos = breakPos;
    }
}
// Compare the expected and actual results.
for (i=0; i<=testText.length(); i++) {
@ -1456,6 +1523,10 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
errorType = "previous()";
} else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
errorType = "isBoundary()";
} else if (followingBreaks[i] != expectedBreaks[i]) {
errorType = "following()";
} else if (precedingBreaks[i] != expectedBreaks[i]) {
errorType = "preceding()";
}

View file

@ -1,4 +1,4 @@
# Copyright (c) 2001-2003 International Business Machines
# Copyright (c) 2001-2004 International Business Machines
# Corporation and others. All Rights Reserved.
#
# RBBI Test Data
@ -445,7 +445,7 @@ What is the proper use of the abbreviation pp.•? •Yes, I am definatelly 12"
<data>•foo\u00a0bar•</data>
# to test for bug #4097920
<data>•dog,cat,mouse •(one)•(two)\n<100></data>
<data>•dog,cat,mouse •(one)•(two)\n<100></data>
# to test for bug #4035266
<data>•The •balance •is •$-23,456.78, •not •-•$32,456.78!\n<100></data>

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9bdc90082678b551c99d1eaf505af262fc39509442d2ab71874cdf5f70bb2c83
size 1495983
oid sha256:60dbf83606b385c96bff52ccfddb4dc4cbfc7ef062aa8ee9ef37ede6b4e7375c
size 1572633