mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-12515 fix errors in J filtered brk, enable @ss=
* fix errors in the filtered break implementation, sync with C * sync rbbitst.txt with C * enable 'en@ss=standard' syntax to enable filtered break X-SVN-Rev: 39213
This commit is contained in:
parent
dea458fef7
commit
12b103f98a
4 changed files with 295 additions and 147 deletions
|
@ -11,6 +11,7 @@ package com.ibm.icu.impl;
|
|||
import java.text.CharacterIterator;
|
||||
import java.util.HashSet;
|
||||
|
||||
import com.ibm.icu.impl.ICUResourceBundle.OpenType;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.FilteredBreakIteratorBuilder;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
|
@ -19,7 +20,6 @@ import com.ibm.icu.util.CharsTrie;
|
|||
import com.ibm.icu.util.CharsTrieBuilder;
|
||||
import com.ibm.icu.util.StringTrieBuilder;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
/**
|
||||
* @author tomzhang
|
||||
|
@ -46,84 +46,132 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
|
|||
this.backwardsTrie = backwardsTrie;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
int n = delegate.next();
|
||||
|
||||
/**
|
||||
* Reset the filter from the delegate.
|
||||
*/
|
||||
private final void resetState() {
|
||||
text = UCharacterIterator.getInstance((CharacterIterator) delegate.getText().clone());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there an exception at this point?
|
||||
*
|
||||
* @param n the candidate break position (text index) to test
|
||||
* @return true if a break exception applies at n (the break should be suppressed), false otherwise
|
||||
*/
|
||||
private final boolean breakExceptionAt(int n) {
|
||||
// Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()
|
||||
|
||||
int bestPosn = -1;
|
||||
int bestValue = -1;
|
||||
|
||||
// loops while 'n' points to an exception
|
||||
text.setIndex(n);
|
||||
backwardsTrie.reset();
|
||||
int uch;
|
||||
|
||||
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
|
||||
if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here??
|
||||
// TODO only do this the 1st time?
|
||||
} else {
|
||||
uch = text.nextCodePoint();
|
||||
}
|
||||
|
||||
BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
|
||||
while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and..
|
||||
((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie
|
||||
if (r.hasValue()) { // remember the best match so far
|
||||
bestPosn = text.getIndex();
|
||||
bestValue = backwardsTrie.getValue();
|
||||
}
|
||||
}
|
||||
|
||||
if (r.matches()) { // exact match?
|
||||
bestValue = backwardsTrie.getValue();
|
||||
bestPosn = text.getIndex();
|
||||
}
|
||||
|
||||
if (bestPosn >= 0) {
|
||||
if (bestValue == Builder.MATCH) { // exact match!
|
||||
return true; // Exception here.
|
||||
} else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
|
||||
// make sure there's a forward trie
|
||||
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
|
||||
// to see if it matches something going forward.
|
||||
forwardsPartialTrie.reset();
|
||||
|
||||
BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
text.setIndex(bestPosn); // hope that's close ..
|
||||
while ((uch = text.nextCodePoint()) != BreakIterator.DONE
|
||||
&& ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
|
||||
}
|
||||
if (rfwd.matches()) {
|
||||
// Exception here
|
||||
return true;
|
||||
} // else fall through
|
||||
} // else fall through
|
||||
} // else fall through
|
||||
return false; // No exception here.
|
||||
}
|
||||
|
||||
/**
|
||||
* Given that the delegate has already given its "initial" answer,
|
||||
* find the NEXT actual (non-excepted) break.
|
||||
* @param n initial position from delegate
|
||||
* @return new break position or BreakIterator.DONE
|
||||
*/
|
||||
private final int internalNext(int n) {
|
||||
if (n == BreakIterator.DONE || // at end or
|
||||
backwardsTrie == null) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// UCharacterIterator text;
|
||||
text = UCharacterIterator.getInstance((CharacterIterator) delegate.getText().clone());
|
||||
do { // outer loop runs once per underlying break (from fDelegate).
|
||||
// loops while 'n' points to an exception.
|
||||
text.setIndex(n);
|
||||
backwardsTrie.reset();
|
||||
int uch;
|
||||
resetState();
|
||||
|
||||
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
|
||||
if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here??
|
||||
// TODO only do this the 1st time?
|
||||
final int textLen = text.getLength();
|
||||
|
||||
while (n != BreakIterator.DONE && n != textLen) {
|
||||
// outer loop runs once per underlying break (from fDelegate).
|
||||
// loops while 'n' points to an exception.
|
||||
|
||||
if (breakExceptionAt(n)) {
|
||||
// n points to a break exception
|
||||
n = delegate.next();
|
||||
} else {
|
||||
uch = text.nextCodePoint();
|
||||
// no exception at this spot
|
||||
return n;
|
||||
}
|
||||
}
|
||||
return n; //hit underlying DONE or break at end of text
|
||||
}
|
||||
|
||||
BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
/**
|
||||
* Given that the delegate has already given its "initial" answer,
|
||||
* find the PREVIOUS actual (non-excepted) break.
|
||||
* @param n initial position from delegate
|
||||
* @return new break position or BreakIterator.DONE
|
||||
*/
|
||||
private final int internalPrev(int n) {
|
||||
if (n == 0 || n == BreakIterator.DONE || // at end or
|
||||
backwardsTrie == null) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
resetState();
|
||||
|
||||
int bestPosn = -1;
|
||||
int bestValue = -1;
|
||||
while (n != BreakIterator.DONE && n != 0) {
|
||||
// outer loop runs once per underlying break (from fDelegate).
|
||||
// loops while 'n' points to an exception.
|
||||
|
||||
while ((uch = text.previousCodePoint()) != BreakIterator.DONE && // more to consume backwards and..
|
||||
((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie
|
||||
if (r.hasValue()) { // remember the best match so far
|
||||
bestPosn = text.getIndex();
|
||||
bestValue = backwardsTrie.getValue();
|
||||
}
|
||||
}
|
||||
|
||||
if (r.matches()) { // exact match?
|
||||
bestValue = backwardsTrie.getValue();
|
||||
bestPosn = text.getIndex();
|
||||
}
|
||||
|
||||
if (bestPosn >= 0) {
|
||||
if (bestValue == Builder.MATCH) { // exact match!
|
||||
n = delegate.next(); // skip this one. Find the next lowerlevel break.
|
||||
if (n == BreakIterator.DONE) {
|
||||
break;
|
||||
}
|
||||
continue; // See if the next is another exception.
|
||||
} else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
|
||||
// make sure there's a forward trie
|
||||
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
|
||||
// to see if it matches something going forward.
|
||||
forwardsPartialTrie.reset();
|
||||
|
||||
BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
text.setIndex(bestPosn); // hope that's close ..
|
||||
while ((uch = text.nextCodePoint()) != BreakIterator.DONE
|
||||
&& ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
|
||||
}
|
||||
if (rfwd.matches()) {
|
||||
// only full matches here, nothing to check
|
||||
// skip the next:
|
||||
n = delegate.next();
|
||||
if (n == BreakIterator.DONE) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
// no match (no exception) -return the 'underlying' break
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break; // internal error and/or no forwards trie
|
||||
}
|
||||
if (breakExceptionAt(n)) {
|
||||
// n points to a break exception
|
||||
n = delegate.previous();
|
||||
} else {
|
||||
break; // No match - so exit. Not an exception.
|
||||
// no exception at this spot
|
||||
return n;
|
||||
}
|
||||
} while (n != BreakIterator.DONE);
|
||||
return n;
|
||||
}
|
||||
return n; //hit underlying DONE or break at end of text
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -150,32 +198,20 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
|
|||
return other;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int first() {
|
||||
return delegate.first();
|
||||
return internalNext(delegate.first());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int last() {
|
||||
return delegate.last();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next(int n) {
|
||||
// TODO
|
||||
throw new UnsupportedOperationException("next(int) is not yet implemented");
|
||||
public int preceding(int offset) {
|
||||
return internalPrev(delegate.preceding(offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int previous() {
|
||||
// TODO
|
||||
throw new UnsupportedOperationException("previous() is not yet implemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int following(int offset) {
|
||||
// TODO
|
||||
throw new UnsupportedOperationException("following(int) is not yet implemented");
|
||||
return internalPrev(delegate.previous());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -184,9 +220,39 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int preceding(int offset) {
|
||||
// TODO
|
||||
throw new UnsupportedOperationException("preceding(int) is not yet implemented");
|
||||
public boolean isBoundary(int offset) {
|
||||
if(!delegate.isBoundary(offset)) {
|
||||
return false; // No underlying break to suppress?
|
||||
}
|
||||
|
||||
// delegate thinks there's a break…
|
||||
if(backwardsTrie == null) {
|
||||
return true; // no data
|
||||
}
|
||||
|
||||
resetState();
|
||||
return !breakExceptionAt(offset); // if there's an exception: no break.
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
return internalNext(delegate.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next(int n) {
|
||||
return internalNext(delegate.next(n));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int following(int offset) {
|
||||
return internalNext(delegate.following(offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int last() {
|
||||
// Don't suppress a break opportunity at the end of text.
|
||||
return delegate.last();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -215,20 +281,18 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
|
|||
* @param loc the locale to get filtered iterators
|
||||
*/
|
||||
public Builder(ULocale loc) {
|
||||
ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BRKITR_BASE_NAME, loc);
|
||||
ICUResourceBundle exceptions = rb.findWithFallback("exceptions");
|
||||
if (exceptions != null) {
|
||||
ICUResourceBundle breaks = exceptions.findWithFallback("SentenceBreak");
|
||||
|
||||
if (breaks != null) {
|
||||
for (int index = 0, size = breaks.getSize(); index < size; ++index) {
|
||||
ICUResourceBundle b = (ICUResourceBundle) breaks.get(index);
|
||||
String br = b.getString();
|
||||
filterSet.add(br);
|
||||
}
|
||||
ICUResourceBundle rb = ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BRKITR_BASE_NAME, loc, OpenType.LOCALE_ROOT);
|
||||
|
||||
ICUResourceBundle breaks = rb.findWithFallback("exceptions/SentenceBreak");
|
||||
|
||||
if (breaks != null) {
|
||||
for (int index = 0, size = breaks.getSize(); index < size; ++index) {
|
||||
ICUResourceBundle b = (ICUResourceBundle) breaks.get(index);
|
||||
String br = b.getString();
|
||||
filterSet.add(br);
|
||||
}
|
||||
} // else - no exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -261,7 +325,7 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
|
|||
// Short circuit - nothing to except.
|
||||
return adoptBreakIterator;
|
||||
}
|
||||
|
||||
|
||||
CharsTrieBuilder builder = new CharsTrieBuilder();
|
||||
CharsTrieBuilder builder2 = new CharsTrieBuilder();
|
||||
|
||||
|
|
|
@ -136,6 +136,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
|||
typeKeyExt = "_" + lbKeyValue;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
String typeKey = (typeKeyExt == null)? KIND_NAMES[kind]: KIND_NAMES[kind] + typeKeyExt;
|
||||
String brkfname = rb.getStringWithFallback("boundaries/" + typeKey);
|
||||
|
@ -162,6 +163,15 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
|||
iter.setLocale(uloc, uloc);
|
||||
iter.setBreakType(kind);
|
||||
|
||||
// filtered break
|
||||
if (kind == BreakIterator.KIND_SENTENCE) {
|
||||
final String ssKeyword = locale.getKeywordValue("ss");
|
||||
if (ssKeyword != null && ssKeyword.equals("standard")) {
|
||||
final ULocale base = new ULocale(locale.getBaseName());
|
||||
return FilteredBreakIteratorBuilder.createInstance(base).build(iter);
|
||||
}
|
||||
}
|
||||
|
||||
return iter;
|
||||
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Before
|
||||
public void init(){
|
||||
characterBreak = BreakIterator.getCharacterInstance();
|
||||
|
@ -301,8 +301,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||
for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
|
||||
if (k == 2) {
|
||||
errln("Break between CR and LF in string U+" + Integer.toHexString(
|
||||
(int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
|
||||
(int)(work.charAt(3))));
|
||||
(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
|
||||
(work.charAt(3))));
|
||||
errorCount++;
|
||||
if (errorCount >= 75)
|
||||
return;
|
||||
|
@ -328,8 +328,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||
tb.setText(work.toString());
|
||||
for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
|
||||
if (k == 2) {
|
||||
errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
|
||||
+ " and U+" + Integer.toHexString((int)(work.charAt(2))));
|
||||
errln("Break between U+" + Integer.toHexString((work.charAt(1)))
|
||||
+ " and U+" + Integer.toHexString((work.charAt(2))));
|
||||
errorCount++;
|
||||
if (errorCount >= 75)
|
||||
return;
|
||||
|
@ -348,7 +348,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
out.append(c);
|
||||
else {
|
||||
out.append("\\u");
|
||||
temp = Integer.toHexString((int)c);
|
||||
temp = Integer.toHexString(c);
|
||||
out.append(zeros.substring(0, 4 - temp.length()));
|
||||
out.append(temp);
|
||||
}
|
||||
|
@ -568,7 +568,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
generalIteratorTest(lineBreak, lineSelectionData);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @bug 4117554
|
||||
|
@ -709,7 +709,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
int begin = 3;
|
||||
int end = str.length() - 3;
|
||||
// not used boolean gotException = false;
|
||||
|
||||
|
||||
|
||||
iter.setText(new StringCharacterIterator(str, begin, end, begin));
|
||||
for (int index = -1; index < begin + 1; ++index) {
|
||||
|
@ -772,16 +772,16 @@ public class BreakIteratorTest extends TestFmwk
|
|||
if (locList.length == 0)
|
||||
errln("getAvailableLocales() returned an empty list!");
|
||||
// I have no idea how to test this function...
|
||||
|
||||
|
||||
com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
|
||||
if (ulocList.length == 0) {
|
||||
errln("getAvailableULocales() returned an empty list!");
|
||||
errln("getAvailableULocales() returned an empty list!");
|
||||
} else {
|
||||
logln("getAvailableULocales() returned " + ulocList.length + " locales");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @bug 4068137
|
||||
*/
|
||||
|
@ -838,7 +838,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Bug 4450804
|
||||
*/
|
||||
|
@ -893,7 +893,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
assertEquals("Next point", 5, brk.next());
|
||||
assertEquals("Last point", BreakIterator.DONE, brk.next());
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Test case for Ticket#10721. BreakIterator factory method should throw NPE
|
||||
* when specified locale is null.
|
||||
|
@ -956,7 +956,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
errln("getWordInstance((ULocale)null) did not throw NPE.");
|
||||
} catch (NullPointerException e) { /* OK */ }
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test FilteredBreakIteratorBuilder newly introduced
|
||||
*/
|
||||
|
@ -980,14 +980,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
logln("Building new BI\n");
|
||||
filteredBI = builder.build(baseBI);
|
||||
|
||||
logln("Testing:");
|
||||
filteredBI.setText(text);
|
||||
assertEquals("1st next", 20, filteredBI.next());
|
||||
assertEquals("1st next", 84, filteredBI.next());
|
||||
assertEquals("1st next", 90, filteredBI.next());
|
||||
assertEquals("1st next", 181, filteredBI.next());
|
||||
assertEquals("1st next", 278, filteredBI.next());
|
||||
filteredBI.first();
|
||||
assertDefaultBreakBehavior(filteredBI, text);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1015,7 +1008,7 @@ public class BreakIteratorTest extends TestFmwk
|
|||
assertEquals("2nd next", 278, filteredBI.next());
|
||||
filteredBI.first();
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
logln("Constructing empty builder\n");
|
||||
|
@ -1072,15 +1065,39 @@ public class BreakIteratorTest extends TestFmwk
|
|||
filteredBI = builder.build(baseBI);
|
||||
|
||||
if(filteredBI != null) {
|
||||
logln("Testing:");
|
||||
filteredBI.setText(text);
|
||||
|
||||
assertEquals("5th next", 84, filteredBI.next());
|
||||
assertEquals("5th next", 278, filteredBI.next());
|
||||
filteredBI.first();
|
||||
assertEnglishBreakBehavior(filteredBI, text);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
logln("Constructing English @ss=standard\n");
|
||||
filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("en-US-u-ss-standard"));
|
||||
|
||||
if(filteredBI != null) {
|
||||
assertEnglishBreakBehavior(filteredBI, text);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
logln("Constructing Afrikaans @ss=standard - should be == default\n");
|
||||
filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("af-u-ss-standard"));
|
||||
|
||||
assertDefaultBreakBehavior(filteredBI, text);
|
||||
}
|
||||
|
||||
{
|
||||
logln("Constructing Japanese @ss=standard - should be == default\n");
|
||||
filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("ja-u-ss-standard"));
|
||||
|
||||
assertDefaultBreakBehavior(filteredBI, text);
|
||||
}
|
||||
{
|
||||
logln("Constructing tfg @ss=standard - should be == default\n");
|
||||
filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("tfg-u-ss-standard"));
|
||||
|
||||
assertDefaultBreakBehavior(filteredBI, text);
|
||||
}
|
||||
|
||||
{
|
||||
logln("Constructing French builder");
|
||||
builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH);
|
||||
|
@ -1092,12 +1109,48 @@ public class BreakIteratorTest extends TestFmwk
|
|||
filteredBI = builder.build(baseBI);
|
||||
|
||||
if(filteredBI != null) {
|
||||
logln("Testing:");
|
||||
filteredBI.setText(text);
|
||||
assertEquals("6th next", 20, filteredBI.next());
|
||||
assertEquals("6th next", 84, filteredBI.next());
|
||||
filteredBI.first();
|
||||
assertFrenchBreakBehavior(filteredBI, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param filteredBI
|
||||
* @param text
|
||||
*/
|
||||
private void assertFrenchBreakBehavior(BreakIterator filteredBI, String text) {
|
||||
logln("Testing French behavior:");
|
||||
filteredBI.setText(text);
|
||||
assertEquals("6th next", 20, filteredBI.next());
|
||||
assertEquals("6th next", 84, filteredBI.next());
|
||||
filteredBI.first();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param filteredBI
|
||||
* @param text
|
||||
*/
|
||||
private void assertEnglishBreakBehavior(BreakIterator filteredBI, String text) {
|
||||
logln("Testing English filtered behavior:");
|
||||
filteredBI.setText(text);
|
||||
|
||||
assertEquals("5th next", 84, filteredBI.next());
|
||||
assertEquals("5th next", 278, filteredBI.next());
|
||||
filteredBI.first();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param filteredBI
|
||||
* @param text
|
||||
*/
|
||||
private void assertDefaultBreakBehavior(BreakIterator filteredBI, String text) {
|
||||
logln("Testing Default Behavior:");
|
||||
filteredBI.setText(text);
|
||||
assertEquals("1st next", 20, filteredBI.next());
|
||||
assertEquals("1st next", 84, filteredBI.next());
|
||||
assertEquals("1st next", 90, filteredBI.next());
|
||||
assertEquals("1st next", 181, filteredBI.next());
|
||||
assertEquals("1st next", 278, filteredBI.next());
|
||||
filteredBI.first();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# Copyright (c) 2001-2016 International Business Machines
|
||||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Copyright (c) 2001-2016 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RBBI Test Data
|
||||
|
@ -31,6 +33,7 @@
|
|||
# are merged back into ICU4C's copy of the file, lest they get overwritten later.
|
||||
# TODO: figure out how to have a single copy of the file for use by both C and Java.
|
||||
|
||||
|
||||
# Temp debugging tests
|
||||
<locale en>
|
||||
<word>
|
||||
|
@ -40,18 +43,37 @@
|
|||
## FILTERED BREAK TESTS
|
||||
|
||||
# (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
|
||||
#<locale en>
|
||||
#<sent>
|
||||
#<data>\
|
||||
#•In the meantime Mr. •Weston arrived with his small ship, which he had now recovered. •Capt. •Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. •Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
#
|
||||
#<locale en@ss=standard>
|
||||
#<sent>
|
||||
#<data>\
|
||||
#•In the meantime Mr. Weston arrived with his small ship, which he had now recovered. •Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
#
|
||||
<locale en>
|
||||
<sent>
|
||||
<data>\
|
||||
•In the meantime Mr. •Weston arrived with his small ship, which he had now recovered. •Capt. •Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. •Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
|
||||
<locale en@ss=standard>
|
||||
<sent>
|
||||
<data>\
|
||||
•In the meantime Mr. Weston arrived with his small ship, which he had now recovered. •Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
|
||||
# This hits the case where "D." would match the end of "Ph.D.".
|
||||
<locale en@ss=standard>
|
||||
<sent>
|
||||
<data>\
|
||||
•Doctor with a D. •As in, Ph.D., you know.•</data>
|
||||
|
||||
# same as root (unless some exceptions are added!)
|
||||
<locale tfg@ss=standard>
|
||||
<sent>
|
||||
<data>\
|
||||
•In the meantime Mr. •Weston arrived with his small ship, which he had now recovered. •Capt. •Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. •Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
|
||||
# same as root (unless some exceptions are added!)
|
||||
<locale ja@ss=standard>
|
||||
<sent>
|
||||
<data>\
|
||||
•In the meantime Mr. •Weston arrived with his small ship, which he had now recovered. •Capt. •Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. •Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
|
||||
## END FILTERED BREAK TESTS
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
|
@ -283,7 +305,6 @@
|
|||
<data>•栃木<400>県<400>足利<400>市<400>で<400>の<400>撮影<400>が<400>公開<400></data>
|
||||
<data>•栃木<400>県<400>足利<400>市<400>で<400>の<400>撮影<400>が<400>公開<400>さ<400>れ<400>た<400></data>
|
||||
|
||||
|
||||
# Ticket #11999
|
||||
# Unhandled Break Engine was consuming all characters, not just unhandled.
|
||||
# \U00011700 is AHOM LETTER KA. There is no dictionary for AHOM, triggering the unhandled engine,
|
||||
|
|
Loading…
Add table
Reference in a new issue