mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 07:22:11 +00:00
ICU-22789 Rename and adjust boundary logic for boundariesBackFrom API for Segments interface
This commit is contained in:
parent
9fbcc8a055
commit
4163a6d898
5 changed files with 28 additions and 15 deletions
|
@ -120,8 +120,8 @@ public class LocalizedSegmenter implements Segmenter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntStream boundariesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
|
||||
// Thin delegate: forwards this object's break iterator and source text, together with the
// caller's index i, to SegmentsImplUtils.boundariesBackFrom and returns its IntStream as-is.
public IntStream boundariesBackFrom(int i) {
|
||||
return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -93,8 +93,8 @@ public class RuleBasedSegmenter implements Segmenter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntStream boundariesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
|
||||
// Thin delegate: forwards this object's break iterator and source text, together with the
// caller's index i, to SegmentsImplUtils.boundariesBackFrom and returns its IntStream as-is.
public IntStream boundariesBackFrom(int i) {
|
||||
return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ public interface Segments {
|
|||
|
||||
IntStream boundariesAfter(int i);
|
||||
|
||||
IntStream boundariesBeforeIndex(int i);
|
||||
// Boundary indices as an IntStream, starting from index i (this declaration replaces the
// former boundariesBeforeIndex). NOTE(review): direction and inclusiveness of i are defined
// by the implementations, not by this declaration - confirm against SegmentsImplUtils.
IntStream boundariesBackFrom(int i);
|
||||
|
||||
//
|
||||
// Inner enums/classes in common for other inner classes
|
||||
|
|
|
@ -10,6 +10,8 @@ import java.util.stream.IntStream;
|
|||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
|
||||
// Global TODO: make initialization of breakIterator a prerequisite
|
||||
public class SegmentsImplUtils {
|
||||
|
||||
public static boolean isBoundary(BreakIterator breakIter, CharSequence source, int i) {
|
||||
|
@ -97,11 +99,20 @@ public class SegmentsImplUtils {
|
|||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
|
||||
public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
/**
 * Builds a stream of boundary indices by iterating backwards over {@code sourceSequence}
 * with {@code breakIter}, starting from index {@code i}.
 *
 * <p>The break iterator's text is (re)set to {@code sourceSequence} on every call.
 *
 * @param breakIter the break iterator used to find boundaries; its text is replaced
 * @param sourceSequence the text to segment
 * @param i the index to iterate backwards from
 * @return an empty stream when {@code i} is negative; otherwise a stream backed by a
 *     {@code BoundaryIterable} configured with {@code IterationDirection.BACKWARDS}
 */
public static IntStream boundariesBackFrom(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
// TODO: make initialization of breakIterator a prerequisite
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
int sourceLength = sourceSequence.length();
|
||||
// A negative start index yields no boundaries at all.
if (i < 0) {
|
||||
return IntStream.empty();
|
||||
}
|
||||
|
||||
// The length check short-circuits, so isBoundary is only consulted for i <= sourceLength.
boolean isOnBoundary = i <= sourceLength && isBoundary(breakIter, sourceSequence, i);
|
||||
// When i is itself a boundary, start one past it.
// NOTE(review): presumably so the backwards iteration reports i itself first - confirm
// against BoundaryIterable's handling of its start index.
int backFromIdx = isOnBoundary ? i + 1 : i;
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i);
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, backFromIdx);
|
||||
// 'false' requests a sequential (non-parallel) stream.
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
// Unbox to a primitive IntStream for callers.
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
|
|
|
@ -329,7 +329,7 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testBoundariesBeforeIndex() {
|
||||
public void testBoundariesBackFrom() {
|
||||
Segmenter enWordSegmenter =
|
||||
new LocalizedSegmenterBuilder()
|
||||
.setLocale(ULocale.ENGLISH)
|
||||
|
@ -343,10 +343,12 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
Segments segments = enWordSegmenter.segment(source);
|
||||
|
||||
Object[][] casesData = {
|
||||
{"first " + TAKE_LIMIT + " before beginning", -2, new int[0]},
|
||||
{"first " + TAKE_LIMIT + " at the beginning", 0, new int[0]},
|
||||
{"first " + TAKE_LIMIT + " in the middle of the 2nd to last", 42, new int[]{41, 40, 36, 35, 32}},
|
||||
{"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}},
|
||||
{"first " + TAKE_LIMIT + " before beginning", -2, new int[0]},
|
||||
{"first " + TAKE_LIMIT + " at the beginning", 0, new int[]{0}},
|
||||
{"first " + TAKE_LIMIT + " from the start of the 2nd to last segment", 41, new int[]{41, 40, 36, 35, 32}},
|
||||
{"first " + TAKE_LIMIT + " in the middle of the 2nd to last segment", 42, new int[]{41, 40, 36, 35, 32}},
|
||||
{"first " + TAKE_LIMIT + " at the end", source.length(), new int[]{45, 44, 41, 40, 36}},
|
||||
{"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}},
|
||||
};
|
||||
|
||||
for (Object[] caseDatum : casesData) {
|
||||
|
@ -354,11 +356,11 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
int startIdx = (int) caseDatum[1];
|
||||
int[] exp = (int[]) caseDatum[2];
|
||||
|
||||
int[] act = segments.boundariesBeforeIndex(startIdx).limit(TAKE_LIMIT).toArray();
|
||||
int[] act = segments.boundariesBackFrom(startIdx).limit(TAKE_LIMIT).toArray();
|
||||
|
||||
assertThat(act, is(exp));
|
||||
assertThat(desc, act, is(exp));
|
||||
|
||||
if (startIdx == -2) {
|
||||
if (startIdx < 0) {
|
||||
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue