ICU-22789 Rename and adjust boundary logic for boundariesBackFrom API for Segments interface

This commit is contained in:
Elango Cheran 2025-01-03 08:42:12 -08:00
parent 9fbcc8a055
commit 4163a6d898
5 changed files with 28 additions and 15 deletions

View file

@ -120,8 +120,8 @@ public class LocalizedSegmenter implements Segmenter {
}
@Override
// Pre-rename form of the method (boundariesBeforeIndex); this commit renames it to boundariesBackFrom.
public IntStream boundariesBeforeIndex(int i) {
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
// Post-rename form: delegates to the shared SegmentsImplUtils helper, passing this object's
// break iterator and source text together with the caller's index.
public IntStream boundariesBackFrom(int i) {
return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i);
}
}

View file

@ -93,8 +93,8 @@ public class RuleBasedSegmenter implements Segmenter {
}
@Override
// Pre-rename form of the method (boundariesBeforeIndex); this commit renames it to boundariesBackFrom.
public IntStream boundariesBeforeIndex(int i) {
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
// Post-rename form: delegates to the shared SegmentsImplUtils helper, passing this object's
// break iterator and source text together with the caller's index.
public IntStream boundariesBackFrom(int i) {
return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i);
}
}
}

View file

@ -33,7 +33,7 @@ public interface Segments {
IntStream boundariesAfter(int i);
// Pre-rename declaration; this commit replaces it with boundariesBackFrom.
IntStream boundariesBeforeIndex(int i);
// Post-rename declaration: returns a stream of boundary indices for the given index i.
// NOTE(review): judging by the implementation and tests in this commit, the stream runs
// backwards from i and includes i itself when i is a boundary -- confirm before relying on it.
IntStream boundariesBackFrom(int i);
//
// Inner enums/classes in common for other inner classes

View file

@ -10,6 +10,8 @@ import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
// Global TODO: make initialization of breakIterator a prerequisite
public class SegmentsImplUtils {
public static boolean isBoundary(BreakIterator breakIter, CharSequence source, int i) {
@ -97,11 +99,20 @@ public class SegmentsImplUtils {
return boundariesAsIntegers.mapToInt(Integer::intValue);
}
// Diff hunk: this commit renames boundariesBeforeIndex to boundariesBackFrom. The first signature
// below is the pre-rename one and the second is its replacement.
//
// Builds an IntStream of boundaries by iterating BACKWARDS through sourceSequence, starting
// from index i. A negative i yields an empty stream.
public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
public static IntStream boundariesBackFrom(BreakIterator breakIter, CharSequence sourceSequence, int i) {
// TODO: make initialization of breakIterator a prerequisite
breakIter.setText(sourceSequence);
int sourceLength = sourceSequence.length();
// a negative index produces no boundaries; return before building any iterable
if (i < 0) {
return IntStream.empty();
}
// the length check is evaluated first, so isBoundary is only consulted when i <= sourceLength
boolean isOnBoundary = i <= sourceLength && isBoundary(breakIter, sourceSequence, i);
// when i itself is a boundary, begin the backwards iteration one position later
// NOTE(review): presumably the backwards iteration only yields boundaries strictly before its
// start index, so the +1 is what makes i part of the result -- confirm against BoundaryIterable
int backFromIdx = isOnBoundary ? i + 1 : i;
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
// (pre-change line: the iteration started at i directly)
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i);
// (post-change line: the iteration starts at the adjusted index)
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, backFromIdx);
// sequential (non-parallel) stream over the iterable's spliterator
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
// unbox to a primitive IntStream for the caller
return boundariesAsIntegers.mapToInt(Integer::intValue);
}

View file

@ -329,7 +329,7 @@ public class SegmentsTest extends CoreTestFmwk {
}
@Test
public void testBoundariesBeforeIndex() {
public void testBoundariesBackFrom() {
Segmenter enWordSegmenter =
new LocalizedSegmenterBuilder()
.setLocale(ULocale.ENGLISH)
@ -343,10 +343,12 @@ public class SegmentsTest extends CoreTestFmwk {
Segments segments = enWordSegmenter.segment(source);
Object[][] casesData = {
{"first " + TAKE_LIMIT + " before beginning", -2, new int[0]},
{"first " + TAKE_LIMIT + " at the beginning", 0, new int[0]},
{"first " + TAKE_LIMIT + " in the middle of the 2nd to last", 42, new int[]{41, 40, 36, 35, 32}},
{"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}},
{"first " + TAKE_LIMIT + " before beginning", -2, new int[0]},
{"first " + TAKE_LIMIT + " at the beginning", 0, new int[]{0}},
{"first " + TAKE_LIMIT + " from the start of the 2nd to last segment", 41, new int[]{41, 40, 36, 35, 32}},
{"first " + TAKE_LIMIT + " in the middle of the 2nd to last segment", 42, new int[]{41, 40, 36, 35, 32}},
{"first " + TAKE_LIMIT + " at the end", source.length(), new int[]{45, 44, 41, 40, 36}},
{"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}},
};
for (Object[] caseDatum : casesData) {
@ -354,11 +356,11 @@ public class SegmentsTest extends CoreTestFmwk {
int startIdx = (int) caseDatum[1];
int[] exp = (int[]) caseDatum[2];
int[] act = segments.boundariesBeforeIndex(startIdx).limit(TAKE_LIMIT).toArray();
int[] act = segments.boundariesBackFrom(startIdx).limit(TAKE_LIMIT).toArray();
assertThat(act, is(exp));
assertThat(desc, act, is(exp));
if (startIdx == -2) {
if (startIdx < 0) {
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
}
}