ICU-22789 Add isBoundary API for Segments interface

This commit is contained in:
Elango Cheran 2025-01-02 10:00:08 -08:00
parent f12d724f50
commit 0e2b1dbb81
5 changed files with 57 additions and 1 deletions

View file

@ -79,6 +79,11 @@ public class LocalizedSegmenter implements Segmenter {
return SegmentsImplUtils.ranges(this.breakIter, this.source);
}
/**
 * Reports whether offset {@code i} is a segmentation boundary by delegating to
 * {@code SegmentsImplUtils.isBoundary} with this object's break iterator and source.
 */
@Override
public boolean isBoundary(int i) {
    final boolean atBoundary =
            SegmentsImplUtils.isBoundary(this.breakIter, this.source, i);
    return atBoundary;
}
@Override
public Stream<Segment> rangesAfterIndex(int i) {
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);

View file

@ -52,6 +52,11 @@ public class RuleBasedSegmenter implements Segmenter {
return SegmentsImplUtils.ranges(this.breakIter, this.source);
}
/**
 * Reports whether offset {@code i} is a segmentation boundary by delegating to
 * {@code SegmentsImplUtils.isBoundary} with this object's break iterator and source.
 */
@Override
public boolean isBoundary(int i) {
    final boolean atBoundary =
            SegmentsImplUtils.isBoundary(this.breakIter, this.source, i);
    return atBoundary;
}
@Override
public Stream<Segment> rangesAfterIndex(int i) {
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);

View file

@ -12,6 +12,14 @@ public interface Segments {
Stream<Segment> ranges();
/**
* Returns whether offset {@code i} is a segmentation boundary. An offset that falls
* inside a segment is not an error; it simply yields {@code false}. Throws an exception
* when {@code i} is not a valid index position for the source sequence.
* NOTE(review): the concrete exception type comes from the underlying break iterator
* and is not specified here -- confirm before documenting it with {@code @throws}.
* @param i the offset into the source sequence to test
* @return {@code true} if offset {@code i} is a segmentation boundary, {@code false}
*     otherwise
*/
boolean isBoundary(int i);
Stream<Segment> rangesAfterIndex(int i);
Stream<Segment> rangesBeforeIndex(int i);

View file

@ -12,13 +12,19 @@ import java.util.stream.StreamSupport;
public class SegmentsImplUtils {
/**
 * Tests whether offset {@code i} is a boundary of {@code source} according to
 * {@code breakIter}.
 *
 * <p>The iterator's text is replaced with {@code source} as a side effect.
 *
 * @param breakIter the break iterator to query; its text is reset by this call
 * @param source the text to analyze
 * @param i the offset to test
 * @return the result of {@code breakIter.isBoundary(i)} after the text has been set
 */
public static boolean isBoundary(BreakIterator breakIter, CharSequence source, int i) {
    // The iterator must be pointed at the source before it can be queried.
    breakIter.setText(source);
    final boolean atBoundary = breakIter.isBoundary(i);
    return atBoundary;
}
/**
 * Streams the segments of {@code sourceSequence} as character sequences.
 *
 * <p>Each {@code Segment} produced by {@code ranges} is converted with the function
 * returned by {@code rangeToSequenceFn(sourceSequence)}.
 *
 * @param breakIter the break iterator used to find the segments
 * @param sourceSequence the text to segment
 * @return a stream with one mapped {@code CharSequence} per segment
 */
public static Stream<CharSequence> subSequences(BreakIterator breakIter, CharSequence sourceSequence) {
    Stream<Segment> segments = ranges(breakIter, sourceSequence);
    return segments.map(rangeToSequenceFn(sourceSequence));
}
/**
 * Streams the segments of {@code sourceSequence} found by {@code breakIter}.
 *
 * <p>Implemented as {@code rangesAfterIndex} with an index of {@code -1}.
 * NOTE(review): presumably -1 precedes every valid offset so that no segment is
 * excluded -- confirm against {@code rangesAfterIndex}.
 *
 * @param breakIter the break iterator used to find the segments
 * @param sourceSequence the text to segment
 * @return the stream returned by {@code rangesAfterIndex(breakIter, sourceSequence, -1)}
 */
public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
return rangesAfterIndex(breakIter, sourceSequence, -1);
};
}
public static Stream<Segment> rangesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
breakIter.setText(sourceSequence);

View file

@ -90,6 +90,38 @@ public class SegmentsTest extends CoreTestFmwk {
assertThat(act2, is(exp2));
}
/**
 * Checks {@code Segments.isBoundary} for an English word segmenter at offsets that are
 * segment starts, segment limits, segment interiors, and the two ends of the string.
 */
@Test
public void testIsBoundary() {
    String text = "The quick brown fox jumped over the lazy dog.";

    Segmenter wordSegmenter =
            new LocalizedSegmenterBuilder()
                    .setLocale(ULocale.ENGLISH)
                    .setSegmentationType(LocalizedSegmenter.SegmentationType.WORD)
                    .build();

    // Segment the sample sentence once; every case below probes the same Segments object.
    Segments segments = wordSegmenter.segment(text);

    // Each row: description (used as the assertion message), offset to probe,
    // and the expected isBoundary result.
    Object[][] expectations = {
        {"start of segment", 4, true},
        {"between start and limit of segment", 6, false},
        {"limit of segment", 9, true},
        {"beginning of string", 0, true},
        {"end of string", text.length(), true},
    };

    for (int row = 0; row < expectations.length; row++) {
        String description = (String) expectations[row][0];
        int offset = (int) expectations[row][1];
        boolean expected = (boolean) expectations[row][2];

        boolean actual = segments.isBoundary(offset);
        assertThat(description, actual == expected);
    }
}
@Test
public void testRangesAfterIndex() {
Segmenter enWordSegmenter =