ICU-22789 Add segmentAt API for Segments interface

This commit is contained in:
Elango Cheran 2025-01-03 12:20:54 -08:00
parent d9017e0408
commit 47ffdd8fa9
5 changed files with 84 additions and 1 deletion
icu4j/main/core/src
main/java/com/ibm/icu/text/segmenter
test/java/com/ibm/icu/dev/test/text/segmenter

View file

@ -74,6 +74,11 @@ public class LocalizedSegmenter implements Segmenter {
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
}
/**
 * Looks up the segment for index {@code i} by delegating to
 * {@code SegmentsImplUtils.segmentAt} with this object's break iterator and source text.
 */
@Override
public Segment segmentAt(int i) {
    final Segment found = SegmentsImplUtils.segmentAt(this.breakIter, this.source, i);
    return found;
}
@Override
public Stream<Segment> ranges() {
return SegmentsImplUtils.ranges(this.breakIter, this.source);

View file

@ -47,6 +47,11 @@ public class RuleBasedSegmenter implements Segmenter {
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
}
/**
 * Looks up the segment for index {@code i} by delegating to
 * {@code SegmentsImplUtils.segmentAt} with this object's break iterator and source text.
 */
@Override
public Segment segmentAt(int i) {
    final Segment found = SegmentsImplUtils.segmentAt(this.breakIter, this.source, i);
    return found;
}
@Override
public Stream<Segment> ranges() {
return SegmentsImplUtils.ranges(this.breakIter, this.source);

View file

@ -9,6 +9,8 @@ import java.util.stream.Stream;
public interface Segments {
/**
 * Returns a stream of {@code CharSequence} values, one per segment range.
 * NOTE(review): the utility implementation visible in this change maps each range
 * from ranges() to a sequence of the source text; confirm this is the intended contract.
 */
Stream<CharSequence> subSequences();
/**
 * Returns the {@link Segment} associated with index {@code i}.
 * NOTE(review): the implementations visible in this change delegate to
 * SegmentsImplUtils.segmentAt, which returns {@code null} when it cannot determine both
 * a start and a limit boundary; confirm whether {@code null} is part of this contract
 * and how out-of-range indices should be treated.
 *
 * @param i an index into the source text
 * @return the segment for {@code i}
 */
Segment segmentAt(int i);
/**
 * Returns a stream of {@link Segment} ranges.
 * NOTE(review): the utility implementation visible in this change is equivalent to
 * rangesAfterIndex(-1); confirm that this means "all ranges from the start".
 */
Stream<Segment> ranges();
/**
 * Returns a stream of {@link Segment} ranges selected relative to index {@code i}.
 * NOTE(review): whether a range starting exactly at {@code i} is included is not
 * visible from this declaration; confirm against the implementation.
 *
 * @param i an index into the source text
 */
Stream<Segment> rangesAfterIndex(int i);

View file

@ -24,6 +24,32 @@ public class SegmentsImplUtils {
return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence));
}
/**
 * Finds the segment of {@code sourceSequence} that contains index {@code i}.
 *
 * <p>The break iterator is (re)initialized with {@code sourceSequence} on every call, so any
 * previous iteration state of {@code breakIter} is discarded.
 *
 * @param breakIter the break iterator that defines the segmentation; its text and position are
 *     modified by this call
 * @param sourceSequence the text being segmented
 * @param i an index into {@code sourceSequence}
 * @return the segment whose range {@code [start, limit)} contains {@code i}, or {@code null}
 *     when {@code i} is outside {@code [0, sourceSequence.length())} or no enclosing pair of
 *     boundaries can be determined
 */
public static Segment segmentAt(BreakIterator breakIter, CharSequence sourceSequence, int i) {
    // TODO: make initialization of breakIterator a prerequisite
    breakIter.setText(sourceSequence);

    // No segment can contain an index outside [0, length). Answer null here instead of passing
    // the index on to BreakIterator.isBoundary(), which may reject out-of-range offsets rather
    // than reporting "no boundary". An index equal to the length is a boundary with nothing
    // after it, which would yield null further below anyway.
    if (i < 0 || i >= sourceSequence.length()) {
        return null;
    }

    int start;
    int limit;

    if (breakIter.isBoundary(i)) {
        // i begins a segment; the segment ends at the next boundary.
        start = i;
        limit = breakIter.next();
    } else {
        // BreakIterator::isBoundary(i) will advance forwards to the next boundary if the argument
        // is not a boundary, so the current position is the limit of the enclosing segment and
        // the boundary before it is the start.
        limit = breakIter.current();
        start = breakIter.previous();
    }

    if (start == BreakIterator.DONE || limit == BreakIterator.DONE) {
        return null;
    }
    return new Segment(start, limit, sourceSequence);
}
/**
 * Produces the stream of segment ranges for {@code sourceSequence} by delegating to
 * {@code rangesAfterIndex} with a starting index of -1.
 *
 * @param breakIter the break iterator passed through to {@code rangesAfterIndex}
 * @param sourceSequence the text being segmented
 * @return the stream returned by {@code rangesAfterIndex(breakIter, sourceSequence, -1)}
 */
public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
    final int indexBeforeStart = -1;
    return rangesAfterIndex(breakIter, sourceSequence, indexBeforeStart);
}

View file

@ -261,7 +261,7 @@ public class SegmentsTest extends CoreTestFmwk {
Segment segment = segments.rangeBeforeIndex(startIdx);
if (startIdx == -2) {
if (startIdx < 0 ) {
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
}
@ -366,4 +366,49 @@ public class SegmentsTest extends CoreTestFmwk {
}
}
/**
 * Checks {@code Segments.segmentAt} against an English word segmentation of a fixed sentence.
 *
 * <p>Each row of the case table is: description, index to query, expected segment start,
 * expected segment limit. Rows with a {@code null} expected start describe out-of-bounds
 * indices.
 *
 * <p>NOTE(review): for out-of-bounds rows this test only checks that the table itself is
 * consistent (both expectations are null); it never calls {@code segmentAt} with those
 * indices. The known-issue text also mentions {@code BreakIterator.preceding}, which looks
 * copied from the rangeBeforeIndex test; confirm it is the right ticket for this API.
 */
@Test
public void testSegmentAt() {
    String source = "The quick brown fox jumped over the lazy dog.";

    Segmenter enWordSegmenter =
        new LocalizedSegmenterBuilder()
            .setLocale(ULocale.ENGLISH)
            .setSegmentationType(SegmentationType.WORD)
            .build();

    // Create new Segments for source
    Segments segments1 = enWordSegmenter.segment(source);

    Object[][] casesData = {
        {"index before beginning",                   -2,                  null, null},
        {"index at beginning",                       0,                   0,    3},
        {"index in the middle of the first segment", 2,                   0,    3},
        {"index in the middle of the third segment", 5,                   4,    9},
        {"index at the end",                         source.length()-1,   44,   45},
        {"index after the end",                      source.length()+1,   null, null},
    };

    for (int row = 0; row < casesData.length; row++) {
        Object[] caseDatum = casesData[row];
        String desc = (String) caseDatum[0];
        int startIdx = ((Integer) caseDatum[1]).intValue();
        Integer expStart = (Integer) caseDatum[2];
        Integer expLimit = (Integer) caseDatum[3];

        if (startIdx < 0) {
            logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
        }

        if (expStart == null) {
            // Out-of-bounds row: only the table's own consistency is verified.
            assertThat("Out of bounds range should be null", expLimit == null);
            continue;
        }

        Segment segment = segments1.segmentAt(startIdx);
        long wantStart = expStart.intValue();
        long wantLimit = expLimit.intValue();
        assertEquals(desc + ", start", wantStart, (long) segment.start);
        assertEquals(desc + ", limit", wantLimit, (long) segment.limit);
    }
}
}