mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-22789 Add segmentAt API for Segments interface
This commit is contained in:
parent
d9017e0408
commit
47ffdd8fa9
5 changed files with 84 additions and 1 deletions
icu4j/main/core/src
main/java/com/ibm/icu/text/segmenter
test/java/com/ibm/icu/dev/test/text/segmenter
|
@ -74,6 +74,11 @@ public class LocalizedSegmenter implements Segmenter {
|
|||
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Segment segmentAt(int i) {
|
||||
return SegmentsImplUtils.segmentAt(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Segment> ranges() {
|
||||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
|
|
|
@ -47,6 +47,11 @@ public class RuleBasedSegmenter implements Segmenter {
|
|||
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Segment segmentAt(int i) {
|
||||
return SegmentsImplUtils.segmentAt(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Segment> ranges() {
|
||||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
|
|
|
@ -9,6 +9,8 @@ import java.util.stream.Stream;
|
|||
public interface Segments {
|
||||
Stream<CharSequence> subSequences();
|
||||
|
||||
Segment segmentAt(int i);
|
||||
|
||||
Stream<Segment> ranges();
|
||||
|
||||
Stream<Segment> rangesAfterIndex(int i);
|
||||
|
|
|
@ -24,6 +24,32 @@ public class SegmentsImplUtils {
|
|||
return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence));
|
||||
}
|
||||
|
||||
public static Segment segmentAt(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
// TODO: make initialization of breakIterator a prerequisite
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
int start;
|
||||
int limit;
|
||||
|
||||
boolean isBoundary = breakIter.isBoundary(i);
|
||||
|
||||
if (isBoundary) {
|
||||
start = i;
|
||||
limit = breakIter.next();
|
||||
} else {
|
||||
// BreakIterator::isBoundary(i) will advance forwards to the next boundary if the argument
|
||||
// is not a boundary.
|
||||
limit = breakIter.current();
|
||||
start = breakIter.previous();
|
||||
}
|
||||
|
||||
if (start != BreakIterator.DONE && limit != BreakIterator.DONE) {
|
||||
return new Segment(start, limit, sourceSequence);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return rangesAfterIndex(breakIter, sourceSequence, -1);
|
||||
}
|
||||
|
|
|
@ -261,7 +261,7 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
|
||||
Segment segment = segments.rangeBeforeIndex(startIdx);
|
||||
|
||||
if (startIdx == -2) {
|
||||
if (startIdx < 0 ) {
|
||||
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
|
||||
}
|
||||
|
||||
|
@ -366,4 +366,49 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSegmentAt() {
|
||||
Segmenter enWordSegmenter =
|
||||
new LocalizedSegmenterBuilder()
|
||||
.setLocale(ULocale.ENGLISH)
|
||||
.setSegmentationType(SegmentationType.WORD)
|
||||
.build();
|
||||
|
||||
String source = "The quick brown fox jumped over the lazy dog.";
|
||||
|
||||
// Create new Segments for source
|
||||
Segments segments1 = enWordSegmenter.segment(source);
|
||||
|
||||
Object[][] casesData = {
|
||||
{"index before beginning", -2, null, null},
|
||||
{"index at beginning", 0, 0, 3},
|
||||
{"index in the middle of the first segment", 2, 0, 3},
|
||||
{"index in the middle of the third segment", 5, 4, 9},
|
||||
{"index at the end", source.length()-1, 44, 45},
|
||||
{"index after the end", source.length()+1, null, null},
|
||||
};
|
||||
|
||||
for (Object[] caseDatum : casesData) {
|
||||
String desc = (String) caseDatum[0];
|
||||
int startIdx = (int) caseDatum[1];
|
||||
Integer expStart = (Integer) caseDatum[2];
|
||||
Integer expLimit = (Integer) caseDatum[3];
|
||||
|
||||
if (startIdx < 0 ) {
|
||||
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
|
||||
}
|
||||
|
||||
if (expStart == null) {
|
||||
assertThat("Out of bounds range should be null", expLimit == null);
|
||||
} else {
|
||||
Segment segment = segments1.segmentAt(startIdx);
|
||||
|
||||
assertEquals(desc + ", start", (long) expStart.intValue(), (long) segment.start);
|
||||
assertEquals(desc + ", limit", (long) expLimit.intValue(), (long) segment.limit);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue