mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-22789 Add isBoundary API for Segments interface
This commit is contained in:
parent
f12d724f50
commit
0e2b1dbb81
5 changed files with 57 additions and 1 deletion
|
@ -79,6 +79,11 @@ public class LocalizedSegmenter implements Segmenter {
|
|||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Reports whether offset {@code i} is a segmentation boundary by delegating to
 * {@code SegmentsImplUtils.isBoundary} with this object's break iterator and source.
 *
 * @param i the offset to test
 * @return the result of {@code SegmentsImplUtils.isBoundary} for offset {@code i}
 */
@Override
|
||||
public boolean isBoundary(int i) {
|
||||
return SegmentsImplUtils.isBoundary(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Segment> rangesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
|
||||
|
|
|
@ -52,6 +52,11 @@ public class RuleBasedSegmenter implements Segmenter {
|
|||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Reports whether offset {@code i} is a segmentation boundary by delegating to
 * {@code SegmentsImplUtils.isBoundary} with this object's break iterator and source.
 *
 * @param i the offset to test
 * @return the result of {@code SegmentsImplUtils.isBoundary} for offset {@code i}
 */
@Override
|
||||
public boolean isBoundary(int i) {
|
||||
return SegmentsImplUtils.isBoundary(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Segment> rangesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
|
||||
|
|
|
@ -12,6 +12,14 @@ public interface Segments {
|
|||
|
||||
Stream<Segment> ranges();
|
||||
|
||||
/**
|
||||
* Returns whether offset {@code i} is a segmentation boundary. An offset that lies inside a
|
||||
* segment is not an error; it yields {@code false}. Throws an exception when {@code i} is
|
||||
* not a valid offset into the source sequence.
|
||||
* @param i the offset into the source sequence to test
|
||||
* @return {@code true} if offset {@code i} is a segmentation boundary, {@code false} otherwise
|
||||
*/
|
||||
boolean isBoundary(int i);
|
||||
|
||||
Stream<Segment> rangesAfterIndex(int i);
|
||||
|
||||
Stream<Segment> rangesBeforeIndex(int i);
|
||||
|
|
|
@ -12,13 +12,19 @@ import java.util.stream.StreamSupport;
|
|||
|
||||
public class SegmentsImplUtils {
|
||||
|
||||
/**
 * Tests whether offset {@code i} is a boundary of {@code source} according to
 * {@code breakIter}.
 *
 * <p>Side effect: the text of {@code breakIter} is replaced with {@code source} on every call.
 *
 * @param breakIter the break iterator to query; its text is overwritten with {@code source}
 * @param source the sequence being segmented
 * @param i the offset to test
 * @return the value returned by {@code breakIter.isBoundary(i)}
 */
public static boolean isBoundary(BreakIterator breakIter, CharSequence source, int i) {
|
||||
// Point the iterator at this source before querying it.
breakIter.setText(source);
|
||||
|
||||
// NOTE(review): handling of an out-of-range offset is left entirely to the
// BreakIterator implementation; no range check is done here.
return breakIter.isBoundary(i);
|
||||
}
|
||||
|
||||
/**
 * Returns a stream built by taking the segments from
 * {@code ranges(breakIter, sourceSequence)} and mapping each through
 * {@code rangeToSequenceFn(sourceSequence)}.
 *
 * <p>NOTE(review): presumably each element is the subsequence of {@code sourceSequence}
 * covered by one segment; confirm against {@code rangeToSequenceFn}.
 *
 * @param breakIter the break iterator used to compute the segments
 * @param sourceSequence the sequence being segmented
 * @return a stream with one {@code CharSequence} per segment
 */
public static Stream<CharSequence> subSequences(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence));
|
||||
}
|
||||
|
||||
public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return rangesAfterIndex(breakIter, sourceSequence, -1);
|
||||
};
|
||||
}
|
||||
|
||||
public static Stream<Segment> rangesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
|
|
@ -90,6 +90,38 @@ public class SegmentsTest extends CoreTestFmwk {
|
|||
assertThat(act2, is(exp2));
|
||||
}
|
||||
|
||||
/**
 * Checks {@code Segments.isBoundary} for an English word segmenter over a fixed sentence:
 * offsets at the start and limit of a word, inside a word, and at both ends of the string.
 */
@Test
|
||||
public void testIsBoundary() {
|
||||
Segmenter enWordSegmenter =
|
||||
new LocalizedSegmenterBuilder()
|
||||
.setLocale(ULocale.ENGLISH)
|
||||
.setSegmentationType(LocalizedSegmenter.SegmentationType.WORD)
|
||||
.build();
|
||||
|
||||
String source1 = "The quick brown fox jumped over the lazy dog.";
|
||||
|
||||
// Create new Segments for source1
|
||||
Segments segments1 = enWordSegmenter.segment(source1);
|
||||
|
||||
// Each row: case description, offset to query, expected isBoundary result.
// In source1 the word "quick" occupies offsets 4..8, so 4 is its start,
// 6 is inside it, and 9 is its limit.
Object[][] casesData = {
|
||||
{"start of segment", 4, true},
|
||||
{"between start and limit of segment", 6, false},
|
||||
{"limit of segment", 9, true},
|
||||
{"beginning of string", 0, true},
|
||||
{"end of string", source1.length(), true},
|
||||
};
|
||||
|
||||
for (Object[] caseDatum : casesData) {
|
||||
String desc = (String) caseDatum[0];
|
||||
int idx = (int) caseDatum[1];
|
||||
boolean exp = (boolean) caseDatum[2];
|
||||
|
||||
// desc is passed as the first argument so a failure identifies the case.
assertThat(desc, segments1.isBoundary(idx) == exp);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRangesAfterIndex() {
|
||||
Segmenter enWordSegmenter =
|
||||
|
|
Loading…
Add table
Reference in a new issue