ICU-22789 Refactor default impls of Segments interface into reusable static util fns for concrete classes

This commit is contained in:
Elango Cheran 2024-12-31 15:29:50 -08:00
parent 94ec357df5
commit 5b6eaddced
4 changed files with 209 additions and 100 deletions
icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter

View file

@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.util.ULocale;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.Stream;
public class LocalizedSegmenter implements Segmenter {
@ -76,7 +79,7 @@ public class LocalizedSegmenter implements Segmenter {
}
public static class LocalizedSegments implements Segments {
public class LocalizedSegments implements Segments {
private CharSequence source;
@ -92,17 +95,57 @@ public class LocalizedSegmenter implements Segmenter {
@Override
public CharSequence getSourceSequence() {
return source;
return this.source;
}
@Override
public Segmenter getSegmenter() {
return segmenter;
public Stream<CharSequence> subSequences() {
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
}
@Override
public BreakIterator getInstanceBreakIterator() {
return this.breakIter;
public Stream<Segment> ranges() {
return SegmentsImplUtils.ranges(this.breakIter, this.source);
}
/**
 * Returns a stream of segments obtained by iterating forwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangesAfterIndex}, passing this object's break
 * iterator and source sequence. The helper resets the break iterator's text to the source
 * before building the stream.
 *
 * @param i the index in the source sequence from which forward iteration starts
 * @return a sequential stream of {@code Segment} ranges
 */
@Override
public Stream<Segment> rangesAfterIndex(int i) {
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns a stream of segments obtained by iterating backwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangesBeforeIndex}, passing this object's break
 * iterator and source sequence. The helper resets the break iterator's text to the source
 * before building the stream.
 *
 * @param i the index in the source sequence from which backward iteration starts
 * @return a sequential stream of {@code Segment} ranges
 */
@Override
public Stream<Segment> rangesBeforeIndex(int i) {
return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i);
}
/**
 * Returns the single segment that begins at the boundary the break iterator reports as
 * following index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeAfterIndex}, passing this object's break
 * iterator and source sequence.
 *
 * @param i the index in the source sequence to search after
 * @return the segment, or {@code null} when the helper finds no start or no end boundary
 *     (the break iterator reports {@code BreakIterator.DONE})
 */
@Override
public Segment rangeAfterIndex(int i) {
return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns the single segment that ends at the boundary the break iterator reports as
 * preceding index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeBeforeIndex}, passing this object's break
 * iterator and source sequence.
 *
 * @param i the index in the source sequence to search before
 * @return the segment, or {@code null} when {@code i} is negative or when the helper finds
 *     no boundary (the break iterator reports {@code BreakIterator.DONE})
 */
@Override
public Segment rangeBeforeIndex(int i) {
return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i);
}
/**
 * Returns a function that maps a {@code Segment} to the matching slice of this object's
 * source sequence.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeToSequenceFn}, whose function calls
 * {@code subSequence(segment.start, segment.limit)} on the source.
 *
 * @return a function from a segment to the sub-sequence it covers
 */
@Override
public Function<Segment, CharSequence> rangeToSequenceFn() {
return SegmentsImplUtils.rangeToSequenceFn(this.source);
}
/**
 * Returns the boundary indices of the source sequence as a stream.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundaries}, which is implemented as a forward
 * boundary iteration starting from index {@code -1}.
 *
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundaries() {
return SegmentsImplUtils.boundaries(this.breakIter, this.source);
}
/**
 * Returns a stream of boundary indices obtained by iterating forwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundariesAfterIndex}, passing this object's
 * break iterator and source sequence. The helper resets the break iterator's text to the
 * source before building the stream.
 *
 * @param i the index in the source sequence from which forward iteration starts
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundariesAfterIndex(int i) {
return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns a stream of boundary indices obtained by iterating backwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundariesBeforeIndex}, passing this object's
 * break iterator and source sequence. The helper resets the break iterator's text to the
 * source before building the stream.
 *
 * @param i the index in the source sequence from which backward iteration starts
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundariesBeforeIndex(int i) {
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
}
}

View file

@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.Stream;
public class RuleBasedSegmenter implements Segmenter {
@ -65,13 +68,53 @@ public class RuleBasedSegmenter implements Segmenter {
}
@Override
public Segmenter getSegmenter() {
return segmenter;
public Stream<CharSequence> subSequences() {
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
}
@Override
public BreakIterator getInstanceBreakIterator() {
return this.breakIter;
public Stream<Segment> ranges() {
return SegmentsImplUtils.ranges(this.breakIter, this.source);
}
/**
 * Returns a stream of segments obtained by iterating forwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangesAfterIndex}, passing this object's break
 * iterator and source sequence. The helper resets the break iterator's text to the source
 * before building the stream.
 *
 * @param i the index in the source sequence from which forward iteration starts
 * @return a sequential stream of {@code Segment} ranges
 */
@Override
public Stream<Segment> rangesAfterIndex(int i) {
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns a stream of segments obtained by iterating backwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangesBeforeIndex}, passing this object's break
 * iterator and source sequence. The helper resets the break iterator's text to the source
 * before building the stream.
 *
 * @param i the index in the source sequence from which backward iteration starts
 * @return a sequential stream of {@code Segment} ranges
 */
@Override
public Stream<Segment> rangesBeforeIndex(int i) {
return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i);
}
/**
 * Returns the single segment that begins at the boundary the break iterator reports as
 * following index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeAfterIndex}, passing this object's break
 * iterator and source sequence.
 *
 * @param i the index in the source sequence to search after
 * @return the segment, or {@code null} when the helper finds no start or no end boundary
 *     (the break iterator reports {@code BreakIterator.DONE})
 */
@Override
public Segment rangeAfterIndex(int i) {
return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns the single segment that ends at the boundary the break iterator reports as
 * preceding index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeBeforeIndex}, passing this object's break
 * iterator and source sequence.
 *
 * @param i the index in the source sequence to search before
 * @return the segment, or {@code null} when {@code i} is negative or when the helper finds
 *     no boundary (the break iterator reports {@code BreakIterator.DONE})
 */
@Override
public Segment rangeBeforeIndex(int i) {
return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i);
}
/**
 * Returns a function that maps a {@code Segment} to the matching slice of this object's
 * source sequence.
 *
 * <p>Delegates to {@link SegmentsImplUtils#rangeToSequenceFn}, whose function calls
 * {@code subSequence(segment.start, segment.limit)} on the source.
 *
 * @return a function from a segment to the sub-sequence it covers
 */
@Override
public Function<Segment, CharSequence> rangeToSequenceFn() {
return SegmentsImplUtils.rangeToSequenceFn(this.source);
}
/**
 * Returns the boundary indices of the source sequence as a stream.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundaries}, which is implemented as a forward
 * boundary iteration starting from index {@code -1}.
 *
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundaries() {
return SegmentsImplUtils.boundaries(this.breakIter, this.source);
}
/**
 * Returns a stream of boundary indices obtained by iterating forwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundariesAfterIndex}, passing this object's
 * break iterator and source sequence. The helper resets the break iterator's text to the
 * source before building the stream.
 *
 * @param i the index in the source sequence from which forward iteration starts
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundariesAfterIndex(int i) {
return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i);
}
/**
 * Returns a stream of boundary indices obtained by iterating backwards from index {@code i}.
 *
 * <p>Delegates to {@link SegmentsImplUtils#boundariesBeforeIndex}, passing this object's
 * break iterator and source sequence. The helper resets the break iterator's text to the
 * source before building the stream.
 *
 * @param i the index in the source sequence from which backward iteration starts
 * @return a sequential {@code IntStream} of boundary indices
 */
@Override
public IntStream boundariesBeforeIndex(int i) {
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
}
}
}

View file

@ -11,105 +11,25 @@ public interface Segments {
CharSequence getSourceSequence();
@Deprecated
Segmenter getSegmenter();
Stream<CharSequence> subSequences();
@Deprecated
BreakIterator getInstanceBreakIterator();
Stream<Segment> ranges();
default Stream<CharSequence> subSequences() {
return ranges().map(rangeToSequenceFn());
}
Stream<Segment> rangesAfterIndex(int i);
default Stream<Segment> ranges() {
return rangesAfterIndex(-1);
};
Stream<Segment> rangesBeforeIndex(int i);
default Stream<Segment> rangesAfterIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
Segment rangeAfterIndex(int i);
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i);
return StreamSupport.stream(iterable.spliterator(), false);
}
Segment rangeBeforeIndex(int i);
default Stream<Segment> rangesBeforeIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
Function<Segment, CharSequence> rangeToSequenceFn();
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i);
return StreamSupport.stream(iterable.spliterator(), false);
}
IntStream boundaries();
default Segment rangeAfterIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
IntStream boundariesAfterIndex(int i);
int start = breakIter.following(i);
if (start == BreakIterator.DONE) {
return null;
}
int limit = breakIter.next();
if (limit == BreakIterator.DONE) {
return null;
}
return new Segment(start, limit);
}
default Segment rangeBeforeIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
// TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs
if (i < 0) {
// return the same thing as we would if preceding() returned DONE
return null;
}
int start = breakIter.preceding(i);
int limit = breakIter.previous();
if (start == BreakIterator.DONE || limit == BreakIterator.DONE) {
return null;
}
assert limit <= start;
return new Segment(limit, start);
}
default Function<Segment, CharSequence> rangeToSequenceFn() {
return segment -> getSourceSequence().subSequence(segment.start, segment.limit);
}
default IntStream boundaries() {
return boundariesAfterIndex(-1);
}
default IntStream boundariesAfterIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i);
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
return boundariesAsIntegers.mapToInt(Integer::intValue);
}
default IntStream boundariesBeforeIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i);
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
return boundariesAsIntegers.mapToInt(Integer::intValue);
}
IntStream boundariesBeforeIndex(int i);
//
// Inner enums/classes in common for other inner classes

View file

@ -0,0 +1,103 @@
package com.ibm.icu.text.segmenter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.segmenter.Segments.BoundaryIterable;
import com.ibm.icu.text.segmenter.Segments.IterationDirection;
import com.ibm.icu.text.segmenter.Segments.Segment;
import com.ibm.icu.text.segmenter.Segments.SegmentIterable;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * Static helper implementations shared by concrete {@code Segments} classes.
 *
 * <p>Every helper takes the {@link BreakIterator} and source {@link CharSequence} owned by the
 * calling {@code Segments} object and starts by calling {@code breakIter.setText(sourceSequence)}.
 * The stream-returning helpers hand that same iterator to the iterable backing the stream.
 *
 * <p>NOTE(review): because the iterator is passed in and mutated rather than copied here,
 * consuming two streams built from the same iterator in an interleaved fashion presumably
 * disturbs each other's position. Confirm against {@code SegmentIterable} and
 * {@code BoundaryIterable}.
 */
public final class SegmentsImplUtils {

    /** Not instantiable: this class only hosts static helpers. */
    private SegmentsImplUtils() {}

    /**
     * Returns the sub-sequences of {@code sourceSequence} covered by each segment of
     * {@link #ranges(BreakIterator, CharSequence)}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @return a stream with one sub-sequence per segment
     */
    public static Stream<CharSequence> subSequences(BreakIterator breakIter, CharSequence sourceSequence) {
        return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence));
    }

    /**
     * Returns the segments of {@code sourceSequence}; equivalent to
     * {@code rangesAfterIndex(breakIter, sourceSequence, -1)}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @return a sequential stream of segments
     */
    public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
        // NOTE(review): -1 presumably makes iteration begin at the first boundary of the text;
        // confirm against SegmentIterable.
        return rangesAfterIndex(breakIter, sourceSequence, -1);
    }

    /**
     * Returns the segments found by iterating forwards from index {@code i}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index from which forward iteration starts
     * @return a sequential stream of segments
     */
    public static Stream<Segment> rangesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        return segmentStream(breakIter, sourceSequence, IterationDirection.FORWARDS, i);
    }

    /**
     * Returns the segments found by iterating backwards from index {@code i}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index from which backward iteration starts
     * @return a sequential stream of segments
     */
    public static Stream<Segment> rangesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        return segmentStream(breakIter, sourceSequence, IterationDirection.BACKWARDS, i);
    }

    /**
     * Returns the segment spanning from {@code breakIter.following(i)} to the boundary after it.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index to search after
     * @return the segment, or {@code null} if either boundary is {@link BreakIterator#DONE}
     */
    public static Segment rangeAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        breakIter.setText(sourceSequence);

        int start = breakIter.following(i);
        if (start == BreakIterator.DONE) {
            return null;
        }

        int limit = breakIter.next();
        if (limit == BreakIterator.DONE) {
            return null;
        }

        return new Segment(start, limit);
    }

    /**
     * Returns the segment ending at {@code breakIter.preceding(i)} and starting at the boundary
     * before it.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index to search before
     * @return the segment, or {@code null} if {@code i} is negative or either boundary is
     *     {@link BreakIterator#DONE}
     */
    public static Segment rangeBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        breakIter.setText(sourceSequence);

        // TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs
        if (i < 0) {
            // return the same thing as we would if preceding() returned DONE
            return null;
        }

        // Walking backwards: the first boundary found is the segment's end, the second its start.
        int end = breakIter.preceding(i);
        int begin = breakIter.previous();

        if (end == BreakIterator.DONE || begin == BreakIterator.DONE) {
            return null;
        }

        assert begin <= end;

        return new Segment(begin, end);
    }

    /**
     * Returns a function mapping a segment to
     * {@code sourceSequence.subSequence(segment.start, segment.limit)}.
     *
     * @param sourceSequence the text that segments index into
     * @return a function from a segment to the sub-sequence it covers
     */
    public static Function<Segment, CharSequence> rangeToSequenceFn(CharSequence sourceSequence) {
        return segment -> sourceSequence.subSequence(segment.start, segment.limit);
    }

    /**
     * Returns the boundaries of {@code sourceSequence}; equivalent to
     * {@code boundariesAfterIndex(breakIter, sourceSequence, -1)}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @return a sequential stream of boundary indices
     */
    public static IntStream boundaries(BreakIterator breakIter, CharSequence sourceSequence) {
        return boundariesAfterIndex(breakIter, sourceSequence, -1);
    }

    /**
     * Returns the boundaries found by iterating forwards from index {@code i}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index from which forward iteration starts
     * @return a sequential stream of boundary indices
     */
    public static IntStream boundariesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        return boundaryStream(breakIter, sourceSequence, IterationDirection.FORWARDS, i);
    }

    /**
     * Returns the boundaries found by iterating backwards from index {@code i}.
     *
     * @param breakIter the break iterator used to find boundaries; its text is reset
     * @param sourceSequence the text being segmented
     * @param i the index from which backward iteration starts
     * @return a sequential stream of boundary indices
     */
    public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
        return boundaryStream(breakIter, sourceSequence, IterationDirection.BACKWARDS, i);
    }

    /** Resets the iterator's text, then wraps a {@code SegmentIterable} in a sequential stream. */
    private static Stream<Segment> segmentStream(
            BreakIterator breakIter, CharSequence sourceSequence, IterationDirection direction, int i) {
        breakIter.setText(sourceSequence);

        // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
        SegmentIterable iterable = new SegmentIterable(breakIter, direction, i);
        return StreamSupport.stream(iterable.spliterator(), false);
    }

    /** Resets the iterator's text, then wraps a {@code BoundaryIterable} in a sequential IntStream. */
    private static IntStream boundaryStream(
            BreakIterator breakIter, CharSequence sourceSequence, IterationDirection direction, int i) {
        breakIter.setText(sourceSequence);

        // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
        BoundaryIterable iterable = new BoundaryIterable(breakIter, direction, i);
        Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
        return boundariesAsIntegers.mapToInt(Integer::intValue);
    }
}