mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-22789 Refactor default impls of Segments
interface into reusable static util fns for concrete classes
This commit is contained in:
parent
94ec357df5
commit
5b6eaddced
4 changed files with 209 additions and 100 deletions
icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter
|
@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter;
|
|||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class LocalizedSegmenter implements Segmenter {
|
||||
|
||||
|
@ -76,7 +79,7 @@ public class LocalizedSegmenter implements Segmenter {
|
|||
|
||||
}
|
||||
|
||||
public static class LocalizedSegments implements Segments {
|
||||
public class LocalizedSegments implements Segments {
|
||||
|
||||
private CharSequence source;
|
||||
|
||||
|
@ -92,17 +95,57 @@ public class LocalizedSegmenter implements Segmenter {
|
|||
|
||||
@Override
|
||||
public CharSequence getSourceSequence() {
|
||||
return source;
|
||||
return this.source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Segmenter getSegmenter() {
|
||||
return segmenter;
|
||||
public Stream<CharSequence> subSequences() {
|
||||
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BreakIterator getInstanceBreakIterator() {
|
||||
return this.breakIter;
|
||||
public Stream<Segment> ranges() {
|
||||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns the segments found when iterating forwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.rangesAfterIndex} with this object's break iterator
 * and source text; that helper calls {@code setText} on the break iterator before building
 * the stream.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public Stream<Segment> rangesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns the segments found when iterating backwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.rangesBeforeIndex} with this object's break iterator
 * and source text; that helper calls {@code setText} on the break iterator before building
 * the stream.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public Stream<Segment> rangesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a single segment located after index {@code i}.
 * Delegates to {@code SegmentsImplUtils.rangeAfterIndex} with this object's break iterator
 * and source text.
 *
 * @param i index handed unchanged to the helper
 * @return the helper's result; the helper returns {@code null} when the break iterator
 *     reports {@code BreakIterator.DONE} for either end of the segment
 */
@Override
|
||||
public Segment rangeAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a single segment located before index {@code i}.
 * Delegates to {@code SegmentsImplUtils.rangeBeforeIndex} with this object's break iterator
 * and source text.
 *
 * @param i index handed unchanged to the helper
 * @return the helper's result; the helper returns {@code null} for a negative {@code i} or
 *     when the break iterator reports {@code BreakIterator.DONE} for either end
 */
@Override
|
||||
public Segment rangeBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a function that maps a {@code Segment} to the matching slice of this object's
 * source text. Delegates to {@code SegmentsImplUtils.rangeToSequenceFn}, which slices with
 * {@code subSequence(segment.start, segment.limit)}.
 *
 * @return the mapping function built by the helper
 */
@Override
|
||||
public Function<Segment, CharSequence> rangeToSequenceFn() {
|
||||
return SegmentsImplUtils.rangeToSequenceFn(this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns the boundary indices of the source text as an {@code IntStream}.
 * Delegates to {@code SegmentsImplUtils.boundaries}, which is the forward boundary iteration
 * started from index {@code -1}.
 *
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundaries() {
|
||||
return SegmentsImplUtils.boundaries(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns boundary indices found when iterating forwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.boundariesAfterIndex} with this object's break
 * iterator and source text; that helper calls {@code setText} on the break iterator first.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundariesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns boundary indices found when iterating backwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.boundariesBeforeIndex} with this object's break
 * iterator and source text; that helper calls {@code setText} on the break iterator first.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundariesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter;
|
|||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class RuleBasedSegmenter implements Segmenter {
|
||||
|
||||
|
@ -65,13 +68,53 @@ public class RuleBasedSegmenter implements Segmenter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Segmenter getSegmenter() {
|
||||
return segmenter;
|
||||
public Stream<CharSequence> subSequences() {
|
||||
return SegmentsImplUtils.subSequences(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BreakIterator getInstanceBreakIterator() {
|
||||
return this.breakIter;
|
||||
public Stream<Segment> ranges() {
|
||||
return SegmentsImplUtils.ranges(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns the segments found when iterating forwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.rangesAfterIndex} with this object's break iterator
 * and source text; that helper calls {@code setText} on the break iterator before building
 * the stream.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public Stream<Segment> rangesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns the segments found when iterating backwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.rangesBeforeIndex} with this object's break iterator
 * and source text; that helper calls {@code setText} on the break iterator before building
 * the stream.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public Stream<Segment> rangesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a single segment located after index {@code i}.
 * Delegates to {@code SegmentsImplUtils.rangeAfterIndex} with this object's break iterator
 * and source text.
 *
 * @param i index handed unchanged to the helper
 * @return the helper's result; the helper returns {@code null} when the break iterator
 *     reports {@code BreakIterator.DONE} for either end of the segment
 */
@Override
|
||||
public Segment rangeAfterIndex(int i) {
|
||||
return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a single segment located before index {@code i}.
 * Delegates to {@code SegmentsImplUtils.rangeBeforeIndex} with this object's break iterator
 * and source text.
 *
 * @param i index handed unchanged to the helper
 * @return the helper's result; the helper returns {@code null} for a negative {@code i} or
 *     when the break iterator reports {@code BreakIterator.DONE} for either end
 */
@Override
|
||||
public Segment rangeBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns a function that maps a {@code Segment} to the matching slice of this object's
 * source text. Delegates to {@code SegmentsImplUtils.rangeToSequenceFn}, which slices with
 * {@code subSequence(segment.start, segment.limit)}.
 *
 * @return the mapping function built by the helper
 */
@Override
|
||||
public Function<Segment, CharSequence> rangeToSequenceFn() {
|
||||
return SegmentsImplUtils.rangeToSequenceFn(this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns the boundary indices of the source text as an {@code IntStream}.
 * Delegates to {@code SegmentsImplUtils.boundaries}, which is the forward boundary iteration
 * started from index {@code -1}.
 *
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundaries() {
|
||||
return SegmentsImplUtils.boundaries(this.breakIter, this.source);
|
||||
}
|
||||
|
||||
/**
 * Returns boundary indices found when iterating forwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.boundariesAfterIndex} with this object's break
 * iterator and source text; that helper calls {@code setText} on the break iterator first.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundariesAfterIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
|
||||
/**
 * Returns boundary indices found when iterating backwards, using {@code i} as the start index.
 * Delegates to {@code SegmentsImplUtils.boundariesBeforeIndex} with this object's break
 * iterator and source text; that helper calls {@code setText} on the break iterator first.
 *
 * @param i start index handed unchanged to the helper
 * @return the stream built by the helper
 */
@Override
|
||||
public IntStream boundariesBeforeIndex(int i) {
|
||||
return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,105 +11,25 @@ public interface Segments {
|
|||
|
||||
CharSequence getSourceSequence();
|
||||
|
||||
@Deprecated
|
||||
Segmenter getSegmenter();
|
||||
Stream<CharSequence> subSequences();
|
||||
|
||||
@Deprecated
|
||||
BreakIterator getInstanceBreakIterator();
|
||||
Stream<Segment> ranges();
|
||||
|
||||
default Stream<CharSequence> subSequences() {
|
||||
return ranges().map(rangeToSequenceFn());
|
||||
}
|
||||
Stream<Segment> rangesAfterIndex(int i);
|
||||
|
||||
default Stream<Segment> ranges() {
|
||||
return rangesAfterIndex(-1);
|
||||
};
|
||||
Stream<Segment> rangesBeforeIndex(int i);
|
||||
|
||||
default Stream<Segment> rangesAfterIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
Segment rangeAfterIndex(int i);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i);
|
||||
return StreamSupport.stream(iterable.spliterator(), false);
|
||||
}
|
||||
Segment rangeBeforeIndex(int i);
|
||||
|
||||
default Stream<Segment> rangesBeforeIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
Function<Segment, CharSequence> rangeToSequenceFn();
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i);
|
||||
return StreamSupport.stream(iterable.spliterator(), false);
|
||||
}
|
||||
IntStream boundaries();
|
||||
|
||||
default Segment rangeAfterIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
IntStream boundariesAfterIndex(int i);
|
||||
|
||||
int start = breakIter.following(i);
|
||||
if (start == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int limit = breakIter.next();
|
||||
if (limit == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new Segment(start, limit);
|
||||
}
|
||||
|
||||
default Segment rangeBeforeIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
|
||||
|
||||
// TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs
|
||||
if (i < 0) {
|
||||
// return the same thing as we would if preceding() returned DONE
|
||||
return null;
|
||||
}
|
||||
|
||||
int start = breakIter.preceding(i);
|
||||
int limit = breakIter.previous();
|
||||
|
||||
if (start == BreakIterator.DONE || limit == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
assert limit <= start;
|
||||
|
||||
return new Segment(limit, start);
|
||||
}
|
||||
|
||||
default Function<Segment, CharSequence> rangeToSequenceFn() {
|
||||
return segment -> getSourceSequence().subSequence(segment.start, segment.limit);
|
||||
}
|
||||
|
||||
default IntStream boundaries() {
|
||||
return boundariesAfterIndex(-1);
|
||||
}
|
||||
|
||||
default IntStream boundariesAfterIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
|
||||
default IntStream boundariesBeforeIndex(int i) {
|
||||
BreakIterator breakIter = getInstanceBreakIterator();
|
||||
breakIter.setText(getSourceSequence());
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
IntStream boundariesBeforeIndex(int i);
|
||||
|
||||
//
|
||||
// Inner enums/classes in common for other inner classes
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
package com.ibm.icu.text.segmenter;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.segmenter.Segments.BoundaryIterable;
|
||||
import com.ibm.icu.text.segmenter.Segments.IterationDirection;
|
||||
import com.ibm.icu.text.segmenter.Segments.Segment;
|
||||
import com.ibm.icu.text.segmenter.Segments.SegmentIterable;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
public class SegmentsImplUtils {
|
||||
|
||||
public static Stream<CharSequence> subSequences(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence));
|
||||
}
|
||||
|
||||
public static Stream<Segment> ranges(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return rangesAfterIndex(breakIter, sourceSequence, -1);
|
||||
};
|
||||
|
||||
public static Stream<Segment> rangesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i);
|
||||
return StreamSupport.stream(iterable.spliterator(), false);
|
||||
}
|
||||
|
||||
public static Stream<Segment> rangesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i);
|
||||
return StreamSupport.stream(iterable.spliterator(), false);
|
||||
}
|
||||
|
||||
public static Segment rangeAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
int start = breakIter.following(i);
|
||||
if (start == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int limit = breakIter.next();
|
||||
if (limit == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new Segment(start, limit);
|
||||
}
|
||||
|
||||
public static Segment rangeBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
|
||||
// TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs
|
||||
if (i < 0) {
|
||||
// return the same thing as we would if preceding() returned DONE
|
||||
return null;
|
||||
}
|
||||
|
||||
int start = breakIter.preceding(i);
|
||||
int limit = breakIter.previous();
|
||||
|
||||
if (start == BreakIterator.DONE || limit == BreakIterator.DONE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
assert limit <= start;
|
||||
|
||||
return new Segment(limit, start);
|
||||
}
|
||||
|
||||
public static Function<Segment, CharSequence> rangeToSequenceFn(CharSequence sourceSequence) {
|
||||
return segment -> sourceSequence.subSequence(segment.start, segment.limit);
|
||||
}
|
||||
|
||||
public static IntStream boundaries(BreakIterator breakIter, CharSequence sourceSequence) {
|
||||
return boundariesAfterIndex(breakIter, sourceSequence, -1);
|
||||
}
|
||||
|
||||
public static IntStream boundariesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
|
||||
public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue