diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java index 8840b1c96fc..020f158ed59 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java @@ -2,7 +2,9 @@ package com.ibm.icu.text.segmenter; import com.ibm.icu.text.BreakIterator; import java.util.Iterator; +import java.util.Spliterator; import java.util.function.Function; +import java.util.function.IntConsumer; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -158,29 +160,12 @@ public interface Segments { // Inner classes for BoundaryIterable and BoundaryIterator // - class BoundaryIterable implements Iterable { - BreakIterator breakIter; - IterationDirection direction; - int startIdx; - - BoundaryIterable(BreakIterator breakIter, IterationDirection direction, int startIdx) { - this.breakIter = breakIter; - this.direction = direction; - this.startIdx = startIdx; - } - - @Override - public Iterator iterator() { - return new BoundaryIterator(this.breakIter, this.direction, this.startIdx); - } - } - - class BoundaryIterator implements Iterator { + class BoundaryIteratorOfInts { BreakIterator breakIter; IterationDirection direction; int currIdx; - BoundaryIterator(BreakIterator breakIter, IterationDirection direction, int startIdx) { + BoundaryIteratorOfInts(BreakIterator breakIter, IterationDirection direction, int startIdx) { this.breakIter = breakIter; this.direction = direction; @@ -198,12 +183,10 @@ public interface Segments { } } - @Override public boolean hasNext() { return this.currIdx != BreakIterator.DONE; } - @Override public Integer next() { int result = this.currIdx; @@ -218,4 +201,55 @@ public interface Segments { } } + class SegmentSpliterator implements Spliterator.OfInt { + + private final BoundaryIteratorOfInts iter; + + SegmentSpliterator(BreakIterator breakIter, 
IterationDirection direction, int startIdx) { + iter = new BoundaryIteratorOfInts(breakIter, direction, startIdx); + } + + @Override + public OfInt trySplit() { + // The elements of the Stream represent an iteration through a string, and are thus inherently + // stateful. Therefore, splitting this Stream does not make sense. Ex: splitting the Stream + // is tantamount to discarding the segment subtended by the end value (index into the input + // string) of one substream and the beginning value of the next substream. + return null; + } + + @Override + public long estimateSize() { + // The number of segments per input size depends on language, script, and + // the content of the input string, and thus is hard to estimate without + // sacrificing performance. Thus, returning `Long.MAX_VALUE`, according + // to the API, to mean "unknown, or too expensive to compute". + return Long.MAX_VALUE; + } + + @Override + public int characteristics() { + return Spliterator.DISTINCT // BreakIterator always advances + | Spliterator.IMMUTABLE // design of Segmenter API is to provide an immutable view of + // segmentation by preventing the input string from mutating + // in the underlying BreakIterator + | Spliterator.NONNULL // primitive int is non-null + | Spliterator.ORDERED // BreakIterator always advances, and in a single direction + ; + } + + @Override + public boolean tryAdvance(IntConsumer action) { + if (action == null) { + throw new NullPointerException(); + } + if (iter.hasNext()) { + action.accept(iter.next()); + return true; + } else { + return false; + } + } + } + } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java index 09a521c2e6d..253a577d037 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java @@ -1,10 +1,10 @@ package 
com.ibm.icu.text.segmenter; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.segmenter.Segments.BoundaryIterable; import com.ibm.icu.text.segmenter.Segments.IterationDirection; import com.ibm.icu.text.segmenter.Segments.Segment; import com.ibm.icu.text.segmenter.Segments.SegmentIterable; +import com.ibm.icu.text.segmenter.Segments.SegmentSpliterator; import java.util.function.Function; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -86,6 +86,8 @@ public class SegmentsImplUtils { return new Segment(start, limit, sourceSequence); } + // TODO(ICU-22987): Remove unused segmentBeforeIndex / segmentAfterIndex after + // ensuring fix for preceding(int) to return `DONE` for negative inputs public static Segment segmentBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { breakIter.setText(sourceSequence); @@ -120,10 +122,7 @@ public class SegmentsImplUtils { breakIter.setText(sourceSequence); // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - // TODO: optimize IntStream creation to avoid autoboxing - BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i); - Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); - return boundariesAsIntegers.mapToInt(Integer::intValue); + return StreamSupport.intStream(new SegmentSpliterator(breakIter, IterationDirection.FORWARDS, i), false); } public static IntStream boundariesBackFrom(BreakIterator breakIter, CharSequence sourceSequence, int i) { @@ -139,10 +138,7 @@ public class SegmentsImplUtils { int backFromIdx = isOnBoundary ? 
i + 1 : i; // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - // TODO: optimize IntStream creation to avoid autoboxing - BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, backFromIdx); - Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); - return boundariesAsIntegers.mapToInt(Integer::intValue); + return StreamSupport.intStream(new SegmentSpliterator(breakIter, IterationDirection.BACKWARDS, backFromIdx), false); } }