mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-22789 Replace Stream of boundary Integer with efficient IntStream
This commit is contained in:
parent
465f620158
commit
3dbbdb4e4b
2 changed files with 60 additions and 30 deletions
|
@ -2,7 +2,9 @@ package com.ibm.icu.text.segmenter;
|
|||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Spliterator;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.IntConsumer;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
|
@ -158,29 +160,12 @@ public interface Segments {
|
|||
// Inner classes for BoundaryIterable and BoundaryIterator
|
||||
//
|
||||
|
||||
class BoundaryIterable implements Iterable<Integer> {
|
||||
BreakIterator breakIter;
|
||||
IterationDirection direction;
|
||||
int startIdx;
|
||||
|
||||
BoundaryIterable(BreakIterator breakIter, IterationDirection direction, int startIdx) {
|
||||
this.breakIter = breakIter;
|
||||
this.direction = direction;
|
||||
this.startIdx = startIdx;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Integer> iterator() {
|
||||
return new BoundaryIterator(this.breakIter, this.direction, this.startIdx);
|
||||
}
|
||||
}
|
||||
|
||||
class BoundaryIterator implements Iterator<Integer> {
|
||||
class BoundaryIteratorOfInts {
|
||||
BreakIterator breakIter;
|
||||
IterationDirection direction;
|
||||
int currIdx;
|
||||
|
||||
BoundaryIterator(BreakIterator breakIter, IterationDirection direction, int startIdx) {
|
||||
BoundaryIteratorOfInts(BreakIterator breakIter, IterationDirection direction, int startIdx) {
|
||||
this.breakIter = breakIter;
|
||||
this.direction = direction;
|
||||
|
||||
|
@ -198,12 +183,10 @@ public interface Segments {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return this.currIdx != BreakIterator.DONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer next() {
|
||||
int result = this.currIdx;
|
||||
|
||||
|
@ -218,4 +201,55 @@ public interface Segments {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * A {@link Spliterator.OfInt} that hands out the values produced by a
 * {@code BoundaryIteratorOfInts}. It never splits ({@link #trySplit()} returns {@code null})
 * and reports its size as unknown ({@link #estimateSize()} returns {@code Long.MAX_VALUE}).
 */
class SegmentSpliterator implements Spliterator.OfInt {
|
||||

||||
// Source of the values handed to the consumer in tryAdvance.
private final BoundaryIteratorOfInts iter;
|
||||

||||
/**
 * Builds the wrapped {@code BoundaryIteratorOfInts}; all three arguments are passed
 * through to its constructor unchanged.
 */
SegmentSpliterator(BreakIterator breakIter, IterationDirection direction, int startIdx) {
|
||||
iter = new BoundaryIteratorOfInts(breakIter, direction, startIdx);
|
||||
}
|
||||

||||
/**
 * Always returns {@code null}, i.e. this spliterator cannot be partitioned.
 */
@Override
|
||||
public OfInt trySplit() {
|
||||
// The elements of the Stream represent an iteration through a string, and are thus inherently
|
||||
// stateful. Therefore, splitting this Stream does not make sense. Ex: splitting the Stream
|
||||
// is tantamount to discarding the segment subtended by the end value (index into the input
|
||||
// string) of one substream and the beginning value of the next substream.
|
||||
return null;
|
||||
}
|
||||

||||
/**
 * Always returns {@code Long.MAX_VALUE}; no estimate is computed.
 */
@Override
|
||||
public long estimateSize() {
|
||||
// The number of segments per input size depends on language, script, and
|
||||
// the content of the input string, and thus is hard to estimate without
|
||||
// sacrificing performance. Thus, returning `Long.MAX_VALUE`, according
|
||||
// to the API, to mean "unknown, or too expensive to compute".
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||

||||
/**
 * Returns a fixed set of flags: {@code DISTINCT}, {@code IMMUTABLE}, {@code NONNULL},
 * and {@code ORDERED}. {@code SIZED} is not reported.
 */
@Override
|
||||
public int characteristics() {
|
||||
return Spliterator.DISTINCT // BreakIterator always advances
|
||||
| Spliterator.IMMUTABLE // design of Segmenter API is to provide an immutable view of
|
||||
// segmentation by preventing the input string from mutating
|
||||
|
||||
// in the underlying BreakIterator
|
||||
| Spliterator.NONNULL // primitive int is non-null
|
||||
| Spliterator.ORDERED // BreakIterator always advances, and in a single direction
|
||||
;
|
||||
}
|
||||

||||
/**
 * Passes the next value from the wrapped iterator to {@code action}, if there is one.
 *
 * @param action consumer that receives the next value
 * @return {@code true} if a value was passed to {@code action}; {@code false} if the
 *     wrapped iterator reported no further values
 * @throws NullPointerException if {@code action} is {@code null}
 */
@Override
|
||||
public boolean tryAdvance(IntConsumer action) {
|
||||
// Explicit null check so a null action fails even when there is nothing left to consume.
if (action == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
if (iter.hasNext()) {
|
||||
// NOTE(review): next() appears to be declared as returning Integer, in which case the
// value is unboxed here before reaching the IntConsumer -- confirm against the iterator.
action.accept(iter.next());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
package com.ibm.icu.text.segmenter;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.segmenter.Segments.BoundaryIterable;
|
||||
import com.ibm.icu.text.segmenter.Segments.IterationDirection;
|
||||
import com.ibm.icu.text.segmenter.Segments.Segment;
|
||||
import com.ibm.icu.text.segmenter.Segments.SegmentIterable;
|
||||
import com.ibm.icu.text.segmenter.Segments.SegmentSpliterator;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
@ -86,6 +86,8 @@ public class SegmentsImplUtils {
|
|||
return new Segment(start, limit, sourceSequence);
|
||||
}
|
||||
|
||||
// TODO(ICU-22987): Remove unused segmentBeforeIndex / segmentAfterIndex after
|
||||
// ensuring fix for preceding(int) to return `DONE` for negative inputs
|
||||
public static Segment segmentBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
breakIter.setText(sourceSequence);
|
||||
|
||||
|
@ -120,10 +122,7 @@ public class SegmentsImplUtils {
|
|||
breakIter.setText(sourceSequence);
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
// TODO: optimize IntStream creation to avoid autoboxing
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
return StreamSupport.intStream(new SegmentSpliterator(breakIter, IterationDirection.FORWARDS, i), false);
|
||||
}
|
||||
|
||||
public static IntStream boundariesBackFrom(BreakIterator breakIter, CharSequence sourceSequence, int i) {
|
||||
|
@ -139,10 +138,7 @@ public class SegmentsImplUtils {
|
|||
int backFromIdx = isOnBoundary ? i + 1 : i;
|
||||
|
||||
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
|
||||
// TODO: optimize IntStream creation to avoid autoboxing
|
||||
BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, backFromIdx);
|
||||
Stream<Integer> boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false);
|
||||
return boundariesAsIntegers.mapToInt(Integer::intValue);
|
||||
return StreamSupport.intStream(new SegmentSpliterator(breakIter, IterationDirection.BACKWARDS, backFromIdx), false);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue