From 5b6eaddcedd6fa85811461d70f6211f397d5a18b Mon Sep 17 00:00:00 2001 From: Elango Cheran Date: Tue, 31 Dec 2024 15:29:50 -0800 Subject: [PATCH] ICU-22789 Refactor default impls of `Segments` interface into reusable static util fns for concrete classes --- .../text/segmenter/LocalizedSegmenter.java | 55 +++++++++- .../text/segmenter/RuleBasedSegmenter.java | 51 ++++++++- .../com/ibm/icu/text/segmenter/Segments.java | 100 ++--------------- .../icu/text/segmenter/SegmentsImplUtils.java | 103 ++++++++++++++++++ 4 files changed, 209 insertions(+), 100 deletions(-) create mode 100644 icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java index 44d67096125..0ffb8094037 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java @@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter; import com.ibm.icu.text.BreakIterator; import com.ibm.icu.util.ULocale; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; public class LocalizedSegmenter implements Segmenter { @@ -76,7 +79,7 @@ public class LocalizedSegmenter implements Segmenter { } - public static class LocalizedSegments implements Segments { + public class LocalizedSegments implements Segments { private CharSequence source; @@ -92,17 +95,57 @@ public class LocalizedSegmenter implements Segmenter { @Override public CharSequence getSourceSequence() { - return source; + return this.source; } @Override - public Segmenter getSegmenter() { - return segmenter; + public Stream subSequences() { + return SegmentsImplUtils.subSequences(this.breakIter, this.source); } @Override - public BreakIterator getInstanceBreakIterator() { - return this.breakIter; + public Stream ranges() { + return SegmentsImplUtils.ranges(this.breakIter, this.source); + } + + @Override + public Stream rangesAfterIndex(int i) { + return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i); + } + + @Override + public Stream rangesBeforeIndex(int i) { + return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i); + } + + @Override + public Segment rangeAfterIndex(int i) { + return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i); + } + + @Override + public Segment rangeBeforeIndex(int i) { + return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i); + } + + @Override + public Function rangeToSequenceFn() { + return SegmentsImplUtils.rangeToSequenceFn(this.source); + } + + @Override + public IntStream boundaries() { + return SegmentsImplUtils.boundaries(this.breakIter, this.source); + } + + @Override + public IntStream boundariesAfterIndex(int i) { + return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i); + } + + @Override + public IntStream boundariesBeforeIndex(int i) { + return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i); } } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java index e69f4b89e27..4babe4265a7 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java @@ -2,6 +2,9 @@ package com.ibm.icu.text.segmenter; import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.RuleBasedBreakIterator; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; public class RuleBasedSegmenter implements Segmenter { @@ -65,13 +68,53 @@ public class RuleBasedSegmenter implements Segmenter { } @Override - public Segmenter getSegmenter() { - return segmenter; + public Stream subSequences() { + return SegmentsImplUtils.subSequences(this.breakIter, this.source); } @Override - public BreakIterator getInstanceBreakIterator() { - return this.breakIter; + public Stream ranges() { + return SegmentsImplUtils.ranges(this.breakIter, this.source); + } + + @Override + public Stream rangesAfterIndex(int i) { + return SegmentsImplUtils.rangesAfterIndex(this.breakIter, this.source, i); + } + + @Override + public Stream rangesBeforeIndex(int i) { + return SegmentsImplUtils.rangesBeforeIndex(this.breakIter, this.source, i); + } + + @Override + public Segment rangeAfterIndex(int i) { + return SegmentsImplUtils.rangeAfterIndex(this.breakIter, this.source, i); + } + + @Override + public Segment rangeBeforeIndex(int i) { + return SegmentsImplUtils.rangeBeforeIndex(this.breakIter, this.source, i); + } + + @Override + public Function rangeToSequenceFn() { + return SegmentsImplUtils.rangeToSequenceFn(this.source); + } + + @Override + public IntStream boundaries() { + return SegmentsImplUtils.boundaries(this.breakIter, this.source); + } + + @Override + public IntStream boundariesAfterIndex(int i) { + return SegmentsImplUtils.boundariesAfterIndex(this.breakIter, this.source, i); + } + + @Override + public IntStream boundariesBeforeIndex(int i) { + return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i); } } } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java index 66897108252..b2db9594fdc 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java @@ -11,105 +11,25 @@ public interface Segments { CharSequence getSourceSequence(); - @Deprecated - Segmenter getSegmenter(); + Stream subSequences(); - @Deprecated - BreakIterator getInstanceBreakIterator(); + Stream ranges(); - default Stream subSequences() { - return ranges().map(rangeToSequenceFn()); - } + Stream rangesAfterIndex(int i); - default Stream ranges() { - return rangesAfterIndex(-1); - }; + Stream rangesBeforeIndex(int i); - default Stream rangesAfterIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); + Segment rangeAfterIndex(int i); - // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i); - return StreamSupport.stream(iterable.spliterator(), false); - } + Segment rangeBeforeIndex(int i); - default Stream rangesBeforeIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); + Function rangeToSequenceFn(); - // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i); - return StreamSupport.stream(iterable.spliterator(), false); - } + IntStream boundaries(); - default Segment rangeAfterIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); + IntStream boundariesAfterIndex(int i); - int start = breakIter.following(i); - if (start == BreakIterator.DONE) { - return null; - } - - int limit = breakIter.next(); - if (limit == BreakIterator.DONE) { - return null; - } - - return new Segment(start, limit); - } - - default Segment rangeBeforeIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); - - - // TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs - if (i < 0) { - // return the same thing as we would if preceding() returned DONE - return null; - } - - int start = breakIter.preceding(i); - int limit = breakIter.previous(); - - if (start == BreakIterator.DONE || limit == BreakIterator.DONE) { - return null; - } - - assert limit <= start; - - return new Segment(limit, start); - } - - default Function rangeToSequenceFn() { - return segment -> getSourceSequence().subSequence(segment.start, segment.limit); - } - - default IntStream boundaries() { - return boundariesAfterIndex(-1); - } - - default IntStream boundariesAfterIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); - - // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i); - Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); - return boundariesAsIntegers.mapToInt(Integer::intValue); - } - - default IntStream boundariesBeforeIndex(int i) { - BreakIterator breakIter = getInstanceBreakIterator(); - breakIter.setText(getSourceSequence()); - - // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i); - Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); - return boundariesAsIntegers.mapToInt(Integer::intValue); - } + IntStream boundariesBeforeIndex(int i); // // Inner enums/classes in common for other inner classes diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java new file mode 100644 index 00000000000..95e2888c621 --- /dev/null +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java @@ -0,0 +1,103 @@ +package com.ibm.icu.text.segmenter; + +import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.segmenter.Segments.BoundaryIterable; +import com.ibm.icu.text.segmenter.Segments.IterationDirection; +import com.ibm.icu.text.segmenter.Segments.Segment; +import com.ibm.icu.text.segmenter.Segments.SegmentIterable; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class SegmentsImplUtils { + + public static Stream subSequences(BreakIterator breakIter, CharSequence sourceSequence) { + return ranges(breakIter, sourceSequence).map(rangeToSequenceFn(sourceSequence)); + } + + public static Stream ranges(BreakIterator breakIter, CharSequence sourceSequence) { + return rangesAfterIndex(breakIter, sourceSequence, -1); + }; + + public static Stream rangesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager + SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i); + return StreamSupport.stream(iterable.spliterator(), false); + } + + public static Stream rangesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager + SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i); + return StreamSupport.stream(iterable.spliterator(), false); + } + + public static Segment rangeAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + int start = breakIter.following(i); + if (start == BreakIterator.DONE) { + return null; + } + + int limit = breakIter.next(); + if (limit == BreakIterator.DONE) { + return null; + } + + return new Segment(start, limit); + } + + public static Segment rangeBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + + // TODO(ICU-22987): Remove after fixing preceding(int) to return `DONE` for negative inputs + if (i < 0) { + // return the same thing as we would if preceding() returned DONE + return null; + } + + int start = breakIter.preceding(i); + int limit = breakIter.previous(); + + if (start == BreakIterator.DONE || limit == BreakIterator.DONE) { + return null; + } + + assert limit <= start; + + return new Segment(limit, start); + } + + public static Function rangeToSequenceFn(CharSequence sourceSequence) { + return segment -> sourceSequence.subSequence(segment.start, segment.limit); + } + + public static IntStream boundaries(BreakIterator breakIter, CharSequence sourceSequence) { + return boundariesAfterIndex(breakIter, sourceSequence, -1); + } + + public static IntStream boundariesAfterIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager + BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.FORWARDS, i); + Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); + return boundariesAsIntegers.mapToInt(Integer::intValue); + } + + public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + breakIter.setText(sourceSequence); + + // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager + BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i); + Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); + return boundariesAsIntegers.mapToInt(Integer::intValue); + } + +}