From 4163a6d89890d1870e8e30db4e3812273c3ea828 Mon Sep 17 00:00:00 2001 From: Elango Cheran Date: Fri, 3 Jan 2025 08:42:12 -0800 Subject: [PATCH] ICU-22789 Rename and adjust boundary logic for boundariesBackFrom API for Segments interface --- .../icu/text/segmenter/LocalizedSegmenter.java | 4 ++-- .../icu/text/segmenter/RuleBasedSegmenter.java | 4 ++-- .../com/ibm/icu/text/segmenter/Segments.java | 2 +- .../icu/text/segmenter/SegmentsImplUtils.java | 15 +++++++++++++-- .../dev/test/text/segmenter/SegmentsTest.java | 18 ++++++++++-------- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java index 095227777d9..323081d372f 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/LocalizedSegmenter.java @@ -120,8 +120,8 @@ public class LocalizedSegmenter implements Segmenter { } @Override - public IntStream boundariesBeforeIndex(int i) { - return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i); + public IntStream boundariesBackFrom(int i) { + return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i); } } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java index a8526deed4f..6a30516b70c 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/RuleBasedSegmenter.java @@ -93,8 +93,8 @@ public class RuleBasedSegmenter implements Segmenter { } @Override - public IntStream boundariesBeforeIndex(int i) { - return SegmentsImplUtils.boundariesBeforeIndex(this.breakIter, this.source, i); + public IntStream boundariesBackFrom(int i) { + return SegmentsImplUtils.boundariesBackFrom(this.breakIter, this.source, i); } } } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java index de7ebef497c..58da59ef4bd 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java @@ -33,7 +33,7 @@ public interface Segments { IntStream boundariesAfter(int i); - IntStream boundariesBeforeIndex(int i); + IntStream boundariesBackFrom(int i); // // Inner enums/classes in common for other inner classes diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java index 56dfe5ad2f1..f23a1d64663 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/SegmentsImplUtils.java @@ -10,6 +10,8 @@ import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.stream.StreamSupport; + +// Global TODO: make initialization of breakIterator a prerequisite public class SegmentsImplUtils { public static boolean isBoundary(BreakIterator breakIter, CharSequence source, int i) { @@ -97,11 +99,20 @@ public class SegmentsImplUtils { return boundariesAsIntegers.mapToInt(Integer::intValue); } - public static IntStream boundariesBeforeIndex(BreakIterator breakIter, CharSequence sourceSequence, int i) { + public static IntStream boundariesBackFrom(BreakIterator breakIter, CharSequence sourceSequence, int i) { + // TODO: make initialization of breakIterator a prerequisite breakIter.setText(sourceSequence); + int sourceLength = sourceSequence.length(); + if (i < 0) { + return IntStream.empty(); + } + + boolean isOnBoundary = i <= sourceLength && isBoundary(breakIter, sourceSequence, i); + int backFromIdx = isOnBoundary ? i + 1 : i; + // create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager - BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, i); + BoundaryIterable iterable = new BoundaryIterable(breakIter, IterationDirection.BACKWARDS, backFromIdx); Stream boundariesAsIntegers = StreamSupport.stream(iterable.spliterator(), false); return boundariesAsIntegers.mapToInt(Integer::intValue); } diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/text/segmenter/SegmentsTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/text/segmenter/SegmentsTest.java index c8b8d12676b..46088fe66f0 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/text/segmenter/SegmentsTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/text/segmenter/SegmentsTest.java @@ -329,7 +329,7 @@ public class SegmentsTest extends CoreTestFmwk { } @Test - public void testBoundariesBeforeIndex() { + public void testBoundariesBackFrom() { Segmenter enWordSegmenter = new LocalizedSegmenterBuilder() .setLocale(ULocale.ENGLISH) @@ -343,10 +343,12 @@ public class SegmentsTest extends CoreTestFmwk { Segments segments = enWordSegmenter.segment(source); Object[][] casesData = { - {"first " + TAKE_LIMIT + " before beginning", -2, new int[0]}, - {"first " + TAKE_LIMIT + " at the beginning", 0, new int[0]}, - {"first " + TAKE_LIMIT + " in the middle of the 2nd to last", 42, new int[]{41, 40, 36, 35, 32}}, - {"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}}, + {"first " + TAKE_LIMIT + " before beginning", -2, new int[0]}, + {"first " + TAKE_LIMIT + " at the beginning", 0, new int[]{0}}, + {"first " + TAKE_LIMIT + " from the start of the 2nd to last segment", 41, new int[]{41, 40, 36, 35, 32}}, + {"first " + TAKE_LIMIT + " in the middle of the 2nd to last segment", 42, new int[]{41, 40, 36, 35, 32}}, + {"first " + TAKE_LIMIT + " at the end", source.length(), new int[]{45, 44, 41, 40, 36}}, + {"first " + TAKE_LIMIT + " after the end", source.length()+1, new int[]{45, 44, 41, 40, 36}}, }; for (Object[] caseDatum : casesData) { @@ -354,11 +356,11 @@ public class SegmentsTest extends CoreTestFmwk { int startIdx = (int) caseDatum[1]; int[] exp = (int[]) caseDatum[2]; - int[] act = segments.boundariesBeforeIndex(startIdx).limit(TAKE_LIMIT).toArray(); + int[] act = segments.boundariesBackFrom(startIdx).limit(TAKE_LIMIT).toArray(); - assertThat(act, is(exp)); + assertThat(desc, act, is(exp)); - if (startIdx == -2) { + if (startIdx < 0) { logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0"); } }