diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java index 8bafd955a13..072dde6ecb5 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/segmenter/Segments.java @@ -12,28 +12,103 @@ public interface Segments { Stream subSequences(); + /** + * Return the segment that contains index {@code i}. Containment is inclusive of the start index + * and exclusive of the limit index. + * + *

Specifically, the containing segment is defined as the segment with start {@code s} and limit + * {@code l} such that {@code s ≤ i < l}.

+ * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @throws IllegalArgumentException if {@code i} is less than 0 or greater than the length of the + * input {@code CharSequence} to the {@code Segmenter} + * @return A segment that either starts at or contains index {@code i} + */ Segment segmentAt(int i); + /** + * Return a {@code Stream} of all {@code Segment}s in the source sequence. Start with the first + * and iterate forwards until the end of the sequence. + * + *

This is equivalent to {@code segmentsFrom(0)}.

+ * @return a {@code Stream} of all {@code Segment}s in the source sequence. + */ Stream segments(); + /** + * Return a {@code Stream} of all {@code Segment}s in the source sequence where all segment limits + * {@code l} satisfy {@code i < l}. Iteration moves forwards. + * + *

This means that the first segment in the stream is the same + * as what is returned by {@code segmentAt(i)}.

+ * + *

The word "from" is used here to mean "at or after", with the semantics of "at" for a + * {@code Segment} defined by {@link #segmentAt(int)}. We cannot describe the segments all as + * being "after" since the first segment might contain {@code i} in the middle, meaning that + * in the forward direction, its start position precedes {@code i}.

+ * + *

{@code segmentsFrom} and {@link #segmentsBefore(int)} create a partitioning of the space of + * all {@code Segment}s.

+ * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @return a {@code Stream} of all {@code Segment}s at or after {@code i} + */ Stream segmentsFrom(int i); + /** + * Return a {@code Stream} of all {@code Segment}s in the source sequence where all segment limits + * {@code l} satisfy {@code l ≤ i}. Iteration moves backwards. + * + *

This means that all segments in the stream come before the one that + * is returned by {@code segmentAt(i)}. A segment is not considered to contain index {@code i} if + * {@code i} is equal to limit {@code l}. Thus, "before" encapsulates the invariant + * {@code l ≤ i}.

+ * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @return a {@code Stream} of all {@code Segment}s before {@code i} + */ Stream segmentsBefore(int i); Function segmentToSequenceFn(); /** * Returns whether offset {@code i} is a segmentation boundary. Throws an exception when - * {@code i} is not a valid boundary position for the source sequence. - * @param i - * @return + * {@code i} is not a valid index position for the source sequence. + * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @throws IllegalArgumentException if {@code i} is less than 0 or greater than the length of the + * input {@code CharSequence} to the {@code Segmenter} + * @return {@code true} if offset {@code i} is a segmentation boundary, {@code false} otherwise + */ boolean isBoundary(int i); + /** + * Return all segmentation boundaries, starting from the beginning and moving forwards. + * + *

Note: {@code boundaries() != boundariesAfter(0)}. + * This difference naturally results from the strict inequality condition in boundariesAfter, + * and the fact that 0 is the first boundary returned from the start of an input sequence.

+ * @return An {@code IntStream} of all segmentation boundaries, starting at the first + * boundary with index 0, and moving forwards in the input sequence. + */ IntStream boundaries(); + /** + * Return all segmentation boundaries after the provided index. Iteration moves forwards. + * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @return An {@code IntStream} of all boundaries {@code b} such that {@code b > i} + */ IntStream boundariesAfter(int i); + /** + * Return all segmentation boundaries on or before the provided index. Iteration moves backwards. + * + *

The phrase "back from" is used to indicate both that: 1) boundaries are "on or before" the + * input index; 2) the direction of iteration is backwards (towards the beginning). + * "on or before" indicates that the result set is {@code b} where {@code b ≤ i}, which is a weak + * inequality, while "before" might suggest the strict inequality {@code b < i}.

+ * + *

{@code boundariesBackFrom} and {@link #boundariesAfter(int)} create a partitioning of the + * space of all boundaries.

+ * @param i index in the input {@code CharSequence} to the {@code Segmenter} + * @return An {@code IntStream} of all boundaries {@code b} such that {@code b ≤ i} + */ IntStream boundariesBackFrom(int i); //