ICU-22789 Refactor Range into Segment

This commit is contained in:
Elango Cheran 2024-12-13 14:14:56 -08:00
parent 55226ac569
commit 60ea6f3f3b
2 changed files with 50 additions and 57 deletions

View file

@ -21,29 +21,29 @@ public interface Segments {
return ranges().map(rangeToSequenceFn());
}
/**
 * Returns a stream over the segments of the source sequence.
 *
 * <p>This is a convenience that delegates to {@code rangesAfterIndex(-1)}.
 *
 * @return the stream produced by {@code rangesAfterIndex(-1)}
 */
default Stream<Range> ranges() {
default Stream<Segment> ranges() {
return rangesAfterIndex(-1);
};
/**
 * Returns a sequential stream of segments obtained by iterating forwards over the source
 * sequence, with {@code i} handed to the underlying iterable as its starting index.
 *
 * <p>The break iterator returned by {@code getInstanceBreakIterator()} has its text set to
 * {@code getSourceSequence()} before the iterable is created.
 *
 * @param i the index passed to the iterable as the point from which to start iterating
 * @return a non-parallel stream backed by the iterable's spliterator
 */
default Stream<Range> rangesAfterIndex(int i) {
default Stream<Segment> rangesAfterIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
RangeIterable iterable = new RangeIterable(breakIter, IterationDirection.FORWARDS, i);
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.FORWARDS, i);
// second argument false => sequential (non-parallel) stream
return StreamSupport.stream(iterable.spliterator(), false);
}
/**
 * Returns a sequential stream of segments obtained by iterating backwards over the source
 * sequence, with {@code i} handed to the underlying iterable as its starting index.
 *
 * <p>The break iterator returned by {@code getInstanceBreakIterator()} has its text set to
 * {@code getSourceSequence()} before the iterable is created.
 *
 * @param i the index passed to the iterable as the point from which to start iterating
 * @return a non-parallel stream backed by the iterable's spliterator
 */
default Stream<Range> rangesBeforeIndex(int i) {
default Stream<Segment> rangesBeforeIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
// create a Stream from a Spliterator of an Iterable so that the Stream can be lazy, not eager
RangeIterable iterable = new RangeIterable(breakIter, IterationDirection.BACKWARDS, i);
SegmentIterable iterable = new SegmentIterable(breakIter, IterationDirection.BACKWARDS, i);
// second argument false => sequential (non-parallel) stream
return StreamSupport.stream(iterable.spliterator(), false);
}
default Range rangeAfterIndex(int i) {
default Segment rangeAfterIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
@ -57,10 +57,10 @@ public interface Segments {
return null;
}
return new Range(start, limit);
return new Segment(start, limit);
}
default Range rangeBeforeIndex(int i) {
default Segment rangeBeforeIndex(int i) {
BreakIterator breakIter = getInstanceBreakIterator();
breakIter.setText(getSourceSequence());
@ -80,11 +80,11 @@ public interface Segments {
assert limit <= start;
return new Range(limit, start);
return new Segment(limit, start);
}
/**
 * Returns a function that maps a segment to the matching piece of the source sequence,
 * i.e. {@code getSourceSequence().subSequence(start, limit)} for that segment.
 *
 * <p>{@code getSourceSequence()} is evaluated each time the returned function is applied,
 * not when this method is called.
 *
 * @return a function from a segment to the {@code CharSequence} it delimits
 */
default Function<Range, CharSequence> rangeToSequenceFn() {
return range -> getSourceSequence().subSequence(range.getStart(), range.getLimit());
default Function<Segment, CharSequence> rangeToSequenceFn() {
return segment -> getSourceSequence().subSequence(segment.start(), segment.limit());
}
default IntStream boundaries() {
@ -124,52 +124,45 @@ public interface Segments {
// Inner classes for Segment, SegmentIterable, and SegmentIterator
//
/**
 * Holder for the boundaries of a single segment: a {@code start} offset and a {@code limit}
 * offset, both supplied to the constructor and stored unchanged.
 */
class Range {
int start;
int limit;
class Segment {
// NOTE(review): start and limit are exposed here as public final fields, while call sites
// elsewhere in this change use method syntax (segment.start() / segment.limit()). No such
// methods are visible on this class - confirm the callers should use field access
// (segment.start / segment.limit) or that accessor methods are meant to be provided.
public final int start;
public final int limit;
// Always 0 here: initialized inline and never assigned by the constructor.
public final int ruleStatus = 0;
public Range(int start, int limit) {
public Segment(int start, int limit) {
this.start = start;
this.limit = limit;
}
// Accessor for the start offset.
public int getStart() {
return start;
}
// Accessor for the limit offset.
public int getLimit() {
return limit;
}
}
/**
 * This {@code Iterable} exists to enable the creation of a {@code Spliterator} that in turn
 * enables the creation of a lazy {@code Stream}.
 *
 * <p>It only stores the break iterator, iteration direction, and starting index it is given,
 * and hands them to a new iterator on each call to {@link #iterator()}.
 */
class RangeIterable implements Iterable<Range> {
class SegmentIterable implements Iterable<Segment> {
// Break iterator forwarded to every iterator created by this iterable.
BreakIterator breakIter;
// Direction of iteration forwarded to the iterator.
IterationDirection direction;
// Index forwarded to the iterator as its starting point.
int startIdx;
RangeIterable(BreakIterator breakIter, IterationDirection direction, int startIdx) {
SegmentIterable(BreakIterator breakIter, IterationDirection direction, int startIdx) {
this.breakIter = breakIter;
this.direction = direction;
this.startIdx = startIdx;
}
// Builds a fresh iterator per call; note that the same BreakIterator instance is passed
// each time, so iterators obtained from one iterable share it.
@Override
public Iterator<Range> iterator() {
return new RangeIterator(this.breakIter, this.direction, this.startIdx);
public Iterator<Segment> iterator() {
return new SegmentIterator(this.breakIter, this.direction, this.startIdx);
}
}
class RangeIterator implements Iterator<Range> {
class SegmentIterator implements Iterator<Segment> {
BreakIterator breakIter;
IterationDirection direction;
int start;
int limit;
RangeIterator(BreakIterator breakIter, IterationDirection direction, int startIdx) {
SegmentIterator(BreakIterator breakIter, IterationDirection direction, int startIdx) {
this.breakIter = breakIter;
this.direction = direction;
@ -198,12 +191,12 @@ public interface Segments {
}
@Override
public Range next() {
Range result;
public Segment next() {
Segment result;
if (this.limit < this.start) {
result = new Range(this.limit, this.start);
result = new Segment(this.limit, this.start);
} else {
result = new Range(this.start, this.limit);
result = new Segment(this.start, this.limit);
}
this.start = this.limit;

View file

@ -7,7 +7,7 @@ import com.ibm.icu.dev.test.CoreTestFmwk;
import com.ibm.icu.text.segmenter.LocalizedSegmenter;
import com.ibm.icu.text.segmenter.Segmenter.SegmentationType;
import com.ibm.icu.text.segmenter.Segments;
import com.ibm.icu.text.segmenter.Segments.Range;
import com.ibm.icu.text.segmenter.Segments.Segment;
import com.ibm.icu.util.ULocale;
import java.util.Arrays;
import java.util.List;
@ -32,13 +32,13 @@ public class SegmentsTest extends CoreTestFmwk {
// Create new Segments for source1
Segments segments1 = enWordSegmenter.segment(source1);
List<Range> ranges = segments1.ranges().collect(Collectors.toList());
List<Segment> segments = segments1.ranges().collect(Collectors.toList());
assertEquals("first range start", 0, ranges.get(0).getStart());
assertEquals("first range limit", 3, ranges.get(0).getLimit());
assertEquals("first range start", 0, segments.get(0).start());
assertEquals("first range limit", 3, segments.get(0).limit());
assertEquals("second range start", 3, ranges.get(1).getStart());
assertEquals("second range limit", 4, ranges.get(1).getLimit());
assertEquals("second range start", 3, segments.get(1).start());
assertEquals("second range limit", 4, segments.get(1).limit());
}
@Test
@ -102,13 +102,13 @@ public class SegmentsTest extends CoreTestFmwk {
// Create new Segments for source1
Segments segments1 = enWordSegmenter.segment(source1);
List<Range> ranges = segments1.rangesAfterIndex(startIdx).collect(Collectors.toList());
List<Segment> segments = segments1.rangesAfterIndex(startIdx).collect(Collectors.toList());
assertEquals("first range start", 3, ranges.get(0).getStart());
assertEquals("first range limit", 4, ranges.get(0).getLimit());
assertEquals("first range start", 3, segments.get(0).start());
assertEquals("first range limit", 4, segments.get(0).limit());
assertEquals("second range start", 4, ranges.get(1).getStart());
assertEquals("second range limit", 9, ranges.get(1).getLimit());
assertEquals("second range start", 4, segments.get(1).start());
assertEquals("second range limit", 9, segments.get(1).limit());
}
@Test
@ -125,13 +125,13 @@ public class SegmentsTest extends CoreTestFmwk {
// Create new Segments for source1
Segments segments1 = enWordSegmenter.segment(source1);
List<Range> ranges = segments1.rangesBeforeIndex(startIdx).collect(Collectors.toList());
List<Segment> segments = segments1.rangesBeforeIndex(startIdx).collect(Collectors.toList());
assertEquals("first range start", 4, ranges.get(0).getStart());
assertEquals("first range limit", 9, ranges.get(0).getLimit());
assertEquals("first range start", 4, segments.get(0).start());
assertEquals("first range limit", 9, segments.get(0).limit());
assertEquals("second range start", 3, ranges.get(1).getStart());
assertEquals("second range limit", 4, ranges.get(1).getLimit());
assertEquals("second range start", 3, segments.get(1).start());
assertEquals("second range limit", 4, segments.get(1).limit());
}
@Test
@ -184,14 +184,14 @@ public class SegmentsTest extends CoreTestFmwk {
Integer expStart = (Integer) caseDatum[2];
Integer expLimit = (Integer) caseDatum[3];
Range range = segments.rangeAfterIndex(startIdx);
Segment segment = segments.rangeAfterIndex(startIdx);
if (expStart == null) {
assert expLimit == null;
assertThat("Out of bounds range should be null", range == null);
assertThat("Out of bounds range should be null", segment == null);
} else {
assertEquals(desc + ", start", (long) expStart.intValue(), range.getStart());
assertEquals(desc + ", limit", (long) expLimit.intValue(), range.getLimit());
assertEquals(desc + ", start", (long) expStart.intValue(), segment.start());
assertEquals(desc + ", limit", (long) expLimit.intValue(), segment.limit());
}
}
}
@ -225,7 +225,7 @@ public class SegmentsTest extends CoreTestFmwk {
Integer expStart = (Integer) caseDatum[2];
Integer expLimit = (Integer) caseDatum[3];
Range range = segments.rangeBeforeIndex(startIdx);
Segment segment = segments.rangeBeforeIndex(startIdx);
if (startIdx == -2) {
logKnownIssue("ICU-22987", "BreakIterator.preceding(-2) should return DONE, not 0");
@ -233,10 +233,10 @@ public class SegmentsTest extends CoreTestFmwk {
if (expStart == null) {
assert expLimit == null;
assertThat("Out of bounds range should be null", range == null);
assertThat("Out of bounds range should be null", segment == null);
} else {
assertEquals(desc + ", start", (long) expStart.intValue(), (long) range.getStart());
assertEquals(desc + ", limit", (long) expLimit.intValue(), (long) range.getLimit());
assertEquals(desc + ", start", (long) expStart.intValue(), (long) segment.start());
assertEquals(desc + ", limit", (long) expLimit.intValue(), (long) segment.limit());
}
}
}