U+%04lx) (read %d bytes): "
+ "0x%lx instead of 0x%lx (from bytes %lx)\n",
+ testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)(prev8-(p-s)),
+ (long)value, (long)values[i], (unsigned long)expectedBytes);
+ }
+ if(i8!=(p-s)) {
+ log_err("error: wrong end index from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx): "
+ "%ld != %ld (bytes %lx)\n",
+ testName, (int)prev8, (unsigned long)actualBytes, (long)c,
+ (long)(p-s), (long)i8, (unsigned long)expectedBytes);
+ break;
+ }
+ }
+}
+
+/*
+ * Runs the checks for an immutable trie: testTrieGetters() and
+ * testTrieGetRanges() for every trie, plus testTrieUTF16() and testTrieUTF8()
+ * when the trie type is UCPTRIE_TYPE_FAST.
+ */
+static void
+testTrie(const char *testName, const UCPTrie *trie,
+         UCPTrieType type, UCPTrieValueWidth valueWidth,
+         const CheckRange checkRanges[], int32_t countCheckRanges) {
+    testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
+    /* The shared range test takes an immutable and a mutable trie; only the immutable one is set here. */
+    testTrieGetRanges(testName, trie, NULL, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
+
+    if (type != UCPTRIE_TYPE_FAST) {
+        /* The string tests below are only run for fast tries. */
+        return;
+    }
+    testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges);
+    testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges);
+}
+
+static void
+testBuilder(const char *testName, const UMutableCPTrie *mutableTrie,
+ const CheckRange checkRanges[], int32_t countCheckRanges) { /* mutable-trie counterpart of testTrie(): runs the two helpers below */
+ testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges);
+ testTrieGetRanges(testName, NULL, mutableTrie, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges); /* same helper as in testTrie(), here with the immutable trie argument NULL */
+}
+
+static uint32_t storage[120000]; /* serialized trie: written by ucptrie_toBinary(), read by ucptrie_openFromBinary() in testTrieSerialize() */
+static uint32_t swapped[120000]; /* opposite-endian copy written by ucptrie_swap() in testTrieSerialize() */
+
+static void
+testTrieSerialize(const char *testName, UMutableCPTrie *mutableTrie,
+ UCPTrieType type, UCPTrieValueWidth valueWidth, UBool withSwap,
+ const CheckRange checkRanges[], int32_t countCheckRanges) {
+ UCPTrie *trie;
+ int32_t length1, length2, length3;
+ UErrorCode errorCode;
+
+ /*
+ * Overall flow: build an immutable trie of the given type and valueWidth
+ * from a clone of mutableTrie, serialize it into storage[], run testTrie()
+ * on it, optionally (withSwap) byte-swap the serialized form to the
+ * opposite endianness and back, reopen the trie from storage[], and run
+ * testTrie() again plus testBuilder() on a mutable trie made from it.
+ * Failures are reported with log_err(); nothing is returned.
+ *
+ * clone the trie so that the caller can reuse the original
+ */
+ errorCode=U_ZERO_ERROR;
+ mutableTrie = umutablecptrie_clone(mutableTrie, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: umutablecptrie_clone(%s) failed - %s\n",
+ testName, u_errorName(errorCode));
+ return;
+ }
+
+ /*
+ * This is not a loop, but simply a block that we can exit with "break"
+ * when something goes wrong.
+ */
+ do {
+ errorCode=U_ZERO_ERROR;
+ trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n",
+ testName, u_errorName(errorCode));
+ break;
+ }
+ errorCode=U_ZERO_ERROR;
+ length1=ucptrie_toBinary(trie, NULL, 0, &errorCode); /* preflight: NULL buffer, capacity 0; must report U_BUFFER_OVERFLOW_ERROR */
+ if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
+ log_err("error: ucptrie_toBinary(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n",
+ testName, u_errorName(errorCode));
+ break;
+ }
+ errorCode=U_ZERO_ERROR;
+ length2=ucptrie_toBinary(trie, storage, sizeof(storage), &errorCode); /* real serialization; the capacity passed is sizeof(storage), a byte count */
+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+ log_err("error: ucptrie_toBinary(%s) needs more memory\n", testName);
+ break;
+ }
+ if(U_FAILURE(errorCode)) {
+ log_err("error: ucptrie_toBinary(%s) failed: %s\n", testName, u_errorName(errorCode));
+ break;
+ }
+ if(length1!=length2) {
+ log_err("error: trie serialization (%s) lengths different: "
+ "preflight vs. serialize\n", testName);
+ break;
+ }
+
+ testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
+ ucptrie_close(trie);
+ trie=NULL; /* from here until ucptrie_openFromBinary() only the serialized copy in storage[] exists */
+
+ if(withSwap) {
+ int32_t swappedLength;
+
+ UDataSwapper *ds;
+
+ /* swap to opposite-endian */
+ uprv_memset(swapped, 0x55, length2); /* fill the destination with a fixed pattern before swapping into it */
+ ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+ !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+ swappedLength=ucptrie_swap(ds, storage, -1, NULL, &errorCode); /* length -1 and NULL output: the preflighting call named in the message below */
+ if(U_FAILURE(errorCode) || swappedLength!=length2) {
+ log_err("error: ucptrie_swap(%s to OE preflighting) failed (%s) "
+ "or before/after lengths different\n",
+ testName, u_errorName(errorCode));
+ udata_closeSwapper(ds);
+ break;
+ }
+ swappedLength=ucptrie_swap(ds, storage, length2, swapped, &errorCode);
+ udata_closeSwapper(ds);
+ if(U_FAILURE(errorCode) || swappedLength!=length2) {
+ log_err("error: ucptrie_swap(%s to OE) failed (%s) or before/after lengths different\n",
+ testName, u_errorName(errorCode));
+ break;
+ }
+
+ /* swap back to platform-endian */
+ uprv_memset(storage, 0xaa, length2); /* overwrite the original bytes so that the swap back has to regenerate them */
+ ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+ U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+ swappedLength=ucptrie_swap(ds, swapped, -1, NULL, &errorCode);
+ if(U_FAILURE(errorCode) || swappedLength!=length2) {
+ log_err("error: ucptrie_swap(%s to PE preflighting) failed (%s) "
+ "or before/after lengths different\n",
+ testName, u_errorName(errorCode));
+ udata_closeSwapper(ds);
+ break;
+ }
+ swappedLength=ucptrie_swap(ds, swapped, length2, storage, &errorCode);
+ udata_closeSwapper(ds);
+ if(U_FAILURE(errorCode) || swappedLength!=length2) {
+ log_err("error: ucptrie_swap(%s to PE) failed (%s) or before/after lengths different\n",
+ testName, u_errorName(errorCode));
+ break;
+ }
+ }
+
+ trie = ucptrie_openFromBinary(type, valueWidth, storage, length2, &length3, &errorCode); /* length3 receives the length reported by the reader */
+ if(U_FAILURE(errorCode)) {
+ log_err("error: ucptrie_openFromBinary(%s) failed, %s\n", testName, u_errorName(errorCode));
+ break;
+ }
+ if(type != ucptrie_getType(trie)) {
+ log_err("error: trie serialization (%s) did not preserve trie type\n", testName);
+ break;
+ }
+ if(valueWidth != ucptrie_getValueWidth(trie)) {
+ log_err("error: trie serialization (%s) did not preserve data value width\n", testName);
+ break;
+ }
+ if(length2!=length3) {
+ log_err("error: trie serialization (%s) lengths different: "
+ "serialize vs. unserialize\n", testName);
+ break;
+ }
+ /* overwrite the storage that is not supposed to be needed */
+ uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3));
+
+ {
+ errorCode=U_ZERO_ERROR;
+ UCPTrie *any = ucptrie_openFromBinary(UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
+ storage, length3, NULL, &errorCode); /* second open with the "any" selectors; must report the same type and value width */
+ if (U_SUCCESS(errorCode)) {
+ if (type != ucptrie_getType(any)) {
+ log_err("error: ucptrie_openFromBinary("
+ "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n");
+ }
+ if (valueWidth != ucptrie_getValueWidth(any)) {
+ log_err("error: ucptrie_openFromBinary("
+ "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n");
+ }
+ ucptrie_close(any);
+ } else {
+ log_err("error: ucptrie_openFromBinary("
+ "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY) failed - %s\n",
+ u_errorName(errorCode));
+ }
+ }
+
+ errorCode=U_ZERO_ERROR;
+ testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
+ {
+ /* make a mutable trie from an immutable one */
+ uint32_t value, value2;
+ UMutableCPTrie *mutable2 = umutablecptrie_fromUCPTrie(trie, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: umutablecptrie_fromUCPTrie(unserialized %s) failed - %s\n",
+ testName, u_errorName(errorCode));
+ break;
+ }
+
+ value=umutablecptrie_get(mutable2, 0xa1); /* remember the current value so that it can be restored below */
+ umutablecptrie_set(mutable2, 0xa1, 789, &errorCode);
+ value2=umutablecptrie_get(mutable2, 0xa1); /* must read back the 789 that was just set */
+ umutablecptrie_set(mutable2, 0xa1, value, &errorCode); /* restore, so that testBuilder() sees the checkRanges values again */
+ if(U_FAILURE(errorCode) || value2!=789) {
+ log_err("error: modifying a mutableTrie-from-UCPTrie (%s) failed - %s\n",
+ testName, u_errorName(errorCode));
+ }
+ testBuilder(testName, mutable2, checkRanges, countCheckRanges);
+ umutablecptrie_close(mutable2);
+ }
+ } while(0);
+
+ umutablecptrie_close(mutableTrie); /* this is the clone made at the top, not the caller's trie */
+ ucptrie_close(trie); /* NOTE(review): trie is NULL here if a swap step failed after L92; assumes ucptrie_close() accepts NULL - confirm */
+}
+
+/*
+ * Runs testBuilder() on mutableTrie and then testTrieSerialize() once for each
+ * trie type/value width combination that can hold every value in checkRanges[]:
+ *   ".16"      fast type, 16-bit values - only if all values are <= 0xffff
+ *   ".32"      fast type, 32-bit values - always
+ *   ".8"       fast type,  8-bit values - only if all values are <= 0xff
+ *   ".small16" small type, 16-bit values - only if all values are <= 0xffff
+ * The suffix is appended to testName to form the name passed on for logging.
+ *
+ * withClone is forwarded as the withSwap argument of testTrieSerialize().
+ * Returns mutableTrie, the same pointer that was passed in.
+ *
+ * If testName plus the longest suffix does not fit into the local name buffer,
+ * an error is logged and the serialization tests are skipped rather than
+ * writing past the end of the buffer.
+ */
+static UMutableCPTrie *
+testTrieSerializeAllValueWidth(const char *testName,
+                               UMutableCPTrie *mutableTrie, UBool withClone,
+                               const CheckRange checkRanges[], int32_t countCheckRanges) {
+    /* Longest suffix appended below; sizeof() of it includes the terminating NUL. */
+    static const char longestSuffix[] = ".small16";
+    char name[40];
+    uint32_t oredValues = 0;
+    int32_t i;
+    for (i = 0; i < countCheckRanges; ++i) {
+        oredValues |= checkRanges[i].value;
+    }
+
+    testBuilder(testName, mutableTrie, checkRanges, countCheckRanges);
+
+    /* uprv_strcpy()/uprv_strcat() below are unbounded: check the length once up front. */
+    if (uprv_strlen(testName) + sizeof(longestSuffix) > sizeof(name)) {
+        log_err("error: test name \"%s\" is too long for testTrieSerializeAllValueWidth()\n",
+                testName);
+        return mutableTrie;
+    }
+
+    if (oredValues <= 0xffff) {
+        uprv_strcpy(name, testName);
+        uprv_strcat(name, ".16");
+        testTrieSerialize(name, mutableTrie,
+                          UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, withClone,
+                          checkRanges, countCheckRanges);
+    }
+
+    uprv_strcpy(name, testName);
+    uprv_strcat(name, ".32");
+    testTrieSerialize(name, mutableTrie,
+                      UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_32, withClone,
+                      checkRanges, countCheckRanges);
+
+    if (oredValues <= 0xff) {
+        uprv_strcpy(name, testName);
+        uprv_strcat(name, ".8");
+        testTrieSerialize(name, mutableTrie,
+                          UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, withClone,
+                          checkRanges, countCheckRanges);
+    }
+
+    if (oredValues <= 0xffff) {
+        uprv_strcpy(name, testName);
+        uprv_strcat(name, ".small16");
+        testTrieSerialize(name, mutableTrie,
+                          UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_16, withClone,
+                          checkRanges, countCheckRanges);
+    }
+
+    return mutableTrie;
+}
+
+static UMutableCPTrie *
+makeTrieWithRanges(const char *testName, UBool withClone,
+ const SetRange setRanges[], int32_t countSetRanges,
+ const CheckRange checkRanges[], int32_t countCheckRanges) {
+ UMutableCPTrie *mutableTrie;
+ uint32_t initialValue, errorValue;
+ uint32_t value;
+ UChar32 start, limit;
+ int32_t i;
+ UErrorCode errorCode;
+
+ log_verbose("\ntesting Trie '%s'\n", testName);
+ errorCode=U_ZERO_ERROR;
+ getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
+ mutableTrie = umutablecptrie_open(initialValue, errorValue, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
+ return NULL;
+ }
+
+ /* set values from setRanges[] */
+ for(i=0; i Most users should use NORMAL instead.
+ *
+ * This option is useful for tries that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the lead surrogate code *points*.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ FIXED_LEAD_SURROGATES,
+ /**
+ * getRange() enumerates all same-value ranges as stored in the trie,
+ * except that all surrogates (U+D800..U+DFFF) are treated as having the
+ * surrogateValue, which is passed to getRange() as a separate parameter.
+ * The surrogateValue is not transformed via filter().
+ * See {@link Character#isSurrogate}.
+ *
+ * Most users should use NORMAL instead.
+ *
+ * This option is useful for tries that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the lead surrogate code *points*.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ FIXED_ALL_SURROGATES
+ }
+
+ /**
+ * Callback function interface: Modifies a trie value.
+ * Optionally called by getRange().
+ * The modified value will be returned by the getRange() function.
+ *
+ * Can be used to ignore some of the value bits,
+ * make a filter for one of several values,
+ * return a value index computed from the trie value, etc.
+ *
+ * @see #getRange
+ * @see #iterator
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public interface ValueFilter {
+ /**
+ * Modifies the trie value.
+ *
+ * Note: CodePointTrie.getRange() calls this once for the trie's null value
+ * and reuses that result for every entry that holds the null value,
+ * so the result should depend only on the value passed in.
+ *
+ * @param value trie value
+ * @return modified value; this is the value that getRange() reports
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int apply(int value);
+ }
+
+ /**
+ * Range iteration result data.
+ * Code points from start to end map to the same value.
+ * The value may have been modified by {@link ValueFilter#apply(int)},
+ * or it may be the surrogateValue if a RangeOption other than "normal" was used.
+ *
+ * @see #getRange
+ * @see #iterator
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+    public static final class Range {
+        // Written directly by the enclosing class's getRange() as well as through set().
+        private int start;
+        private int end;
+        private int value;
+
+        /**
+         * Constructor. Sets start and end to -1 and value to 0.
+         *
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Range() {
+            this.start = -1;
+            this.end = -1;
+            this.value = 0;
+        }
+
+        /**
+         * @return the start code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getStart() {
+            return this.start;
+        }
+
+        /**
+         * @return the (inclusive) end code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getEnd() {
+            return this.end;
+        }
+
+        /**
+         * @return the range value
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getValue() {
+            return this.value;
+        }
+
+        /**
+         * Sets the range. When using {@link #iterator()},
+         * iteration will resume after the newly set end.
+         *
+         * @param start new start code point
+         * @param end new end code point
+         * @param value new value
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public void set(int start, int end, int value) {
+            this.value = value;
+            this.end = end;
+            this.start = start;
+        }
+    }
+
+ private final class RangeIterator implements Iterator This class is not intended for public subclassing.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+    public class StringIterator {
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected CharSequence s;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int sIndex;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int c;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int value;
+
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected StringIterator(CharSequence s, int sIndex) {
+            // Same initial state as reset(): no code point read yet, value 0.
+            this.s = s;
+            this.sIndex = sIndex;
+            this.c = -1;
+            this.value = 0;
+        }
+
+        /**
+         * Resets the iterator to a new string and/or a new string index.
+         *
+         * @param s string to iterate over
+         * @param sIndex string index where the iteration will start
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public void reset(CharSequence s, int sIndex) {
+            this.s = s;
+            this.sIndex = sIndex;
+            this.c = -1;
+            this.value = 0;
+        }
+
+        /**
+         * Reads the next code point, post-increments the string index,
+         * and gets a value from the trie.
+         * Sets the trie error value if the code point is an unpaired surrogate.
+         *
+         * @return true if the string index was not yet at the end of the string;
+         *         otherwise the iterator did not advance
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean next() {
+            if (sIndex < s.length()) {
+                final int codePoint = Character.codePointAt(s, sIndex);
+                c = codePoint;
+                sIndex += Character.charCount(codePoint);
+                value = get(codePoint);
+                return true;
+            }
+            // Already at the end of the string: state is left untouched.
+            return false;
+        }
+
+        /**
+         * Reads the previous code point, pre-decrements the string index,
+         * and gets a value from the trie.
+         * Sets the trie error value if the code point is an unpaired surrogate.
+         *
+         * @return true if the string index was not yet at the start of the string;
+         *         otherwise the iterator did not advance
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean previous() {
+            if (sIndex > 0) {
+                final int codePoint = Character.codePointBefore(s, sIndex);
+                c = codePoint;
+                sIndex -= Character.charCount(codePoint);
+                value = get(codePoint);
+                return true;
+            }
+            // Already at the start of the string: state is left untouched.
+            return false;
+        }
+
+        /**
+         * @return the string index
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getIndex() {
+            return sIndex;
+        }
+
+        /**
+         * @return the code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getCodePoint() {
+            return c;
+        }
+
+        /**
+         * @return the trie value,
+         *         or the trie error value if the code point is an unpaired surrogate
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getValue() {
+            return value;
+        }
+    }
+
+ /**
+ * Returns the value for a code point as stored in the trie, with range checking.
+ * Returns the trie error value if c is not in the range 0..U+10FFFF.
+ *
+ * @param c the code point
+ * @return the trie value,
+ * or the trie error value if the code point is not in the range 0..U+10FFFF
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract int get(int c);
+
+ /**
+ * Sets the range object to a range of code points beginning with the start parameter.
+ * The range end is the last code point such that
+ * all those from start to there have the same value.
+ * Returns false if start is not 0..U+10FFFF.
+ * Can be used to efficiently iterate over all same-value ranges in a trie.
+ *
+ * If the {@link ValueFilter} parameter is not null, then
+ * the value to be delivered is passed through that filter, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that parameter is null.
+ *
+ * Example:
+ * Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally
+ * modifies the range if it overlaps with surrogate code points.
+ *
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually {@link RangeOption#NORMAL}
+ * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL}
+ * @param filter an object that may modify the trie data value,
+ * or null if the values from the trie are to be used unmodified
+ * @param range the range object that will be set to the code point range and value
+ * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean getRange(int start, RangeOption option, int surrogateValue,
+ ValueFilter filter, Range range) {
+ assert option != null;
+ if (!getRange(start, filter, range)) {
+ return false;
+ }
+ if (option == RangeOption.NORMAL) {
+ return true;
+ }
+ int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; // last code point that gets the surrogateValue under this option
+ int end = range.end;
+ if (end < 0xd7ff || start > surrEnd) { // 0xd7ff is the code point just before the first surrogate U+D800
+ return true;
+ }
+ // The range overlaps with surrogates, or ends just before the first one.
+ if (range.value == surrogateValue) {
+ if (end >= surrEnd) {
+ // Surrogates followed by a non-surrValue range,
+ // or surrogates are part of a larger surrValue range.
+ return true;
+ }
+ } else {
+ if (start <= 0xd7ff) {
+ range.end = 0xd7ff; // Non-surrValue range ends before surrValue surrogates.
+ return true;
+ }
+ // Start is a surrogate with a non-surrValue code *unit* value.
+ // Return a surrValue code *point* range.
+ range.value = surrogateValue;
+ if (end > surrEnd) {
+ range.end = surrEnd; // Surrogate range ends before non-surrValue rest of range.
+ return true;
+ }
+ }
+ // See if the surrValue surrogate range can be merged with
+ // an immediately following range.
+ if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) {
+ range.start = start; // keep the end and value of the following range, but begin at the caller's start
+ return true;
+ }
+ range.start = start; // no merge: the second getRange() call overwrote range, so set all three fields again
+ range.end = surrEnd;
+ range.value = surrogateValue;
+ return true;
+ }
+
+ /**
+ * Convenience iterator over same-trie-value code point ranges.
+ * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)}
+ * without filtering.
+ * Adjacent ranges have different trie values.
+ *
+ * The iterator always returns the same Range object.
+ *
+ * @return a Range iterator
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public Iterator This class is not intended for public subclassing.
+ *
+ * @see MutableCodePointTrie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CodePointTrie extends CodePointMap {
+ /**
+ * Selectors for the type of a CodePointTrie.
+ * Different trade-offs for size vs. speed.
+ *
+ * Use null for {@link #fromBinary} to accept any type;
+ * {@link #getType} will return the actual type.
+ *
+ * @see MutableCodePointTrie#buildImmutable(Type, ValueWidth)
+ * @see #fromBinary
+ * @see #getType
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public enum Type { // declaration order matters: toBinary() writes ordinal() into the header options
+ /**
+ * Fast/simple/larger BMP data structure.
+ * The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points.
+ *
+ * @see Fast
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ FAST, // ordinal 0; fromBinary() maps header type 0 to FAST
+ /**
+ * Small/slower BMP data structure.
+ *
+ * @see Small
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ SMALL // ordinal 1; fromBinary() maps header type 1 to SMALL
+ }
+
+ /**
+ * Selectors for the number of bits in a CodePointTrie data value.
+ *
+ * Use null for {@link #fromBinary} to accept any data value width;
+ * {@link #getValueWidth} will return the actual data value width.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public enum ValueWidth { // declaration order matters: toBinary() writes ordinal() into the header options
+ /**
+ * 16 bits per CodePointTrie data value.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ BITS_16, // ordinal 0; fromBinary() maps header value width 0 to BITS_16
+ /**
+ * 32 bits per CodePointTrie data value.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ BITS_32, // ordinal 1; fromBinary() maps header value width 1 to BITS_32
+ /**
+ * 8 bits per CodePointTrie data value.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ BITS_8 // ordinal 2; fromBinary() maps header value width 2 to BITS_8
+ }
+
+ private CodePointTrie(char[] index, Data data, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ this.ascii = new int[ASCII_LIMIT];
+ this.index = index;
+ this.data = data;
+ this.dataLength = data.getDataLength();
+ this.highStart = highStart;
+ this.index3NullOffset = index3NullOffset;
+ this.dataNullOffset = dataNullOffset;
+
+ for (int c = 0; c < ASCII_LIMIT; ++c) {
+ ascii[c] = data.getFromIndex(c);
+ }
+
+ int nullValueOffset = dataNullOffset;
+ if (nullValueOffset >= dataLength) {
+ nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
+ }
+ nullValue = data.getFromIndex(nullValueOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form,
+ * stored in the ByteBuffer starting at the current position.
+ * Advances the buffer position to just after the trie data.
+ * Inverse of {@link #toBinary(OutputStream)}.
+ *
+ * The data is copied from the buffer;
+ * later modification of the buffer will not affect the trie.
+ *
+ * The data may be in either byte order: if the signature is byte-reversed,
+ * the buffer's byte order is switched while reading. The buffer's original
+ * byte order is restored before this method returns or throws.
+ * When an exception is thrown, the buffer position is not reset.
+ *
+ * @param type selects the trie type; this method throws an exception
+ * if the type does not match the binary data;
+ * use null to accept any type
+ * @param valueWidth selects the number of bits in a data value; this method throws an exception
+ * if the valueWidth does not match the binary data;
+ * use null to accept any data value width
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @throws ICUUncheckedIOException if the buffer is too short, does not start with a
+ * CodePointTrie signature, has an unsupported type, value width or reserved option bits,
+ * or does not match the requested type or valueWidth
+ * @see MutableCodePointTrie#MutableCodePointTrie(int, int)
+ * @see MutableCodePointTrie#buildImmutable(Type, ValueWidth)
+ * @see #toBinary(OutputStream)
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {
+ ByteOrder outerByteOrder = bytes.order();
+ try {
+ // Enough data for a trie header?
+ if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {
+ throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header");
+ }
+
+ // struct UCPTrieHeader
+ /** "Tri3" in big-endian US-ASCII (0x54726933) */
+ int signature = bytes.getInt();
+
+ // Check the signature.
+ switch (signature) {
+ case 0x54726933:
+ // The buffer is already set to the trie data byte order.
+ break;
+ case 0x33697254:
+ // Temporarily reverse the byte order.
+ boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
+ bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+ signature = 0x54726933;
+ break;
+ default:
+ throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie");
+ }
+
+ // struct UCPTrieHeader continued
+ /**
+ * Options bit field:
+ * Bits 15..12: Data length bits 19..16.
+ * Bits 11..8: Data null block offset bits 19..16.
+ * Bits 7..6: UCPTrieType
+ * Bits 5..3: Reserved (0).
+ * Bits 2..0: UCPTrieValueWidth
+ */
+ int options = bytes.getChar();
+
+ /** Total length of the index tables. */
+ int indexLength = bytes.getChar();
+
+ /** Data length bits 15..0. */
+ int dataLength = bytes.getChar();
+
+ /** Index-3 null block offset, 0x7fff or 0xffff if none. */
+ int index3NullOffset = bytes.getChar();
+
+ /** Data null block offset bits 15..0, 0xfffff if none. */
+ int dataNullOffset = bytes.getChar();
+
+ /**
+ * First code point of the single-value range ending with U+10ffff,
+ * rounded up and then shifted right by SHIFT_2.
+ */
+ int shiftedHighStart = bytes.getChar();
+ // struct UCPTrieHeader end
+
+ int typeInt = (options >> 6) & 3; // options bits 7..6
+ Type actualType;
+ switch (typeInt) {
+ case 0: actualType = Type.FAST; break;
+ case 1: actualType = Type.SMALL; break;
+ default:
+ throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type");
+ }
+
+ int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;
+ ValueWidth actualValueWidth;
+ switch (valueWidthInt) {
+ case 0: actualValueWidth = ValueWidth.BITS_16; break;
+ case 1: actualValueWidth = ValueWidth.BITS_32; break;
+ case 2: actualValueWidth = ValueWidth.BITS_8; break;
+ default:
+ throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width");
+ }
+
+ if ((options & OPTIONS_RESERVED_MASK) != 0) {
+ throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options");
+ }
+
+ if (type == null) { // null means: accept whatever the data contains
+ type = actualType;
+ }
+ if (valueWidth == null) {
+ valueWidth = actualValueWidth;
+ }
+ if (type != actualType || valueWidth != actualValueWidth) {
+ throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required");
+ }
+
+ // Get the length values and offsets.
+ dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4); // add the high bits that are stored in options
+ dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);
+
+ int highStart = shiftedHighStart << SHIFT_2;
+
+ // Calculate the actual length, minus the header.
+ int actualLength = indexLength * 2; // index entries are 16-bit (read as char[] below)
+ if (valueWidth == ValueWidth.BITS_16) {
+ actualLength += dataLength * 2;
+ } else if (valueWidth == ValueWidth.BITS_32) {
+ actualLength += dataLength * 4;
+ } else {
+ actualLength += dataLength;
+ }
+ if (bytes.remaining() < actualLength) {
+ throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data");
+ }
+
+ char[] index = ICUBinary.getChars(bytes, indexLength, 0);
+ switch (valueWidth) {
+ case BITS_16: {
+ char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);
+ return type == Type.FAST ?
+ new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) :
+ new Small16(index, data16, highStart, index3NullOffset, dataNullOffset);
+ }
+ case BITS_32: {
+ int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);
+ return type == Type.FAST ?
+ new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) :
+ new Small32(index, data32, highStart, index3NullOffset, dataNullOffset);
+ }
+ case BITS_8: {
+ byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);
+ return type == Type.FAST ?
+ new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) :
+ new Small8(index, data8, highStart, index3NullOffset, dataNullOffset);
+ }
+ default:
+ throw new AssertionError("should be unreachable");
+ }
+ } finally {
+ bytes.order(outerByteOrder); // undo the temporary byte order reversal, also on exceptions
+ }
+ }
+
+ /**
+ * Returns the trie type.
+ *
+ * @return the trie type
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract Type getType();
+ /**
+ * Returns the number of bits in a trie data value.
+ *
+ * @return the number of bits in a trie data value
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final ValueWidth getValueWidth() { return data.getValueWidth(); }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+    @Override
+    public int get(int c) {
+        // Two steps: map the code point to a data index, then read the value there.
+        final int dataIndex = cpIndex(c);
+        return data.getFromIndex(dataIndex);
+    }
+
+ /**
+ * Returns a trie value for an ASCII code point, without range checking.
+ *
+ * @param c the input code point; must be U+0000..U+007F
+ * @return The ASCII code point's trie value.
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final int asciiGet(int c) {
+ return ascii[c]; // ascii[] has ASCII_LIMIT entries, filled in the constructor; no range check is done here
+ }
+
+ private static final int MAX_UNICODE = 0x10ffff; // highest code point; getRange() rejects a start above this
+
+ private static final int ASCII_LIMIT = 0x80; // exclusive limit of the code points cached in ascii[]
+
+    private static final int maybeFilterValue(int value, int trieNullValue, int nullValue,
+            ValueFilter filter) {
+        if (value == trieNullValue) {
+            // The trie's null value maps to the caller-supplied nullValue; the filter is not called.
+            return nullValue;
+        }
+        // Any other value goes through the filter, if there is one.
+        return filter == null ? value : filter.apply(value);
+    }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final boolean getRange(int start, ValueFilter filter, Range range) {
+ if (start < 0 || MAX_UNICODE < start) { // start is not a code point: no range, range object untouched
+ return false;
+ }
+ if (start >= highStart) { // everything from highStart through U+10FFFF shares one value
+ int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
+ int value = data.getFromIndex(di);
+ if (filter != null) { value = filter.apply(value); }
+ range.set(start, MAX_UNICODE, value);
+ return true;
+ }
+
+ int nullValue = this.nullValue;
+ if (filter != null) { nullValue = filter.apply(nullValue); } // filter the null value once; maybeFilterValue() reuses the result
+ Type type = getType();
+
+ int prevI3Block = -1; // last index-3 block and data block seen, used to skip repeated blocks
+ int prevBlock = -1;
+ int c = start;
+ int value = 0; // Initialize to make compiler happy. Real value when haveValue is true.
+ boolean haveValue = false;
+ do {
+ int i3Block;
+ int i3;
+ int i3BlockLength;
+ int dataBlockLength;
+ if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {
+ i3Block = 0; // single-stage lookup: i3 indexes the start of the index array directly
+ i3 = c >> FAST_SHIFT;
+ i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;
+ dataBlockLength = FAST_DATA_BLOCK_LENGTH;
+ } else {
+ // Use the multi-stage index.
+ int i1 = c >> SHIFT_1;
+ if (type == Type.FAST) {
+ assert(0xffff < c && c < highStart);
+ i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
+ } else {
+ assert(c < highStart && highStart > SMALL_LIMIT);
+ i1 += SMALL_INDEX_LENGTH;
+ }
+ i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
+ if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {
+ // The index-3 block is the same as the previous one, and filled with value.
+ assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
+ c += CP_PER_INDEX_2_ENTRY;
+ continue;
+ }
+ prevI3Block = i3Block;
+ if (i3Block == index3NullOffset) {
+ // This is the index-3 null block.
+ if (haveValue) {
+ if (nullValue != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ } else {
+ value = nullValue;
+ haveValue = true;
+ }
+ prevBlock = dataNullOffset;
+ c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1); // jump to the start of the next index-2 entry
+ continue;
+ }
+ i3 = (c >> SHIFT_3) & INDEX_3_MASK;
+ i3BlockLength = INDEX_3_BLOCK_LENGTH;
+ dataBlockLength = SMALL_DATA_BLOCK_LENGTH;
+ }
+ // Enumerate data blocks for one index-3 block.
+ do {
+ int block;
+ if ((i3Block & 0x8000) == 0) {
+ block = index[i3Block + i3];
+ } else {
+ // 18-bit indexes stored in groups of 9 entries per 8 indexes.
+ int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
+ int gi = i3 & 7;
+ block = (index[group++] << (2 + (2 * gi))) & 0x30000; // first entry of the group holds the two high bits of each index
+ block |= index[group + gi];
+ }
+ if (block == prevBlock && (c - start) >= dataBlockLength) {
+ // The block is the same as the previous one, and filled with value.
+ assert((c & (dataBlockLength - 1)) == 0);
+ c += dataBlockLength;
+ } else {
+ int dataMask = dataBlockLength - 1;
+ prevBlock = block;
+ if (block == dataNullOffset) {
+ // This is the data null block.
+ if (haveValue) {
+ if (nullValue != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ } else {
+ value = nullValue;
+ haveValue = true;
+ }
+ c = (c + dataBlockLength) & ~dataMask; // jump to the start of the next data block
+ } else {
+ int di = block + (c & dataMask);
+ int value2 = data.getFromIndex(di);
+ value2 = maybeFilterValue(value2, this.nullValue, nullValue, filter);
+ if (haveValue) {
+ if (value2 != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ } else {
+ value = value2;
+ haveValue = true;
+ }
+ while ((++c & dataMask) != 0) { // compare the remaining entries of this data block one by one
+ if (maybeFilterValue(data.getFromIndex(++di),
+ this.nullValue, nullValue,
+ filter) != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ }
+ }
+ }
+ } while (++i3 < i3BlockLength);
+ } while (c < highStart);
+ assert(haveValue);
+ int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
+ int highValue = data.getFromIndex(di);
+ if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {
+ --c; // the range ends at the last code point before the high-value range
+ } else {
+ c = MAX_UNICODE; // same value as the high range: extend through U+10FFFF
+ }
+ range.set(start, c, value);
+ return true;
+ }
+
+ /**
+ * Serializes this trie to the given stream: a 16-byte header, then the index, then the data.
+ * This is the inverse of {@link #fromBinary}.
+ *
+ * @param os the output stream
+ * @return the number of bytes written
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final int toBinary(OutputStream os) {
+ // Bits 19..16 of dataLength and dataNullOffset do not fit into their 16-bit header fields,
+ // so they are packed into the options together with the type and the value width.
+ int options =
+ ((dataLength & 0xf0000) >> 4) |
+ ((dataNullOffset & 0xf0000) >> 8) |
+ (getType().ordinal() << 6) |
+ getValueWidth().ordinal();
+ int headerLength = 16; // sizeof(UCPTrieHeader)
+ DataOutputStream out = new DataOutputStream(os);
+ try {
+ // Write the UCPTrieHeader
+ out.writeInt(0x54726933); // signature="Tri3"
+ out.writeChar(options);
+ out.writeChar(index.length);
+ out.writeChar(dataLength);
+ out.writeChar(index3NullOffset);
+ out.writeChar(dataNullOffset);
+ out.writeChar(highStart >> SHIFT_2); // shiftedHighStart
+
+ for (char entry : index) { out.writeChar(entry); }
+ return headerLength + index.length * 2 + data.write(out);
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+
+ /** Shift size for getting the fast-index table offset. @internal */
+ static final int FAST_SHIFT = 6;
+
+ /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
+ static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;
+
+ /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
+ private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;
+
+ /** Last code point (SMALL_LIMIT - 1) that a small-type trie looks up via the fast index. @internal */
+ private static final int SMALL_MAX = 0xfff;
+
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for out-of-range code points and ill-formed UTF-8/16.
+ * @internal
+ */
+ private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for code points highStart..U+10FFFF.
+ * @internal
+ */
+ private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;
+
+ // ucptrie_impl.h
+
+ /** The length of the BMP index table. 1024=0x400 */
+ private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;
+
+ static final int SMALL_LIMIT = 0x1000;
+ private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;
+
+ /** Shift size for getting the index-3 table offset. */
+ static final int SHIFT_3 = 4;
+
+ /** Shift size for getting the index-2 table offset. */
+ private static final int SHIFT_2 = 5 + SHIFT_3;
+
+ /** Shift size for getting the index-1 table offset. */
+ private static final int SHIFT_1 = 5 + SHIFT_2;
+
+ /**
+ * Difference between two shift sizes,
+ * for getting an index-2 offset from an index-3 offset. 5=9-4
+ */
+ static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;
+
+ /**
+ * Difference between two shift sizes,
+ * for getting an index-1 offset from an index-2 offset. 5=14-9
+ */
+ static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;
+
+ /**
+ * Number of index-1 entries for the BMP. (4)
+ * This part of the index-1 table is omitted from the serialized form.
+ */
+ private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;
+
+ /** Number of entries in an index-2 block. 32=0x20 */
+ static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;
+
+ /** Mask for getting the lower bits for the in-index-2-block offset. */
+ static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;
+
+ /** Number of code points per index-2 table entry. 512=0x200 */
+ static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;
+
+ /** Number of entries in an index-3 block. 32=0x20 */
+ static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;
+
+ /** Mask for getting the lower bits for the in-index-3-block offset. */
+ private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;
+
+ /** Number of entries in a small data block. 16=0x10 */
+ static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;
+
+ /** Mask for getting the lower bits for the in-small-data-block offset. */
+ static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;
+
+ // ucptrie_impl.h: Constants for use with UCPTrieHeader.options.
+ private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;
+ private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
+ private static final int OPTIONS_RESERVED_MASK = 0x38;
+ private static final int OPTIONS_VALUE_BITS_MASK = 7;
+ /**
+ * Value for index3NullOffset which indicates that there is no index-3 null block.
+ * Bit 15 is unused for this value because this bit is used if the index-3 contains
+ * 18-bit indexes.
+ */
+ static final int NO_INDEX3_NULL_OFFSET = 0x7fff;
+ static final int NO_DATA_NULL_OFFSET = 0xfffff;
+
+ private static abstract class Data {
+ abstract ValueWidth getValueWidth(); // width of the stored values
+ abstract int getDataLength(); // number of data array elements
+ abstract int getFromIndex(int index); // value at a data array index
+ abstract int write(DataOutputStream dos) throws IOException; // returns the number of bytes written
+ }
+
+ private static final class Data16 extends Data {
+ char[] array; // 16-bit values; a char widens to a non-negative int on read
+ Data16(char[] values) { this.array = values; }
+ @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; }
+ @Override int getDataLength() { return array.length; }
+ @Override int getFromIndex(int i) { return array[i]; }
+ @Override int write(DataOutputStream out) throws IOException {
+ for (int i = 0; i < array.length; ++i) { out.writeChar(array[i]); }
+ return 2 * array.length; // two bytes per value
+ }
+ }
+
+ private static final class Data32 extends Data {
+ int[] array; // 32-bit values, returned as stored
+ Data32(int[] values) { this.array = values; }
+ @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; }
+ @Override int getDataLength() { return array.length; }
+ @Override int getFromIndex(int i) { return array[i]; }
+ @Override int write(DataOutputStream out) throws IOException {
+ for (int i = 0; i < array.length; ++i) { out.writeInt(array[i]); }
+ return 4 * array.length; // four bytes per value
+ }
+ }
+
+ private static final class Data8 extends Data {
+ byte[] array; // 8-bit values, read as unsigned (0..0xff)
+ Data8(byte[] values) { this.array = values; }
+ @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; }
+ @Override int getDataLength() { return array.length; }
+ @Override int getFromIndex(int i) { return array[i] & 0xff; }
+ @Override int write(DataOutputStream out) throws IOException {
+ for (int i = 0; i < array.length; ++i) { out.writeByte(array[i]); }
+ return array.length; // one byte per value
+ }
+ }
+
+ /** @internal */
+ private final int[] ascii;
+
+ /** @internal */
+ private final char[] index;
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected final Data data;
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected final int dataLength;
+ /**
+ * Start of the last range which ends at U+10FFFF.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected final int highStart;
+
+ /**
+ * Internal index-3 null block offset.
+ * Set to an impossibly high value (e.g., 0x7fff, see NO_INDEX3_NULL_OFFSET) if there is no dedicated index-3 null block.
+ * @internal
+ */
+ private final int index3NullOffset;
+ /**
+ * Internal data null block offset, not shifted.
+ * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
+ * @internal
+ */
+ private final int dataNullOffset;
+ /** @internal */
+ private final int nullValue;
+
+ /** Returns the data array index for a code point that is covered by the fast index.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected final int fastIndex(int c) {
+ return (c & FAST_DATA_MASK) + index[c >> FAST_SHIFT];
+ }
+
+ /** Returns the data array index for c via the multi-stage index, or the high-value slot.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected final int smallIndex(Type type, int c) {
+ // Only the range check lives here so that this part stays inline-friendly
+ // (in C, it is a macro); the actual lookup is in internalSmallIndex().
+ if (c < highStart) {
+ return internalSmallIndex(type, c);
+ }
+ return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
+ }
+
+ private final int internalSmallIndex(Type type, int c) {
+ // The index-1 entries follow the fast index, whose length depends on the trie type.
+ int i1Start;
+ if (type == Type.FAST) {
+ assert(0xffff < c && c < highStart);
+ i1Start = BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
+ } else {
+ assert(0 <= c && c < highStart && highStart > SMALL_LIMIT);
+ i1Start = SMALL_INDEX_LENGTH;
+ }
+ int i2Block = index[i1Start + (c >> SHIFT_1)];
+ int i3Block = index[i2Block + ((c >> SHIFT_2) & INDEX_2_MASK)];
+ int i3 = (c >> SHIFT_3) & INDEX_3_MASK;
+ int offsetInBlock = c & SMALL_DATA_MASK;
+ if ((i3Block & 0x8000) == 0) {
+ // 16-bit indexes
+ return index[i3Block + i3] + offsetInBlock;
+ }
+ // 18-bit indexes stored in groups of 9 entries per 8 indexes:
+ int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
+ int gi = i3 & 7;
+ int upperBits = (index[group] << (2 + (2 * gi))) & 0x30000;
+ return (upperBits | index[group + 1 + gi]) + offsetInBlock;
+ }
+
+ /** Returns the data array index for c; implemented per trie type by the Fast and Small subclasses.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected abstract int cpIndex(int c);
+
+ /**
+ * A CodePointTrie with {@link Type#FAST}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static abstract class Fast extends CodePointTrie {
+ private Fast(char[] index, Data data, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, data, highStart, index3NullOffset, dataNullOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#FAST}.
+ *
+ * @param valueWidth selects the number of bits in a data value; this method throws an exception
+ * if the valueWidth does not match the binary data;
+ * use null to accept any data value width
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
+ return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);
+ }
+
+ /**
+ * @return {@link Type#FAST}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final Type getType() { return Type.FAST; }
+
+ /**
+ * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
+ * Can be used to look up a value for a UTF-16 code unit if other parts of
+ * the string processing check for surrogates.
+ *
+ * @param c the input code point, must be U+0000..U+FFFF
+ * @return The BMP code point's trie value.
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract int bmpGet(int c);
+
+ /**
+ * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
+ * without range checking.
+ *
+ * @param c the input code point, must be U+10000..U+10FFFF
+ * @return The supplementary code point's trie value.
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract int suppGet(int c);
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ @Override
+ protected final int cpIndex(int c) {
+ if (c >= 0) {
+ if (c <= 0xffff) {
+ return fastIndex(c); // BMP: direct fast-index lookup
+ } else if (c <= 0x10ffff) {
+ return smallIndex(Type.FAST, c); // supplementary: multi-stage lookup or high value
+ }
+ }
+ return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // c < 0 or c > 0x10ffff: error value
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final StringIterator stringIterator(CharSequence s, int sIndex) {
+ return new FastStringIterator(s, sIndex);
+ }
+
+ private final class FastStringIterator extends StringIterator {
+ private FastStringIterator(CharSequence s, int sIndex) {
+ super(s, sIndex);
+ }
+
+ @Override
+ public boolean next() {
+ if (sIndex >= s.length()) {
+ return false;
+ }
+ char lead = s.charAt(sIndex++);
+ c = lead; // stays the code unit itself unless a surrogate pair is assembled below
+ int dataIndex;
+ if (!Character.isSurrogate(lead)) {
+ dataIndex = fastIndex(c);
+ } else {
+ char trail;
+ if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
+ Character.isLowSurrogate(trail = s.charAt(sIndex))) {
+ ++sIndex; // consume the trail surrogate as well
+ c = Character.toCodePoint(lead, trail);
+ dataIndex = smallIndex(Type.FAST, c);
+ } else {
+ dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // unpaired surrogate: error value
+ }
+ }
+ value = data.getFromIndex(dataIndex);
+ return true;
+ }
+
+ @Override
+ public boolean previous() {
+ if (sIndex <= 0) {
+ return false;
+ }
+ char trail = s.charAt(--sIndex);
+ c = trail; // stays the code unit itself unless a surrogate pair is assembled below
+ int dataIndex;
+ if (!Character.isSurrogate(trail)) {
+ dataIndex = fastIndex(c);
+ } else {
+ char lead;
+ if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
+ Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
+ --sIndex; // step back over the lead surrogate as well
+ c = Character.toCodePoint(lead, trail);
+ dataIndex = smallIndex(Type.FAST, c);
+ } else {
+ dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // unpaired surrogate: error value
+ }
+ }
+ value = data.getFromIndex(dataIndex);
+ return true;
+ }
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#SMALL}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static abstract class Small extends CodePointTrie {
+ private Small(char[] index, Data data, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, data, highStart, index3NullOffset, dataNullOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#SMALL}.
+ *
+ * @param valueWidth selects the number of bits in a data value; this method throws an exception
+ * if the valueWidth does not match the binary data;
+ * use null to accept any data value width
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
+ return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);
+ }
+
+ /**
+ * @return {@link Type#SMALL}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final Type getType() { return Type.SMALL; }
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ @Override
+ protected final int cpIndex(int c) {
+ if (c >= 0) {
+ if (c <= SMALL_MAX) {
+ return fastIndex(c); // up to SMALL_MAX: direct fast-index lookup
+ } else if (c <= 0x10ffff) {
+ return smallIndex(Type.SMALL, c); // above SMALL_MAX: multi-stage lookup or high value
+ }
+ }
+ return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // c < 0 or c > 0x10ffff: error value
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final StringIterator stringIterator(CharSequence s, int sIndex) {
+ return new SmallStringIterator(s, sIndex);
+ }
+
+ private final class SmallStringIterator extends StringIterator {
+ private SmallStringIterator(CharSequence s, int sIndex) {
+ super(s, sIndex);
+ }
+
+ @Override
+ public boolean next() {
+ if (sIndex >= s.length()) {
+ return false;
+ }
+ char lead = s.charAt(sIndex++);
+ c = lead; // stays the code unit itself unless a surrogate pair is assembled below
+ int dataIndex;
+ if (!Character.isSurrogate(lead)) {
+ dataIndex = cpIndex(c); // cpIndex() picks the fast or the multi-stage lookup
+ } else {
+ char trail;
+ if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
+ Character.isLowSurrogate(trail = s.charAt(sIndex))) {
+ ++sIndex; // consume the trail surrogate as well
+ c = Character.toCodePoint(lead, trail);
+ dataIndex = smallIndex(Type.SMALL, c);
+ } else {
+ dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // unpaired surrogate: error value
+ }
+ }
+ value = data.getFromIndex(dataIndex);
+ return true;
+ }
+
+ @Override
+ public boolean previous() {
+ if (sIndex <= 0) {
+ return false;
+ }
+ char trail = s.charAt(--sIndex);
+ c = trail; // stays the code unit itself unless a surrogate pair is assembled below
+ int dataIndex;
+ if (!Character.isSurrogate(trail)) {
+ dataIndex = cpIndex(c); // cpIndex() picks the fast or the multi-stage lookup
+ } else {
+ char lead;
+ if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
+ Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
+ --sIndex; // step back over the lead surrogate as well
+ c = Character.toCodePoint(lead, trail);
+ dataIndex = smallIndex(Type.SMALL, c);
+ } else {
+ dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; // unpaired surrogate: error value
+ }
+ }
+ value = data.getFromIndex(dataIndex);
+ return true;
+ }
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Fast16 extends Fast {
+ private final char[] dataArray; // the same array that is wrapped in the Data16 passed to super()
+
+ Fast16(char[] index, char[] data16, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
+ this.dataArray = data16;
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Fast16 fromBinary(ByteBuffer bytes) {
+ return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int get(int c) {
+ return dataArray[cpIndex(c)];
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int bmpGet(int c) {
+ assert 0 <= c && c <= 0xffff;
+ return dataArray[fastIndex(c)];
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int suppGet(int c) {
+ assert 0x10000 <= c && c <= 0x10ffff;
+ return dataArray[smallIndex(Type.FAST, c)];
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Fast32 extends Fast {
+ private final int[] dataArray; // the same array that is wrapped in the Data32 passed to super()
+
+ Fast32(char[] index, int[] data32, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
+ this.dataArray = data32;
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Fast32 fromBinary(ByteBuffer bytes) {
+ return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int get(int c) {
+ return dataArray[cpIndex(c)];
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int bmpGet(int c) {
+ assert 0 <= c && c <= 0xffff;
+ return dataArray[fastIndex(c)];
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int suppGet(int c) {
+ assert 0x10000 <= c && c <= 0x10ffff;
+ return dataArray[smallIndex(Type.FAST, c)];
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Fast8 extends Fast {
+ private final byte[] dataArray; // the same array that is wrapped in the Data8 passed to super()
+
+ Fast8(char[] index, byte[] data8, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
+ this.dataArray = data8;
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Fast8 fromBinary(ByteBuffer bytes) {
+ return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int get(int c) {
+ return dataArray[cpIndex(c)] & 0xff; // mask: the byte is returned as an unsigned value
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int bmpGet(int c) {
+ assert 0 <= c && c <= 0xffff;
+ return dataArray[fastIndex(c)] & 0xff; // mask: the byte is returned as an unsigned value
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public final int suppGet(int c) {
+ assert 0x10000 <= c && c <= 0x10ffff;
+ return dataArray[smallIndex(Type.FAST, c)] & 0xff; // mask: the byte is returned as an unsigned value
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Small16 extends Small {
+ Small16(char[] index, char[] data16, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Small16 fromBinary(ByteBuffer bytes) {
+ return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Small32 extends Small {
+ Small32(char[] index, int[] data32, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Small32 fromBinary(ByteBuffer bytes) {
+ return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);
+ }
+ }
+
+ /**
+ * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Small8 extends Small {
+ Small8(char[] index, byte[] data8, int highStart,
+ int index3NullOffset, int dataNullOffset) {
+ super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
+ }
+
+ /**
+ * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
+ * with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
+ *
+ * @param bytes a buffer containing the binary data of a CodePointTrie
+ * @return the trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Small8 fromBinary(ByteBuffer bytes) {
+ return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
+ }
+ }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java
new file mode 100644
index 00000000000..c5497818182
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java
@@ -0,0 +1,1289 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// created: 2018may04 Markus W. Scherer
+
+package com.ibm.icu.util;
+
+import java.util.Arrays;
+
+/**
+ * Mutable Unicode code point trie.
+ * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values.
+ * For details see http://site.icu-project.org/design/struct/utrie
+ *
+ * Setting values (especially ranges) and lookup is fast.
+ * The mutable trie is only somewhat space-efficient.
+ * It builds a compacted, immutable {@link CodePointTrie}.
+ *
+ * This trie can be modified while iterating over its contents.
+ * For example, it is possible to merge its values with those from another
+ * set of ranges (e.g., another {@link CodePointMap}):
+ * Iterate over those source ranges; for each of them iterate over this trie;
+ * add the source value into the value of each trie range.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+public final class MutableCodePointTrie extends CodePointMap implements Cloneable {
+ /**
+ * Constructs a mutable trie that initially maps each Unicode code point to the same value.
+ * It uses 32-bit data values until
+ * {@link #buildImmutable(com.ibm.icu.util.CodePointTrie.Type, com.ibm.icu.util.CodePointTrie.ValueWidth)}
+ * is called.
+ * buildImmutable() takes a valueWidth parameter which
+ * determines the number of bits in the data value in the resulting {@link CodePointTrie}.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public MutableCodePointTrie(int initialValue, int errorValue) {
+ // All three start out equal; origInitialValue is what clear() restores later.
+ this.origInitialValue = this.initialValue = this.highValue = initialValue;
+ this.errorValue = errorValue;
+ // Neither a null index-3 block nor a null data block is known yet.
+ this.index3NullOffset = this.dataNullOffset = -1;
+ // Index entries for the BMP only; the data array starts at its initial capacity.
+ this.index = new int[BMP_I_LIMIT];
+ this.data = new int[INITIAL_DATA_LENGTH];
+ }
+
+ /**
+ * Clones this mutable trie.
+ *
+ * @return the clone
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public MutableCodePointTrie clone() {
+ final MutableCodePointTrie copy;
+ try {
+ copy = (MutableCodePointTrie) super.clone();
+ } catch (CloneNotSupportedException ignored) {
+ // Unreachable: Cloning *is* supported.
+ return null;
+ }
+ // Give the copy its own arrays.
+ final int usedEntries = highStart >> CodePointTrie.SHIFT_3;  // Entries below highStart.
+ copy.index = new int[highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT];
+ copy.flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];
+ System.arraycopy(index, 0, copy.index, 0, usedEntries);
+ System.arraycopy(flags, 0, copy.flags, 0, usedEntries);
+ copy.index3NullOffset = index3NullOffset;
+ copy.data = data.clone();
+ copy.dataLength = dataLength;
+ copy.dataNullOffset = dataNullOffset;
+ copy.origInitialValue = origInitialValue;
+ copy.initialValue = initialValue;
+ copy.errorValue = errorValue;
+ copy.highStart = highStart;
+ copy.highValue = highValue;
+ assert index16 == null;
+ return copy;
+ }
+
+ /**
+ * Creates a mutable trie with the same contents as the {@link CodePointMap}.
+ *
+ * @param map the source map or trie
+ * @return the mutable trie
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static MutableCodePointTrie fromCodePointMap(CodePointMap map) {
+ // TODO: Consider special code branch for map instanceof CodePointTrie?
+ // The map's value for the highest code point becomes the initial value,
+ // which reduces the highStart of the new trie.
+ final int outOfRangeValue = map.get(-1);
+ final int topValue = map.get(MAX_UNICODE);
+ MutableCodePointTrie result = new MutableCodePointTrie(topValue, outOfRangeValue);
+ CodePointMap.Range r = new CodePointMap.Range();
+ for (int start = 0; map.getRange(start, null, r); start = r.getEnd() + 1) {
+ final int value = r.getValue();
+ if (value == topValue) {
+ continue;  // Already covered by the initial value.
+ }
+ final int end = r.getEnd();
+ if (start != end) {
+ result.setRange(start, end, value);
+ } else {
+ result.set(start, value);
+ }
+ }
+ return result;
+ }
+
+ private void clear() {  // Back to the empty state with the original initial value.
+ index16 = null;
+ highStart = dataLength = 0;
+ dataNullOffset = -1;
+ index3NullOffset = -1;
+ initialValue = highValue = origInitialValue;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public int get(int c) {
+ final boolean inRange = 0 <= c && c <= MAX_UNICODE;
+ if (!inRange) {
+ return errorValue;
+ } else if (highStart <= c) {
+ return highValue;
+ }
+ final int block = c >> CodePointTrie.SHIFT_3;
+ final int entry = index[block];
+ // ALL_SAME: the index entry is the value itself.
+ // Otherwise it is the offset of the block in the data array.
+ return flags[block] == ALL_SAME
+ ? entry : data[entry + (c & CodePointTrie.SMALL_DATA_MASK)];
+ }
+
+ private static final int maybeFilterValue(int value, int initialValue, int nullValue,
+ ValueFilter filter) {
+ // The initial value maps to the caller's nullValue without calling the filter.
+ if (value == initialValue) {
+ return nullValue;
+ }
+ // Any other value goes through the filter when one is given.
+ return filter == null ? value : filter.apply(value);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public boolean getRange(int start, CodePointTrie.ValueFilter filter,
+ CodePointTrie.Range range) {
+ if (start < 0 || MAX_UNICODE < start) {
+ return false;
+ }
+ if (start >= highStart) {  // All code points from highStart up map to highValue.
+ int value = highValue;
+ if (filter != null) { value = filter.apply(value); }
+ range.set(start, MAX_UNICODE, value);
+ return true;
+ }
+ int nullValue = initialValue;  // Filtered form of initialValue, computed once up front.
+ if (filter != null) { nullValue = filter.apply(nullValue); }
+ int c = start;
+ int value = 0; // Initialize to make compiler happy. Real value when haveValue is true.
+ boolean haveValue = false;
+ int i = c >> CodePointTrie.SHIFT_3;
+ do {
+ if (flags[i] == ALL_SAME) {
+ int value2 = maybeFilterValue(index[i], initialValue, nullValue, filter);
+ if (haveValue) {
+ if (value2 != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ } else {
+ value = value2;
+ haveValue = true;
+ }
+ c = (c + CodePointTrie.SMALL_DATA_BLOCK_LENGTH) & ~CodePointTrie.SMALL_DATA_MASK;  // Start of the next block.
+ } else /* MIXED */ {
+ int di = index[i] + (c & CodePointTrie.SMALL_DATA_MASK);
+ int value2 = maybeFilterValue(data[di], initialValue, nullValue, filter);
+ if (haveValue) {
+ if (value2 != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ } else {
+ value = value2;
+ haveValue = true;
+ }
+ while ((++c & CodePointTrie.SMALL_DATA_MASK) != 0) {  // Compare the remaining entries of this block.
+ if (maybeFilterValue(data[++di], initialValue, nullValue,
+ filter) != value) {
+ range.set(start, c - 1, value);
+ return true;
+ }
+ }
+ }
+ ++i;
+ } while (c < highStart);
+ assert(haveValue);
+ if (maybeFilterValue(highValue, initialValue, nullValue, filter) != value) {  // Does highValue continue the range?
+ range.set(start, c - 1, value);
+ } else {
+ range.set(start, MAX_UNICODE, value);
+ }
+ return true;
+ }
+
+ private void writeBlock(int block, int value) {
+ // Overwrite one whole small data block that starts at offset block.
+ Arrays.fill(data, block, block + CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
+ }
+
+ /**
+ * Sets a value for a code point.
+ *
+ * @param c the code point
+ * @param value the value
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void set(int c, int value) {
+ if (!(0 <= c && c <= MAX_UNICODE)) {
+ throw new IllegalArgumentException("invalid code point");
+ }
+ ensureHighStart(c);
+ // Resolve the block first: getDataBlock() may replace the data array.
+ final int blockStart = getDataBlock(c >> CodePointTrie.SHIFT_3);
+ data[blockStart + (c & CodePointTrie.SMALL_DATA_MASK)] = value;
+ }
+
+ private void fillBlock(int block, int start, int limit, int value) {  // start and limit are offsets relative to block; limit is exclusive.
+ Arrays.fill(data, block + start, block + limit, value);
+ }
+
+ /**
+ * Sets a value for each code point [start..end].
+ * Faster and more space-efficient than setting the value for each code point separately.
+ *
+ * @param start the first code point to get the value
+ * @param end the last code point to get the value (inclusive)
+ * @param value the value
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void setRange(int start, int end, int value) {
+ if (start < 0 || MAX_UNICODE < start || end < 0 || MAX_UNICODE < end || start > end) {
+ throw new IllegalArgumentException("invalid code point range");
+ }
+ ensureHighStart(end);
+
+ int limit = end + 1;
+ if ((start & CodePointTrie.SMALL_DATA_MASK) != 0) {
+ // Set partial block at [start..following block boundary[.
+ int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
+ int nextStart = (start + CodePointTrie.SMALL_DATA_MASK) & ~CodePointTrie.SMALL_DATA_MASK;
+ if (nextStart <= limit) {  // The range reaches at least to the end of this block.
+ fillBlock(block, start & CodePointTrie.SMALL_DATA_MASK,
+ CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
+ start = nextStart;
+ } else {  // The whole range lies inside this one block.
+ fillBlock(block, start & CodePointTrie.SMALL_DATA_MASK,
+ limit & CodePointTrie.SMALL_DATA_MASK, value);
+ return;
+ }
+ }
+
+ // Number of positions in the last, partial block.
+ int rest = limit & CodePointTrie.SMALL_DATA_MASK;
+
+ // Round down limit to a block boundary.
+ limit &= ~CodePointTrie.SMALL_DATA_MASK;
+
+ // Iterate over the blocks that the range covers completely.
+ while (start < limit) {
+ int i = start >> CodePointTrie.SHIFT_3;
+ if (flags[i] == ALL_SAME) {
+ index[i] = value;  // An ALL_SAME entry stores the value directly.
+ } else /* MIXED */ {
+ fillBlock(index[i], 0, CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
+ }
+ start += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+ }
+
+ if (rest > 0) {
+ // Set partial block at [last block boundary..limit[.
+ int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
+ fillBlock(block, 0, rest, value);
+ }
+ }
+
+ /**
+ * Compacts the data and builds an immutable {@link CodePointTrie} according to the parameters.
+ * After this, the mutable trie will be empty.
+ *
+ * Not every possible set of mappings can be built into a CodePointTrie,
+ * because of limitations resulting from speed and space optimizations.
+ * Every Unicode assigned character can be mapped to a unique value.
+ * Typical data yields data structures far smaller than the limitations.
+ *
+ * It is possible to construct extremely unusual mappings that exceed the
+ * data structure limits.
+ * In such a case this function will throw an exception.
+ *
+ * @param type selects the trie type
+ * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
+ * then the values stored in the trie will be truncated first
+ * @return the immutable trie
+ * @see #fromCodePointMap(CodePointMap)
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CodePointTrie buildImmutable(CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) {
+ if (type == null || valueWidth == null) {
+ throw new IllegalArgumentException("The type and valueWidth must be specified.");
+ }
+
+ try {
+ return build(type, valueWidth);
+ } finally {
+ clear();  // Runs whether build() returns or throws.
+ }
+ }
+
+ private static final int MAX_UNICODE = 0x10ffff;
+
+ private static final int UNICODE_LIMIT = 0x110000;
+ private static final int BMP_LIMIT = 0x10000;
+ private static final int ASCII_LIMIT = 0x80;
+
+ private static final int I_LIMIT = UNICODE_LIMIT >> CodePointTrie.SHIFT_3;
+ private static final int BMP_I_LIMIT = BMP_LIMIT >> CodePointTrie.SHIFT_3;
+ private static final int ASCII_I_LIMIT = ASCII_LIMIT >> CodePointTrie.SHIFT_3;
+
+ private static final int SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3));
+
+ // Flag values for data blocks.
+ private static final byte ALL_SAME = 0;  // index[i] holds the one value shared by the whole block.
+ private static final byte MIXED = 1;  // index[i] is the offset of the block in data.
+ private static final byte SAME_AS = 2;  // Set while compacting: index[i] is the index of another block with the same value.
+
+ /** Start with allocation of 16k data entries. */
+ private static final int INITIAL_DATA_LENGTH = (1 << 14);
+
+ /** Grow about 8x each time. */
+ private static final int MEDIUM_DATA_LENGTH = (1 << 17);
+
+ /**
+ * Maximum length of the build-time data array.
+ * One entry per 0x110000 code points.
+ */
+ private static final int MAX_DATA_LENGTH = UNICODE_LIMIT;
+
+ // Flag values for index-3 blocks while compacting/building.
+ private static final byte I3_NULL = 0;
+ private static final byte I3_BMP = 1;
+ private static final byte I3_16 = 2;
+ private static final byte I3_18 = 3;
+
+ private static final int INDEX_3_18BIT_BLOCK_LENGTH = CodePointTrie.INDEX_3_BLOCK_LENGTH + CodePointTrie.INDEX_3_BLOCK_LENGTH / 8;
+
+ private int[] index;  // Per small block: the value (ALL_SAME) or the block's offset in data (MIXED).
+ private int index3NullOffset;
+ private int[] data;
+ private int dataLength;  // Number of data entries handed out by allocDataBlock().
+ private int dataNullOffset;
+
+ private int origInitialValue;  // Initial value from the constructor; restored by clear().
+ private int initialValue;
+ private int errorValue;  // Returned by get() for out-of-range code points.
+ private int highStart;  // get() returns highValue for every code point from here up.
+ private int highValue;
+
+ /** Temporary array while building the final data. */
+ private char[] index16;
+ private byte[] flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];  // One flag per index entry; reused for I3_* values in compactIndex().
+
+ private void ensureHighStart(int c) {
+ if (c >= highStart) {
+ // Round up to a CodePointTrie.CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
+ c = (c + CodePointTrie.CP_PER_INDEX_2_ENTRY) & ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1);
+ int i = highStart >> CodePointTrie.SHIFT_3;
+ int iLimit = c >> CodePointTrie.SHIFT_3;
+ if (iLimit > index.length) {
+ int[] newIndex = new int[I_LIMIT];
+ for (int j = 0; j < i; ++j) { newIndex[j] = index[j]; }
+ index = newIndex;
+ }
+ do {
+ flags[i] = ALL_SAME;
+ index[i] = initialValue;
+ } while(++i < iLimit);
+ highStart = c;
+ }
+ }
+
+ private int allocDataBlock(int blockLength) {
+ int newBlock = dataLength;
+ int newTop = newBlock + blockLength;
+ if (newTop > data.length) {
+ int capacity;
+ if (data.length < MEDIUM_DATA_LENGTH) {
+ capacity = MEDIUM_DATA_LENGTH;
+ } else if (data.length < MAX_DATA_LENGTH) {
+ capacity = MAX_DATA_LENGTH;
+ } else {
+ // Should never occur.
+ // Either MAX_DATA_LENGTH is incorrect,
+ // or the code writes more values than should be possible.
+ throw new AssertionError();
+ }
+ int[] newData = new int[capacity];
+ for (int j = 0; j < dataLength; ++j) { newData[j] = data[j]; }
+ data = newData;
+ }
+ dataLength = newTop;
+ return newBlock;
+ }
+
+ /**
+ * Returns the data offset of the MIXED block for index entry i, first converting an ALL_SAME block.
+ * No error checking for illegal arguments. The Java version always returns non-negative values.
+ */
+ private int getDataBlock(int i) {
+ if (flags[i] == MIXED) {
+ return index[i];
+ }
+ if (i < BMP_I_LIMIT) {
+ int newBlock = allocDataBlock(CodePointTrie.FAST_DATA_BLOCK_LENGTH);
+ int iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
+ int iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ do {
+ assert(flags[iStart] == ALL_SAME);
+ writeBlock(newBlock, index[iStart]);
+ flags[iStart] = MIXED;
+ index[iStart++] = newBlock;
+ newBlock += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+ } while (iStart < iLimit);
+ return index[i];
+ } else {
+ int newBlock = allocDataBlock(CodePointTrie.SMALL_DATA_BLOCK_LENGTH);
+ // No failure check: allocDataBlock() returns a non-negative offset or throws.
+ writeBlock(newBlock, index[i]);
+ flags[i] = MIXED;
+ index[i] = newBlock;
+ return newBlock;
+ }
+ }
+
+ // compaction --------------------------------------------------------------
+
+ private void maskValues(int mask) {
+ // Stored data values.
+ for (int d = dataLength - 1; d >= 0; --d) {
+ data[d] = data[d] & mask;
+ }
+ // Values kept directly in the index; entries of other blocks are left alone.
+ for (int b = (highStart >> CodePointTrie.SHIFT_3) - 1; b >= 0; --b) {
+ if (flags[b] == ALL_SAME) { index[b] = index[b] & mask; }
+ }
+ // The special values.
+ highValue = highValue & mask;
+ errorValue = errorValue & mask;
+ initialValue = initialValue & mask;
+ }
+
+ private static boolean equalBlocks(int[] s, int si, int[] t, int ti, int length) {
+ for (int k = 0; k < length; ++k) {
+ if (s[si + k] != t[ti + k]) {
+ return false;
+ }
+ }
+ return length >= 0;  // A negative length never matches.
+ }
+
+ private static boolean equalBlocks(char[] s, int si, int[] t, int ti, int length) {
+ for (int k = 0; k < length; ++k) {
+ if (s[si + k] != t[ti + k]) {
+ return false;
+ }
+ }
+ return length >= 0;  // A negative length never matches.
+ }
+
+ private static boolean equalBlocks(char[] s, int si, char[] t, int ti, int length) {
+ for (int k = 0; k < length; ++k) {
+ if (s[si + k] != t[ti + k]) {
+ return false;
+ }
+ }
+ return length >= 0;  // A negative length never matches.
+ }
+
+ private static boolean allValuesSameAs(int[] p, int pi, int length, int value) {
+ int pLimit = pi + length;
+ while (pi < pLimit && p[pi] == value) { ++pi; }
+ return pi == pLimit;
+ }
+
+ /** Search for an identical block. */
+ private static int findSameBlock(int[] p, int pStart, int length,
+ int[] q, int qStart, int blockLength) {
+ // Ensure that we do not even partially get past length.
+ length -= blockLength;
+
+ while (pStart <= length) {
+ if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+ return pStart;
+ }
+ ++pStart;
+ }
+ return -1;
+ }
+
+ private static int findSameBlock(char[] p, int pStart, int length,
+ int[] q, int qStart, int blockLength) {
+ // Ensure that we do not even partially get past length.
+ length -= blockLength;
+
+ while (pStart <= length) {
+ if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+ return pStart;
+ }
+ ++pStart;
+ }
+ return -1;
+ }
+
+ private static int findSameBlock(char[] p, int pStart, int length,
+ char[] q, int qStart, int blockLength) {
+ // Ensure that we do not even partially get past length.
+ length -= blockLength;
+
+ while (pStart <= length) {
+ if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+ return pStart;
+ }
+ ++pStart;
+ }
+ return -1;
+ }
+
+ private static int findAllSameBlock(int[] p, int length, int value, int blockLength) {
+ // Ensure that we do not even partially get past length.
+ length -= blockLength;
+
+ for (int block = 0; block <= length; ++block) {
+ if (p[block] == value) {
+ for (int i = 1;; ++i) {
+ if (i == blockLength) {
+ return block;
+ }
+ if (p[block + i] != value) {
+ block += i;
+ break;
+ }
+ }
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Returns the largest n below blockLength such that the last n entries
+ * of p[0..length[ equal the first n entries of the block at q[qStart].
+ */
+ private static int getOverlap(int[] p, int length, int[] q, int qStart, int blockLength) {
+ int n = blockLength - 1;
+ assert(n <= length);
+ for (; n > 0; --n) {  // Try the longest candidate first, then shrink.
+ if (equalBlocks(p, length - n, q, qStart, n)) { break; }
+ }
+ return n;
+ }
+
+ private static int getOverlap(char[] p, int length, int[] q, int qStart, int blockLength) {
+ int n = blockLength - 1;
+ assert(n <= length);
+ for (; n > 0; --n) {  // Try the longest candidate first, then shrink.
+ if (equalBlocks(p, length - n, q, qStart, n)) { break; }
+ }
+ return n;
+ }
+
+ private static int getOverlap(char[] p, int length, char[] q, int qStart, int blockLength) {
+ int n = blockLength - 1;
+ assert(n <= length);
+ for (; n > 0; --n) {  // Try the longest candidate first, then shrink.
+ if (equalBlocks(p, length - n, q, qStart, n)) { break; }
+ }
+ return n;
+ }
+
+ private static int getAllSameOverlap(int[] p, int length, int value, int blockLength) {
+ int min = length - (blockLength - 1);
+ int i = length;
+ while (min < i && p[i - 1] == value) { --i; }
+ return length - i;
+ }
+
+ /**
+ * Finds the start of the last range in the trie by enumerating backward.
+ * Indexes for code points higher than this will be omitted.
+ */
+ private int findHighStart() {
+ int i = highStart >> CodePointTrie.SHIFT_3;
+ while (i > 0) {
+ boolean match;
+ if (flags[--i] == ALL_SAME) {
+ match = index[i] == highValue;
+ } else /* MIXED */ {
+ int p = index[i];
+ for (int j = 0;; ++j) {
+ if (j == CodePointTrie.SMALL_DATA_BLOCK_LENGTH) {
+ match = true;
+ break;
+ }
+ if (data[p + j] != highValue) {
+ match = false;
+ break;
+ }
+ }
+ }
+ if (!match) {
+ return (i + 1) << CodePointTrie.SHIFT_3;
+ }
+ }
+ return 0;
+ }
+
+ private static final class AllSameBlocks {
+ static final int NEW_UNIQUE = -1;
+ static final int OVERFLOW = -2;
+
+ private static final int CAPACITY = 32;
+
+ // Parallel arrays: entry k describes one remembered same-value block.
+ private final int[] indexes = new int[CAPACITY];
+ private final int[] values = new int[CAPACITY];
+ private final int[] refCounts = new int[CAPACITY];
+ private int length;
+ private int mostRecent = -1;
+
+ AllSameBlocks() {}
+
+ int findOrAdd(int index, int count, int value) {
+ int slot = -1;
+ if (mostRecent >= 0 && values[mostRecent] == value) {
+ slot = mostRecent;
+ } else {
+ for (int i = 0; i < length && slot < 0; ++i) {
+ if (values[i] == value) { slot = i; }
+ }
+ }
+ if (slot >= 0) {
+ mostRecent = slot;
+ refCounts[slot] += count;
+ return indexes[slot];
+ }
+ if (length == CAPACITY) { return OVERFLOW; }
+ slot = length++;
+ mostRecent = slot;
+ indexes[slot] = index;
+ values[slot] = value;
+ refCounts[slot] = count;
+ return NEW_UNIQUE;
+ }
+
+ /** Stores the new block in place of the entry that has the smallest reference count. */
+ void add(int index, int count, int value) {
+ assert(length == CAPACITY);
+ int victim = -1;
+ int fewest = I_LIMIT;
+ for (int i = 0; i < length; ++i) {
+ assert(values[i] != value);
+ final int refs = refCounts[i];
+ if (refs < fewest) {
+ fewest = refs;
+ victim = i;
+ }
+ }
+ assert(victim >= 0);
+ mostRecent = victim;
+ indexes[victim] = index;
+ values[victim] = value;
+ refCounts[victim] = count;
+ }
+
+ int findMostUsed() {
+ if (length == 0) { return -1; }
+ int best = -1;
+ int bestCount = 0;
+ for (int i = 0; i < length; ++i) {
+ if (refCounts[i] > bestCount) {
+ bestCount = refCounts[i];
+ best = i;
+ }
+ }
+ return indexes[best];
+ }
+ }
+
+ private int compactWholeDataBlocks(int fastILimit, AllSameBlocks allSameBlocks) {  // Returns newDataCapacity, the running total counted below.
+ // ASCII data will be stored as a linear table, even if the following code
+ // does not yet count it that way.
+ int newDataCapacity = ASCII_LIMIT;
+ // Add room for special values (errorValue, highValue) and padding.
+ newDataCapacity += 4;
+ int iLimit = highStart >> CodePointTrie.SHIFT_3;
+ int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
+ int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ for (int i = 0; i < iLimit; i += inc) {
+ if (i == fastILimit) {  // From here on, work with single small blocks.
+ blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+ inc = 1;
+ }
+ int value = index[i];
+ if (flags[i] == MIXED) {
+ // Really mixed?
+ int p = value;
+ value = data[p];
+ if (allValuesSameAs(data, p + 1, blockLength - 1, value)) {
+ flags[i] = ALL_SAME;
+ index[i] = value;
+ // Fall through to ALL_SAME handling.
+ } else {
+ newDataCapacity += blockLength;
+ continue;
+ }
+ } else {
+ assert(flags[i] == ALL_SAME);
+ if (inc > 1) {
+ // Do all of the fast-range data block's ALL_SAME parts have the same value?
+ boolean allSame = true;
+ int next_i = i + inc;
+ for (int j = i + 1; j < next_i; ++j) {
+ assert(flags[j] == ALL_SAME);
+ if (index[j] != value) {
+ allSame = false;
+ break;
+ }
+ }
+ if (!allSame) {
+ // Turn it into a MIXED block.
+ if (getDataBlock(i) < 0) {  // Never true here: getDataBlock() is documented to return non-negative values.
+ return -1;
+ }
+ continue;
+ }
+ }
+ }
+ // Is there another ALL_SAME block with the same value?
+ int other = allSameBlocks.findOrAdd(i, inc, value);
+ if (other == AllSameBlocks.OVERFLOW) {
+ // The fixed-size array overflowed. Slow check for a duplicate block.
+ int jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ for (int j = 0;; j += jInc) {
+ if (j == i) {
+ allSameBlocks.add(i, inc, value);
+ break;
+ }
+ if (j == fastILimit) {
+ jInc = 1;
+ }
+ if (flags[j] == ALL_SAME && index[j] == value) {
+ allSameBlocks.add(j, jInc + inc, value);
+ other = j;
+ break;
+ // We could keep counting blocks with the same value
+ // before we add the first one, which may improve compaction in rare cases,
+ // but it would make it slower.
+ }
+ }
+ }
+ if (other >= 0) {
+ flags[i] = SAME_AS;  // index[i] now refers to the other block with the same value.
+ index[i] = other;
+ } else {
+ // New unique same-value block.
+ newDataCapacity += blockLength;
+ }
+ }
+ return newDataCapacity;
+ }
+
+ /**
+ * Compacts a build-time trie.
+ * Writes the compacted data into newData and returns the new data length.
+ * The compaction
+ * - removes blocks that are identical with earlier ones
+ * - overlaps each new non-duplicate block as much as possible with the previously-written one
+ * - works with fast-range data blocks whose length is a multiple of that of
+ * higher-code-point data blocks
+ *
+ * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
+ */
+ private int compactData(int fastILimit, int[] newData) {
+ // The linear ASCII data has been copied into newData already.
+ int newDataLength = 0;
+ for (int i = 0; newDataLength < ASCII_LIMIT;
+ newDataLength += CodePointTrie.FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
+ index[i] = newDataLength;  // ASCII blocks keep their linear positions.
+ }
+
+ int iLimit = highStart >> CodePointTrie.SHIFT_3;
+ int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
+ int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ for (int i = ASCII_I_LIMIT; i < iLimit; i += inc) {
+ if (i == fastILimit) {
+ blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+ inc = 1;
+ }
+ if (flags[i] == ALL_SAME) {
+ int value = index[i];
+ int n = findAllSameBlock(newData, newDataLength, value, blockLength);
+ if (n >= 0) {
+ index[i] = n;
+ } else {
+ n = getAllSameOverlap(newData, newDataLength, value, blockLength);
+ index[i] = newDataLength - n;
+ while (n < blockLength) {  // Append only the part that does not overlap.
+ newData[newDataLength++] = value;
+ ++n;
+ }
+ }
+ } else if (flags[i] == MIXED) {
+ int block = index[i];
+ int n = findSameBlock(newData, 0, newDataLength, data, block, blockLength);
+ if (n >= 0) {
+ index[i] = n;
+ } else {
+ n = getOverlap(newData, newDataLength, data, block, blockLength);
+ index[i] = newDataLength - n;
+ while (n < blockLength) {  // Append only the part that does not overlap.
+ newData[newDataLength++] = data[block + n++];
+ }
+ }
+ } else /* SAME_AS */ {
+ int j = index[i];
+ index[i] = index[j];  // Reuse the offset stored for the block that this one duplicates.
+ }
+ }
+
+ return newDataLength;
+ }
+
+ private int compactIndex(int fastILimit) {  // Returns the length of the compacted index.
+ int fastIndexLength = fastILimit >> (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3);
+ if ((highStart >> CodePointTrie.FAST_SHIFT) <= fastIndexLength) {
+ // Only the linear fast index, no multi-stage index tables.
+ index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET;
+ return fastIndexLength;
+ }
+
+ // Condense the fast index table.
+ // Also, does it contain an index-3 block with all dataNullOffset?
+ char[] fastIndex = new char[fastIndexLength];
+ int i3FirstNull = -1;
+ for (int i = 0, j = 0; i < fastILimit; ++j) {
+ int i3 = index[i];
+ fastIndex[j] = (char)i3;
+ if (i3 == dataNullOffset) {
+ if (i3FirstNull < 0) {
+ i3FirstNull = j;
+ } else if (index3NullOffset < 0 &&
+ (j - i3FirstNull + 1) == CodePointTrie.INDEX_3_BLOCK_LENGTH) {
+ index3NullOffset = i3FirstNull;
+ }
+ } else {
+ i3FirstNull = -1;
+ }
+ // Set the index entries that compactData() skipped.
+ // Needed when the multi-stage index covers the fast index range as well.
+ int iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ while (++i < iNext) {
+ i3 += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+ index[i] = i3;
+ }
+ }
+
+ // Examine index-3 blocks. For each determine one of:
+ // - same as the index-3 null block
+ // - same as a fast-index block
+ // - 16-bit indexes
+ // - 18-bit indexes
+ // We store this in the first flags entry for the index-3 block.
+ //
+ // Also determine an upper limit for the index-3 table length.
+ int index3Capacity = 0;
+ i3FirstNull = index3NullOffset;
+ // If the fast index covers the whole BMP, then
+ // the multi-stage index is only for supplementary code points.
+ // Otherwise, the multi-stage index covers all of Unicode.
+ int iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
+ int iLimit = highStart >> CodePointTrie.SHIFT_3;
+ for (int i = iStart; i < iLimit;) {
+ int j = i;
+ int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH;
+ int oredI3 = 0;  // OR of all entries: stays <= 0xffff only if every entry fits into 16 bits.
+ boolean isNull = true;
+ do {
+ int i3 = index[j];
+ oredI3 |= i3;
+ if (i3 != dataNullOffset) {
+ isNull = false;
+ }
+ } while (++j < jLimit);
+ if (isNull) {
+ flags[i] = I3_NULL;
+ if (i3FirstNull < 0) {
+ if (oredI3 <= 0xffff) {
+ index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH;
+ } else {
+ index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ }
+ i3FirstNull = 0;
+ }
+ } else {
+ if (oredI3 <= 0xffff) {
+ int n = findSameBlock(fastIndex, 0, fastIndexLength,
+ index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH);
+ if (n >= 0) {
+ flags[i] = I3_BMP;
+ index[i] = n;
+ } else {
+ flags[i] = I3_16;
+ index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH;
+ }
+ } else {
+ flags[i] = I3_18;
+ index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ }
+ }
+ i = j;
+ }
+
+ int index2Capacity = (iLimit - iStart) >> CodePointTrie.SHIFT_2_3;
+
+ // Length of the index-1 table, rounded up.
+ int index1Length = (index2Capacity + CodePointTrie.INDEX_2_MASK) >> CodePointTrie.SHIFT_1_2;
+
+ // Index table: Fast index, index-1, index-3, index-2.
+ // +1 for possible index table padding.
+ int index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
+ index16 = Arrays.copyOf(fastIndex, index16Capacity);
+
+ // Compact the index-3 table and write an uncompacted version of the index-2 table.
+ char[] index2 = new char[index2Capacity];
+ int i2Length = 0;
+ i3FirstNull = index3NullOffset;
+ int index3Start = fastIndexLength + index1Length;
+ int indexLength = index3Start;
+ for (int i = iStart; i < iLimit; i += CodePointTrie.INDEX_3_BLOCK_LENGTH) {
+ int i3;
+ byte f = flags[i];
+ if (f == I3_NULL && i3FirstNull < 0) {
+ // First index-3 null block. Write & overlap it like a normal block, then remember it.
+ f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
+ i3FirstNull = 0;
+ }
+ if (f == I3_NULL) {
+ i3 = index3NullOffset;
+ } else if (f == I3_BMP) {
+ i3 = index[i];
+ } else if (f == I3_16) {
+ int n = findSameBlock(index16, index3Start, indexLength,
+ index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH);
+ if (n >= 0) {
+ i3 = n;
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength,
+ index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH);
+ }
+ i3 = indexLength - n;
+ while (n < CodePointTrie.INDEX_3_BLOCK_LENGTH) {
+ index16[indexLength++] = (char)index[i + n++];
+ }
+ }
+ } else {
+ assert(f == I3_18);
+ // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
+ int j = i;
+ int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH;
+ int k = indexLength;
+ do {
+ ++k;  // Skip the first of each 9 entries; it receives the upper bits at the end of this pass.
+ int v = index[j++];
+ int upperBits = (v & 0x30000) >> 2;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 4;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 6;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 8;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 10;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 12;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 14;
+ index16[k++] = (char)v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 16;
+ index16[k++] = (char)v;
+ index16[k - 9] = (char)upperBits;
+ } while (j < jLimit);
+ int n = findSameBlock(index16, index3Start, indexLength,
+ index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
+ if (n >= 0) {
+ i3 = n | 0x8000;  // The 0x8000 bit is set only on offsets of 18-bit index-3 blocks.
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength,
+ index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
+ }
+ i3 = (indexLength - n) | 0x8000;
+ if (n > 0) {
+ int start = indexLength;
+ while (n < INDEX_3_18BIT_BLOCK_LENGTH) {  // Move the non-overlapping part down over the overlap.
+ index16[indexLength++] = index16[start + n++];
+ }
+ } else {
+ indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
+ }
+ }
+ }
+ if (index3NullOffset < 0 && i3FirstNull >= 0) {
+ index3NullOffset = i3;
+ }
+ // Set the index-2 table entry.
+ index2[i2Length++] = (char)i3;
+ }
+ assert(i2Length == index2Capacity);
+ assert(indexLength <= index3Start + index3Capacity);
+
+ if (index3NullOffset < 0) {
+ index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET;
+ }
+ if (indexLength >= (CodePointTrie.NO_INDEX3_NULL_OFFSET + CodePointTrie.INDEX_3_BLOCK_LENGTH)) {
+ // The index-3 offsets exceed 15 bits, or
+ // the last one cannot be distinguished from the no-null-block value.
+ // TODO: review exceptions / error codes
+ throw new IndexOutOfBoundsException(
+ "The trie data exceeds limitations of the data structure.");
+ }
+
+ // Compact the index-2 table and write the index-1 table.
+ int blockLength = CodePointTrie.INDEX_2_BLOCK_LENGTH;
+ int i1 = fastIndexLength;
+ for (int i = 0; i < i2Length; i += blockLength) {
+ if ((i2Length - i) < blockLength) {
+ // highStart is inside the last index-2 block. Shorten it.
+ blockLength = i2Length - i;
+ }
+ int i2;
+ int n = findSameBlock(index16, index3Start, indexLength,
+ index2, i, blockLength);
+ if (n >= 0) {
+ i2 = n;
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3/2 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength, index2, i, blockLength);
+ }
+ i2 = indexLength - n;
+ while (n < blockLength) {
+ index16[indexLength++] = index2[i + n++];
+ }
+ }
+ // Set the index-1 table entry.
+ index16[i1++] = (char)i2;
+ }
+ assert(i1 == index3Start);
+ assert(indexLength <= index16Capacity);
+
+ return indexLength;
+ }
+
+ /**
+ * Compacts the data and index arrays of this mutable trie; called from build().
+ *
+ * <p>In order, this method: determines highValue and the rounded-up real highStart;
+ * temporarily pins highStart to at least the fast-range limit; snapshots the ASCII values;
+ * compacts the data via compactWholeDataBlocks() and compactData(); picks the data null
+ * block; compacts the index via compactIndex(); and finally stores the real highStart.
+ *
+ * <p>Side effects: updates highValue, highStart, flags, index, data, dataLength,
+ * dataNullOffset and initialValue, in addition to whatever the called helpers modify.
+ *
+ * @param fastILimit limit index into index[]/flags[] for the fast range;
+ * the corresponding code point limit is fastILimit << CodePointTrie.SHIFT_3
+ * @return the index length as returned by compactIndex()
+ * @throws IndexOutOfBoundsException if the compacted data length exceeds
+ * 0x3ffff + CodePointTrie.SMALL_DATA_BLOCK_LENGTH
+ */
+ private int compactTrie(int fastILimit) {
+ // Find the real highStart and round it up.
+ assert((highStart & (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1)) == 0);
+ highValue = get(MAX_UNICODE);
+ int realHighStart = findHighStart();
+ // Round up to the next multiple of CP_PER_INDEX_2_ENTRY (add mask, then clear the low bits).
+ realHighStart = (realHighStart + (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1)) &
+ ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1);
+ if (realHighStart == UNICODE_LIMIT) {
+ highValue = initialValue;
+ }
+
+ // We always store indexes and data values for the fast range.
+ // Pin highStart to the top of that range while building.
+ int fastLimit = fastILimit << CodePointTrie.SHIFT_3;
+ if (realHighStart < fastLimit) {
+ // Mark every entry between the real highStart and the fast limit
+ // as an all-same block carrying highValue.
+ for (int i = (realHighStart >> CodePointTrie.SHIFT_3); i < fastILimit; ++i) {
+ flags[i] = ALL_SAME;
+ index[i] = highValue;
+ }
+ highStart = fastLimit;
+ } else {
+ highStart = realHighStart;
+ }
+
+ // Snapshot the ASCII values via get() before the data array is replaced;
+ // they become the start of the new data array below.
+ int[] asciiData = new int[ASCII_LIMIT];
+ for (int i = 0; i < ASCII_LIMIT; ++i) {
+ asciiData[i] = get(i);
+ }
+
+ // First we look for which data blocks have the same value repeated over the whole block,
+ // deduplicate such blocks, find a good null data block (for faster enumeration),
+ // and get an upper bound for the necessary data array length.
+ AllSameBlocks allSameBlocks = new AllSameBlocks();
+ int newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
+ // Arrays.copyOf() puts the ASCII values at the front and zero-fills the remainder.
+ int[] newData = Arrays.copyOf(asciiData, newDataCapacity);
+
+ int newDataLength = compactData(fastILimit, newData);
+ assert(newDataLength <= newDataCapacity);
+ data = newData;
+ dataLength = newDataLength;
+ if (dataLength > (0x3ffff + CodePointTrie.SMALL_DATA_BLOCK_LENGTH)) {
+ // The offset of the last data block is too high to be stored in the index table.
+ // TODO: review exceptions / error codes
+ throw new IndexOutOfBoundsException(
+ "The trie data exceeds limitations of the data structure.");
+ }
+
+ // findMostUsed() yields an index into index[], or a negative value if there is none.
+ // index[dataNullIndex] is used as an offset into the new data array
+ // (presumably set by the compaction steps above -- verify against compactData()).
+ int dataNullIndex = allSameBlocks.findMostUsed();
+ if (dataNullIndex >= 0) {
+ dataNullOffset = index[dataNullIndex];
+ // The value stored in the null block becomes the new initialValue.
+ initialValue = data[dataNullOffset];
+ } else {
+ dataNullOffset = CodePointTrie.NO_DATA_NULL_OFFSET;
+ }
+
+ int indexLength = compactIndex(fastILimit);
+ // Undo the pinning to fastLimit from above: store the real (rounded-up) highStart.
+ highStart = realHighStart;
+ return indexLength;
+ }
+
+ /**
+ * Builds an immutable CodePointTrie from the current contents of this mutable trie.
+ *
+ * <p>The values are first masked to the requested width. Then the trie is compacted via
+ * compactTrie(), the data array is extended so that the total structure length is a
+ * multiple of 4 bytes and the data ends with highValue followed by errorValue, and finally
+ * the index and data are copied into arrays of the target element types.
+ *
+ * <p>This method modifies this object's state: the helpers it calls rewrite the trie arrays,
+ * values are appended to data (dataLength grows), and index16 is set to null when
+ * its array is handed over to the new trie.
+ *
+ * @param type selects the fast-range limit (BMP_LIMIT for FAST, otherwise
+ * CodePointTrie.SMALL_LIMIT) and whether a Fast* or Small* trie is created
+ * @param valueWidth BITS_32, BITS_16 or BITS_8; selects the masking, the padding rules
+ * and the data array element type
+ * @return a new Fast or Small trie with 8-, 16- or 32-bit data, matching the arguments
+ * @throws IllegalArgumentException if valueWidth is not one of the three handled constants
+ */
+ private CodePointTrie build(CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) {
+ // The mutable trie always stores 32-bit values.
+ // When we build a UCPTrie for a smaller value width, we first mask off unused bits
+ // before compacting the data.
+ switch (valueWidth) {
+ case BITS_32:
+ break;
+ case BITS_16:
+ maskValues(0xffff);
+ break;
+ case BITS_8:
+ maskValues(0xff);
+ break;
+ default:
+ // Should be unreachable.
+ throw new IllegalArgumentException();
+ }
+
+ int fastLimit = type == CodePointTrie.Type.FAST ? BMP_LIMIT : CodePointTrie.SMALL_LIMIT;
+ // compactTrie() takes the fast limit as an index-array limit, hence the shift.
+ int indexLength = compactTrie(fastLimit >> CodePointTrie.SHIFT_3);
+
+ // Ensure data table alignment: The index length must be even for uint32_t data.
+ if (valueWidth == CodePointTrie.ValueWidth.BITS_32 && (indexLength & 1) != 0) {
+ index16[indexLength++] = 0xffee; // arbitrary value
+ }
+
+ // Make the total trie structure length a multiple of 4 bytes by padding the data table,
+ // and store special values as the last two data values.
+ // length is the total size in bytes; it is only used for the alignment assertion below.
+ int length = indexLength * 2;
+ if (valueWidth == CodePointTrie.ValueWidth.BITS_16) {
+ // Index and data are both 16-bit units here: if their combined count is odd,
+ // one extra unit makes the byte length a multiple of 4.
+ if (((indexLength ^ dataLength) & 1) != 0) {
+ // padding
+ data[dataLength++] = errorValue;
+ }
+ // Unless the data already ends with highValue, errorValue: append both.
+ // Adding two units keeps the combined count even.
+ if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
+ data[dataLength++] = highValue;
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength * 2;
+ } else if (valueWidth == CodePointTrie.ValueWidth.BITS_32) {
+ // 32-bit data words never need padding to a multiple of 4 bytes.
+ if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
+ // If the data already ends with highValue, only errorValue needs to be appended.
+ if (data[dataLength - 1] != highValue) {
+ data[dataLength++] = highValue;
+ }
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength * 4;
+ } else {
+ // 8-bit values: length is the index size in bytes and each data value adds one byte,
+ // so and3 is the current total byte length modulo 4.
+ int and3 = (length + dataLength) & 3;
+ if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
+ // all set
+ } else if(and3 == 3 && data[dataLength - 1] == highValue) {
+ // One more byte both aligns the length and completes the highValue, errorValue pair.
+ data[dataLength++] = errorValue;
+ } else {
+ // Pad with highValue until exactly two more bytes reach a multiple of 4,
+ // then append the highValue, errorValue pair.
+ while (and3 != 2) {
+ data[dataLength++] = highValue;
+ and3 = (and3 + 1) & 3;
+ }
+ data[dataLength++] = highValue;
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength;
+ }
+ assert((length & 3) == 0);
+
+ // Fill the index and data arrays.
+ char[] trieIndex;
+ if (highStart <= fastLimit) {
+ // Condense only the fast index from the mutable-trie index.
+ // Takes every SMALL_DATA_BLOCKS_PER_BMP_BLOCK-th entry of index[], narrowed to 16 bits.
+ trieIndex = new char[indexLength];
+ for (int i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
+ trieIndex[j] = (char)index[i];
+ }
+ } else {
+ if (indexLength == index16.length) {
+ // index16 has exactly the right size: hand it over instead of copying,
+ // and drop this object's reference to it.
+ trieIndex = index16;
+ index16 = null;
+ } else {
+ trieIndex = Arrays.copyOf(index16, indexLength);
+ }
+ }
+
+ // Write the data array.
+ switch (valueWidth) {
+ case BITS_16: {
+ // Write 16-bit data values.
+ // The (char) cast keeps the low 16 bits of each value.
+ char[] data16 = new char[dataLength];
+ for (int i = 0; i < dataLength; ++i) { data16[i] = (char)data[i]; }
+ return type == CodePointTrie.Type.FAST ?
+ new CodePointTrie.Fast16(trieIndex, data16, highStart,
+ index3NullOffset, dataNullOffset) :
+ new CodePointTrie.Small16(trieIndex, data16, highStart,
+ index3NullOffset, dataNullOffset);
+ }
+ case BITS_32: {
+ // Write 32-bit data values.
+ int[] data32 = Arrays.copyOf(data, dataLength);
+ return type == CodePointTrie.Type.FAST ?
+ new CodePointTrie.Fast32(trieIndex, data32, highStart,
+ index3NullOffset, dataNullOffset) :
+ new CodePointTrie.Small32(trieIndex, data32, highStart,
+ index3NullOffset, dataNullOffset);
+ }
+ case BITS_8: {
+ // Write 8-bit data values.
+ // The (byte) cast keeps the low 8 bits of each value.
+ byte[] data8 = new byte[dataLength];
+ for (int i = 0; i < dataLength; ++i) { data8[i] = (byte)data[i]; }
+ return type == CodePointTrie.Type.FAST ?
+ new CodePointTrie.Fast8(trieIndex, data8, highStart,
+ index3NullOffset, dataNullOffset) :
+ new CodePointTrie.Small8(trieIndex, data8, highStart,
+ index3NullOffset, dataNullOffset);
+ }
+ default:
+ // Should be unreachable.
+ throw new IllegalArgumentException();
+ }
+ }
+}
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
old mode 100755
new mode 100644
index 93c64fdb11f..c864412f230
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:70c249360d5cc010c75203f5add8040cbcc4f33229e1d82d34b6185d69832143
-size 12510210
+oid sha256:a8be41753876c867630b4e740d692e0ae7ced119086a22cd4844ea7bf174d6f7
+size 12509408
diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar
index 8b02fe62204..67e57e3ad38 100755
--- a/icu4j/main/shared/data/icutzdata.jar
+++ b/icu4j/main/shared/data/icutzdata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:93a0bf4221a173b33aeda78f4646092caad816a6832310a89278de249ec18634
+oid sha256:55923dda88f8bf3affc2cf6d774a92a49e5fbc4be5583769bfe90fc7f319d2b1
size 92857
diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar
old mode 100755
new mode 100644
index 96345bd896f..91438132198
--- a/icu4j/main/shared/data/testdata.jar
+++ b/icu4j/main/shared/data/testdata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:47978ca4c19730c3d4387d9058679115dbf1e21964b993a889a38680fd3dfe47
-size 813186
+oid sha256:0d399ead8487d2beff526c723212022ba354501bb3777481f16b53241d24a8d1
+size 813119
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java
index 0e7f9aef1b3..26c4191fd22 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java
@@ -2632,9 +2632,14 @@ public class BasicTest extends TestFmwk {
@Test
public void TestCustomComp() {
String [][] pairs={
- { "\\uD801\\uE000\\uDFFE", "" },
- { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
- { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
+ // ICU 63 normalization with CodePointTrie requires inert surrogate code points.
+ // { "\\uD801\\uE000\\uDFFE", "" },
+ // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
+ // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
+ { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" },
+ { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" },
+ { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" },
+
{ "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" },
{ "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
{ "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
@@ -2661,9 +2666,14 @@ public class BasicTest extends TestFmwk {
@Test
public void TestCustomFCC() {
String[][] pairs={
- { "\\uD801\\uE000\\uDFFE", "" },
- { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
- { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
+ // ICU 63 normalization with CodePointTrie requires inert surrogate code points.
+ // { "\\uD801\\uE000\\uDFFE", "" },
+ // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
+ // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
+ { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" },
+ { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" },
+ { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" },
+
// The following expected result is different from CustomComp
// because of only-contiguous composition.
{ "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" },
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java
new file mode 100644
index 00000000000..819800ad71f
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java
@@ -0,0 +1,985 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// created: 2018jul10 Markus W. Scherer
+
+// This is a fairly straight port from cintltst/ucptrietest.c.
+// It wants to remain close to the C code, rather than be completely colloquial Java.
+
+package com.ibm.icu.dev.test.util;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
+import com.ibm.icu.util.CodePointMap;
+import com.ibm.icu.util.CodePointTrie;
+import com.ibm.icu.util.MutableCodePointTrie;
+
+@RunWith(JUnit4.class)
+public final class CodePointTrieTest extends TestFmwk {
+ /**
+ * One range assignment used to populate a trie under test.
+ * The ranges of a test case may overlap and need not be listed in order.
+ */
+ private static class SetRange {
+ final int start;
+ final int limit;
+ final int value;
+
+ SetRange(int start, int limit, int value) {
+ this.value = value;
+ this.limit = limit;
+ this.start = start;
+ }
+ }
+
+ /**
+ * Result type of getSpecialValues(): values extracted from an array of CheckRange.
+ */
+ private static class SpecialValues {
+ final int i;
+ final int initialValue;
+ final int errorValue;
+
+ SpecialValues(int i, int initialValue, int errorValue) {
+ this.errorValue = errorValue;
+ this.initialValue = initialValue;
+ this.i = i;
+ }
+ }
+
+ /*
+ * Values for testing:
+ * value is set from the previous boundary's limit to before
+ * this boundary's limit
+ *
+ * There must be an entry with limit 0 and the initialValue.
+ * It may be preceded by an entry with negative limit and the errorValue.
+ */
+ private static class CheckRange {
+ // Boundary: this entry's value applies up to just before this limit.
+ final int limit;
+ // Value that applies from the previous entry's limit up to before this limit.
+ final int value;
+
+ CheckRange(int limit, int value) {
+ this.value = value;
+ this.limit = limit;
+ }
+ }
+
+ private static int skipSpecialValues(CheckRange checkRanges[]) {
+ int i;
+ for(i=0; i
+ * void onString(CodePointMap map, CharSequence s, int start) {
+ * CodePointMap.StringIterator iter = map.stringIterator(s, start);
+ * while (iter.next()) {
+ * int end = iter.getIndex(); // code point from between start and end
+ * useValue(s, start, end, iter.getCodePoint(), iter.getValue());
+ * start = end;
+ * }
+ * }
+ *
+ *
+ *
+ * int start = 0;
+ * CodePointMap.Range range = new CodePointMap.Range();
+ * while (trie.getRange(start, null, range)) {
+ * int end = range.getEnd();
+ * int value = range.getValue();
+ * // Work with the range start..end and its value.
+ * start = end + 1;
+ * }
+ *
+ *
+ * @param start range start
+ * @param filter an object that may modify the trie data value,
+ * or null if the values from the trie are to be used unmodified
+ * @param range the range object that will be set to the code point range and value
+ * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract boolean getRange(int start, ValueFilter filter, Range range);
+
+ /**
+ * Sets the range object to a range of code points beginning with the start parameter.
 * The range end is the last code point such that
+ * all those from start to there have the same value.
+ * Returns false if start is not 0..U+10FFFF.
+ *
+ *