ICU-13530 fix bugs, add tests, clarify docs (#83)

* ICU-13530 test & fix cptrie.getRange() when small null data block matches the start of a non-null fast data block

* ICU-13530 fix test bug

* ICU-13530 test & fix bug calculating worst-case data array capacity at start of compaction

* ICU-13530 docs: clarify buildImmutable() discards upper bits, trie then returns unsigned ints; range iteration slower than if ranges were stored directly

* ICU-13530 accept feedback on docs
This commit is contained in:
Markus Scherer 2018-08-28 12:53:34 -07:00 committed by Shane Carr
parent e9e50b5aa3
commit 000c266045
No known key found for this signature in database
GPG key ID: FCED3B24AAB18B5C
9 changed files with 331 additions and 28 deletions

View file

@ -274,6 +274,11 @@ U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
* The header.dataNullOffset (split across two header fields, high bits in header.options)
* is the offset of a widely shared data block filled with one single value.
* It helps quickly skip over large ranges of data with that value.
* The builder must ensure that if the start of any data block (fast or small)
* matches the dataNullOffset, then the whole block must be filled with the null value.
* Special care must be taken if there is no fast null data block
* but a small one, which is shorter, and it matches the *start* of some fast data block.
*
* Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
* where all index entries point to the dataNullOffset.
* If there is no such data or index-3 block, then these offsets are set to

View file

@ -91,7 +91,7 @@ private:
void maskValues(uint32_t mask);
UChar32 findHighStart() const;
int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
int32_t compactData(int32_t fastILimit, uint32_t *newData);
int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex);
int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode);
int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
@ -599,12 +599,12 @@ int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
return -1;
}
int32_t findAllSameBlock(const uint32_t *p, int32_t length, uint32_t value,
int32_t blockLength) {
// Ensure that we do not even partially get past length.
length -= blockLength;
int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
uint32_t value, int32_t blockLength) {
// Ensure that we do not even partially get past limit.
limit -= blockLength;
for (int32_t block = 0; block <= length; ++block) {
for (int32_t block = start; block <= limit; ++block) {
if (p[block] == value) {
for (int32_t i = 1;; ++i) {
if (i == blockLength) {
@ -665,6 +665,15 @@ int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
return length - i;
}
bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
if (index[i] == dataOffset) {
return true;
}
}
return false;
}
/**
* Finds the start of the last range in the trie by enumerating backward.
* Indexes for code points higher than this will be omitted.
@ -775,6 +784,9 @@ int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSame
// ASCII data will be stored as a linear table, even if the following code
// does not yet count it that way.
int32_t newDataCapacity = ASCII_LIMIT;
// Add room for a small data null block in case it would match the start of
// a fast data block where dataNullOffset must not be set in that case.
newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
// Add room for special values (errorValue, highValue) and padding.
newDataCapacity += 4;
int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
@ -816,6 +828,7 @@ int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSame
if (getDataBlock(i) < 0) {
return -1;
}
newDataCapacity += blockLength;
continue;
}
}
@ -918,7 +931,8 @@ void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
*
* It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
*/
int32_t MutableCodePointTrie::compactData(int32_t fastILimit, uint32_t *newData) {
int32_t MutableCodePointTrie::compactData(int32_t fastILimit,
uint32_t *newData, int32_t dataNullIndex) {
#ifdef UCPTRIE_DEBUG
int32_t countSame=0, sumOverlaps=0;
bool printData = dataLength == 29088 /* line.brk */ ||
@ -941,14 +955,31 @@ int32_t MutableCodePointTrie::compactData(int32_t fastILimit, uint32_t *newData)
int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
int32_t fastLength = 0;
for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
if (i == fastILimit) {
blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
inc = 1;
fastLength = newDataLength;
}
if (flags[i] == ALL_SAME) {
uint32_t value = index[i];
int32_t n = findAllSameBlock(newData, newDataLength, value, blockLength);
int32_t n;
// Find an earlier part of the data array of length blockLength
// that is filled with this value.
// If we find a match, and the current block is the data null block,
// and it is not a fast block but matches the start of a fast block,
// then we need to continue looking.
// This is because this small block is shorter than the fast block,
// and not all of the rest of the fast block is filled with this value.
// Otherwise trie.getRange() would detect that the fast block starts at
// dataNullOffset and assume incorrectly that it is filled with the null value.
for (int32_t start = 0;
(n = findAllSameBlock(newData, start, newDataLength,
value, blockLength)) >= 0 &&
i == dataNullIndex && i >= fastILimit && n < fastLength &&
isStartOfSomeFastBlock(n, index, fastILimit);
start = n + 1) {}
if (n >= 0) {
DEBUG_DO(++countSame);
index[i] = n;
@ -1306,7 +1337,8 @@ int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorC
}
uprv_memcpy(newData, asciiData, sizeof(asciiData));
int32_t newDataLength = compactData(fastILimit, newData);
int32_t dataNullIndex = allSameBlocks.findMostUsed();
int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex);
U_ASSERT(newDataLength <= newDataCapacity);
uprv_free(data);
data = newData;
@ -1318,7 +1350,6 @@ int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorC
return 0;
}
int32_t dataNullIndex = allSameBlocks.findMostUsed();
if (dataNullIndex >= 0) {
dataNullOffset = index[dataNullIndex];
#ifdef UCPTRIE_DEBUG

View file

@ -89,17 +89,19 @@ enum UCPTrieValueWidth {
*/
UCPTRIE_VALUE_BITS_ANY = -1,
/**
* 16 bits per UCPTrie data value.
* The trie stores 16 bits per data value.
* It returns them as unsigned values 0..0xffff=65535.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_16,
/**
* 32 bits per UCPTrie data value.
* The trie stores 32 bits per data value.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_32,
/**
* 8 bits per UCPTrie data value.
* The trie stores 8 bits per data value.
* It returns them as unsigned values 0..0xff=255.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_8
@ -272,6 +274,8 @@ UCPTrieValueFilter(const void *context, uint32_t value);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a trie.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* If the UCPTrieValueFilter function pointer is not NULL, then
* the value to be delivered is passed through that function, and the return value is the end

View file

@ -129,6 +129,9 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a trie.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* The trie can be modified between calls to this function.
*
* If the UCPTrieValueFilter function pointer is not NULL, then
@ -189,6 +192,15 @@ umutablecptrie_setRange(UMutableCPTrie *trie,
* Compacts the data and builds an immutable UCPTrie according to the parameters.
* After this, the mutable trie will be empty.
*
* The mutable trie stores 32-bit values until buildImmutable() is called.
* If values shorter than 32 bits are to be stored in the immutable trie,
* then the upper bits are discarded.
* For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
* and the value width is 8 bits, then each of these is stored as 0x81
* and the immutable trie will return that as an unsigned value.
* (Some implementations may want to make productive temporary use of the upper bits
* until buildImmutable() discards them.)
*
* Not every possible set of mappings can be built into a UCPTrie,
* because of limitations resulting from speed and space optimizations.
* Every Unicode assigned character can be mapped to a unique value.

View file

@ -1490,6 +1490,75 @@ TrieTestGetRangesFixedSurr(void) {
umutablecptrie_close(mutableTrie);
}
static void TestSmallNullBlockMatchesFast(void) {
// The initial builder+getRange code had a bug:
// When there is no null data block in the fast-index range,
// but a fast-range data block starts with enough values to match a small data block,
// then getRange() got confused.
// The builder must prevent this.
static const SetRange setRanges[] = {
{ 0, 0x880, 1 },
// U+0880..U+088F map to initial value 0, potential match for small null data block.
{ 0x890, 0x1040, 2 },
// U+1040..U+1050 map to 0.
// First small null data block in a small-type trie.
// In a fast-type trie, it is ok to match a small null data block at U+1041
// but not at U+1040.
{ 0x1051, 0x10000, 3 },
// No fast data block (block length 64) filled with 0 regardless of trie type.
// Need more blocks filled with 0 than the largest range above,
// and need a highStart above that so that it actually counts.
{ 0x20000, 0x110000, 9 }
};
static const CheckRange checkRanges[] = {
{ 0x0880, 1 },
{ 0x0890, 0 },
{ 0x1040, 2 },
{ 0x1051, 0 },
{ 0x10000, 3 },
{ 0x20000, 0 },
{ 0x110000, 9 }
};
testTrieRanges("small0-in-fast", FALSE,
setRanges, UPRV_LENGTHOF(setRanges),
checkRanges, UPRV_LENGTHOF(checkRanges));
}
static void ShortAllSameBlocksTest(void) {
static const char *const testName = "short-all-same";
// Many all-same-value blocks but only of the small block length used in the mutable trie.
// The builder code needs to turn a group of short ALL_SAME blocks below fastLimit
// into a MIXED block, and reserve data array capacity for that.
UErrorCode errorCode = U_ZERO_ERROR;
UMutableCPTrie *mutableTrie = umutablecptrie_open(0, 0xad, &errorCode);
CheckRange checkRanges[0x101];
int32_t i;
if (U_FAILURE(errorCode)) {
log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
return;
}
for (i = 0; i < 0x1000; i += 0x10) {
uint32_t value = i >> 4;
umutablecptrie_setRange(mutableTrie, i, i + 0xf, value, &errorCode);
checkRanges[value].limit = i + 0x10;
checkRanges[value].value = value;
}
checkRanges[0x100].limit = 0x110000;
checkRanges[0x100].value = 0;
if (U_FAILURE(errorCode)) {
log_err("error: setting values into a mutable trie (%s) failed - %s\n",
testName, u_errorName(errorCode));
umutablecptrie_close(mutableTrie);
return;
}
mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE,
checkRanges, UPRV_LENGTHOF(checkRanges));
umutablecptrie_close(mutableTrie);
}
void
addUCPTrieTest(TestNode** root) {
addTest(root, &TrieTestSet1, "tsutil/ucptrietest/TrieTestSet1");
@ -1503,4 +1572,6 @@ addUCPTrieTest(TestNode** root) {
addTest(root, &ManyAllSameBlocksTest, "tsutil/ucptrietest/ManyAllSameBlocksTest");
addTest(root, &MuchDataTest, "tsutil/ucptrietest/MuchDataTest");
addTest(root, &TrieTestGetRangesFixedSurr, "tsutil/ucptrietest/TrieTestGetRangesFixedSurr");
addTest(root, &TestSmallNullBlockMatchesFast, "tsutil/ucptrietest/TestSmallNullBlockMatchesFast");
addTest(root, &ShortAllSameBlocksTest, "tsutil/ucptrietest/ShortAllSameBlocksTest");
}

View file

@ -331,10 +331,14 @@ public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
/**
* Sets the range object to a range of code points beginning with the start parameter.
* The range end is the the last code point such that
* The range start is the same as the start input parameter
* (even if there are preceding code points that have the same value).
* The range end is the last code point such that
* all those from start to there have the same value.
* Returns false if start is not 0..U+10FFFF.
* Can be used to efficiently iterate over all same-value ranges in a map.
* (This is normally faster than iterating over code points and get()ting each value,
* but may be much slower than a data structure that stores ranges directly.)
*
* <p>If the {@link ValueFilter} parameter is not null, then
* the value to be delivered is passed through that filter, and the return value is the end
@ -365,7 +369,9 @@ public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
/**
* Sets the range object to a range of code points beginning with the start parameter.
* The range end is the the last code point such that
* The range start is the same as the start input parameter
* (even if there are preceding code points that have the same value).
* The range end is the last code point such that
* all those from start to there have the same value.
* Returns false if start is not 0..U+10FFFF.
*

View file

@ -70,21 +70,23 @@ public abstract class CodePointTrie extends CodePointMap {
*/
public enum ValueWidth {
/**
* 16 bits per CodePointTrie data value.
* The trie stores 16 bits per data value.
* It returns them as unsigned values 0..0xffff=65535.
*
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
BITS_16,
/**
* 32 bits per CodePointTrie data value.
* The trie stores 32 bits per data value.
*
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
BITS_32,
/**
* 8 bits per CodePointTrie data value.
* The trie stores 8 bits per data value.
* It returns them as unsigned values 0..0xff=255.
*
* @draft ICU 63
* @provisional This API might change or be removed in a future release.

View file

@ -157,6 +157,9 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
/**
* {@inheritDoc}
*
* <p>The trie can be modified between calls to this function.
*
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
@ -309,6 +312,15 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
* Compacts the data and builds an immutable {@link CodePointTrie} according to the parameters.
* After this, the mutable trie will be empty.
*
* <p>The mutable trie stores 32-bit values until buildImmutable() is called.
* If values shorter than 32 bits are to be stored in the immutable trie,
* then the upper bits are discarded.
* For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
* and the value width is 8 bits, then each of these is stored as 0x81
* and the immutable trie will return that as an unsigned value.
* (Some implementations may want to make productive temporary use of the upper bits
* until buildImmutable() discards them.)
*
* <p>Not every possible set of mappings can be built into a CodePointTrie,
* because of limitations resulting from speed and space optimizations.
* Every Unicode assigned character can be mapped to a unique value.
@ -556,11 +568,12 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
return -1;
}
private static int findAllSameBlock(int[] p, int length, int value, int blockLength) {
// Ensure that we do not even partially get past length.
length -= blockLength;
private static int findAllSameBlock(int[] p, int start, int limit,
int value, int blockLength) {
// Ensure that we do not even partially get past limit.
limit -= blockLength;
for (int block = 0; block <= length; ++block) {
for (int block = start; block <= limit; ++block) {
if (p[block] == value) {
for (int i = 1;; ++i) {
if (i == blockLength) {
@ -614,6 +627,15 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
return length - i;
}
private static boolean isStartOfSomeFastBlock(int dataOffset, int[] index, int fastILimit) {
for (int i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
if (index[i] == dataOffset) {
return true;
}
}
return false;
}
/**
* Finds the start of the last range in the trie by enumerating backward.
* Indexes for code points higher than this will be omitted.
@ -720,6 +742,9 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
// ASCII data will be stored as a linear table, even if the following code
// does not yet count it that way.
int newDataCapacity = ASCII_LIMIT;
// Add room for a small data null block in case it would match the start of
// a fast data block where dataNullOffset must not be set in that case.
newDataCapacity += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
// Add room for special values (errorValue, highValue) and padding.
newDataCapacity += 4;
int iLimit = highStart >> CodePointTrie.SHIFT_3;
@ -761,6 +786,7 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
if (getDataBlock(i) < 0) {
return -1;
}
newDataCapacity += blockLength;
continue;
}
}
@ -810,7 +836,7 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
*
* It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
*/
private int compactData(int fastILimit, int[] newData) {
private int compactData(int fastILimit, int[] newData, int dataNullIndex) {
// The linear ASCII data has been copied into newData already.
int newDataLength = 0;
for (int i = 0; newDataLength < ASCII_LIMIT;
@ -821,14 +847,31 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
int iLimit = highStart >> CodePointTrie.SHIFT_3;
int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
int fastLength = 0;
for (int i = ASCII_I_LIMIT; i < iLimit; i += inc) {
if (i == fastILimit) {
blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
inc = 1;
fastLength = newDataLength;
}
if (flags[i] == ALL_SAME) {
int value = index[i];
int n = findAllSameBlock(newData, newDataLength, value, blockLength);
// Find an earlier part of the data array of length blockLength
// that is filled with this value.
// If we find a match, and the current block is the data null block,
// and it is not a fast block but matches the start of a fast block,
// then we need to continue looking.
// This is because this small block is shorter than the fast block,
// and not all of the rest of the fast block is filled with this value.
// Otherwise trie.getRange() would detect that the fast block starts at
// dataNullOffset and assume incorrectly that it is filled with the null value.
int n;
for (int start = 0;
(n = findAllSameBlock(newData, start, newDataLength,
value, blockLength)) >= 0 &&
i == dataNullIndex && i >= fastILimit && n < fastLength &&
isStartOfSomeFastBlock(n, index, fastILimit);
start = n + 1) {}
if (n >= 0) {
index[i] = n;
} else {
@ -1068,7 +1111,6 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
if (indexLength >= (CodePointTrie.NO_INDEX3_NULL_OFFSET + CodePointTrie.INDEX_3_BLOCK_LENGTH)) {
// The index-3 offsets exceed 15 bits, or
// the last one cannot be distinguished from the no-null-block value.
// TODO: review exceptions / error codes
throw new IndexOutOfBoundsException(
"The trie data exceeds limitations of the data structure.");
}
@ -1143,18 +1185,17 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
int newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
int[] newData = Arrays.copyOf(asciiData, newDataCapacity);
int newDataLength = compactData(fastILimit, newData);
int dataNullIndex = allSameBlocks.findMostUsed();
int newDataLength = compactData(fastILimit, newData, dataNullIndex);
assert(newDataLength <= newDataCapacity);
data = newData;
dataLength = newDataLength;
if (dataLength > (0x3ffff + CodePointTrie.SMALL_DATA_BLOCK_LENGTH)) {
// The offset of the last data block is too high to be stored in the index table.
// TODO: review exceptions / error codes
throw new IndexOutOfBoundsException(
"The trie data exceeds limitations of the data structure.");
}
int dataNullIndex = allSameBlocks.findMostUsed();
if (dataNullIndex >= 0) {
dataNullOffset = index[dataNullIndex];
initialValue = data[dataNullOffset];

View file

@ -10,6 +10,7 @@ package com.ibm.icu.dev.test.util;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import org.junit.Test;
@ -18,9 +19,14 @@ import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.CodePointMap;
import com.ibm.icu.util.CodePointTrie;
import com.ibm.icu.util.MutableCodePointTrie;
import com.ibm.icu.util.VersionInfo;
@RunWith(JUnit4.class)
public final class CodePointTrieTest extends TestFmwk {
@ -32,6 +38,11 @@ public final class CodePointTrieTest extends TestFmwk {
this.value = value;
}
@Override
public String toString() {
return Utility.hex(start) + ".." + Utility.hex(limit - 1) + ':' + Utility.hex(value);
}
final int start, limit;
final int value;
}
@ -63,6 +74,11 @@ public final class CodePointTrieTest extends TestFmwk {
this.value = value;
}
@Override
public String toString() {
return "" + Utility.hex(limit - 1) + ':' + Utility.hex(value);
}
final int limit;
final int value;
}
@ -982,4 +998,119 @@ public final class CodePointTrieTest extends TestFmwk {
testGetRangesFixedSurr("fixedSurr4", mutableTrie,
CodePointMap.RangeOption.FIXED_ALL_SURROGATES, checkRangesFixedSurr4);
}
@Test
public void TestSmallNullBlockMatchesFast() {
// The initial builder+getRange code had a bug:
// When there is no null data block in the fast-index range,
// but a fast-range data block starts with enough values to match a small data block,
// then getRange() got confused.
// The builder must prevent this.
final SetRange setRanges[] = {
new SetRange(0, 0x880, 1),
// U+0880..U+088F map to initial value 0, potential match for small null data block.
new SetRange(0x890, 0x1040, 2),
// U+1040..U+1050 map to 0.
// First small null data block in a small-type trie.
// In a fast-type trie, it is ok to match a small null data block at U+1041
// but not at U+1040.
new SetRange(0x1051, 0x10000, 3),
// No fast data block (block length 64) filled with 0 regardless of trie type.
// Need more blocks filled with 0 than the largest range above,
// and need a highStart above that so that it actually counts.
new SetRange(0x20000, 0x110000, 9)
};
final CheckRange checkRanges[] = {
new CheckRange(0x0880, 1),
new CheckRange(0x0890, 0),
new CheckRange(0x1040, 2),
new CheckRange(0x1051, 0),
new CheckRange(0x10000, 3),
new CheckRange(0x20000, 0),
new CheckRange(0x110000, 9)
};
testTrieRanges("small0-in-fast", false, setRanges, checkRanges);
}
@Test
public void ShortAllSameBlocksTest() {
// Many all-same-value blocks but only of the small block length used in the mutable trie.
// The builder code needs to turn a group of short ALL_SAME blocks below fastLimit
// into a MIXED block, and reserve data array capacity for that.
MutableCodePointTrie mutableTrie = new MutableCodePointTrie(0, 0xad);
CheckRange[] checkRanges = new CheckRange[0x101];
for (int i = 0; i < 0x1000; i += 0x10) {
int value = i >> 4;
mutableTrie.setRange(i, i + 0xf, value);
checkRanges[value] = new CheckRange(i + 0x10, value);
}
checkRanges[0x100] = new CheckRange(0x110000, 0);
mutableTrie = testTrieSerializeAllValueWidth(
"short-all-same", mutableTrie, false, checkRanges);
}
private void testIntProperty(String testName, String baseSetPattern, int property) {
UnicodeSet uni11 = new UnicodeSet(baseSetPattern);
MutableCodePointTrie mutableTrie = new MutableCodePointTrie(0, 0xad);
ArrayList<CheckRange> checkRanges = new ArrayList<>();
int start = 0;
int limit = 0;
int prevValue = 0;
for (UnicodeSet.EntryRange range : uni11.ranges()) {
// Ranges are in ascending order, each range is non-empty,
// and there is a gap from one to the next.
// Each code point in a range could have a different value.
// Any of them can be 0.
// We keep initial value 0 between ranges.
if (prevValue != 0) {
mutableTrie.setRange(start, limit - 1, prevValue);
checkRanges.add(new CheckRange(limit, prevValue));
start = limit;
prevValue = 0;
}
int c = limit = range.codepoint;
do {
int value;
if (property == UProperty.AGE) {
VersionInfo version = UCharacter.getAge(c);
value = (version.getMajor() << 4) | version.getMinor();
} else {
value = UCharacter.getIntPropertyValue(c, property);
}
if (value != prevValue) {
if (start < limit) {
if (prevValue != 0) {
mutableTrie.setRange(start, limit - 1, prevValue);
}
checkRanges.add(new CheckRange(limit, prevValue));
}
start = c;
prevValue = value;
}
limit = ++c;
} while (c <= range.codepointEnd);
}
if (prevValue != 0) {
mutableTrie.setRange(start, limit - 1, prevValue);
checkRanges.add(new CheckRange(limit, prevValue));
}
if (limit < 0x110000) {
checkRanges.add(new CheckRange(0x110000, 0));
}
testTrieSerializeAllValueWidth(testName, mutableTrie, false,
checkRanges.toArray(new CheckRange[checkRanges.size()]));
}
@Test
public void AgePropertyTest() {
testIntProperty("age", "[:age=11:]", UProperty.AGE);
}
@Test
public void BlockPropertyTest() {
testIntProperty("block", "[:^blk=No_Block:]", UProperty.BLOCK);
}
}