From a7f460e761c9cd5547b4c6e7f3663ade3434ceb9 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 20 Nov 2009 23:03:58 +0000 Subject: [PATCH] ICU-7077 UTrie2 port to Java, cleanup and optimization of iteration X-SVN-Rev: 26959 --- .../core/src/com/ibm/icu/impl/Trie2.java | 87 ++++--- .../src/com/ibm/icu/impl/Trie2Writable.java | 23 +- .../core/src/com/ibm/icu/impl/Trie2_16.java | 88 +++++++ .../core/src/com/ibm/icu/impl/Trie2_32.java | 89 +++++++ .../com/ibm/icu/dev/test/util/Trie2Test.java | 220 ++++++++---------- 5 files changed, 331 insertions(+), 176 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java index dcb97d33912..aa97cc12cc9 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -26,16 +26,7 @@ import java.util.NoSuchElementException; */ public abstract class Trie2 implements Iterable { - /** - * Selectors for the width of a UTrie2 data value. - * TODO: this can probably be removed. It's no longer used in the - * primary API - */ - enum ValueWidth { - BITS_16, - BITS_32 - } - + /** * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). * The serialized format is identical between ICU4C and ICU4J, so this function @@ -215,16 +206,14 @@ public abstract class Trie2 implements Iterable { * * @param is an InputStream containing the serialized form * of a UTrie, version 1 or 2. The stream must support mark() and reset(). - * TODO: is requiring mark and reset ok? * The position of the input stream will be left unchanged. - * @param anyEndianOk If FALSE, only big-endian (Java native) serialized forms are recognized. + * @param littleEndianOk If FALSE, only big-endian (Java native) serialized forms are recognized. * If TRUE, little-endian serialized forms are recognized as well. - * TODO: dump this option, always allow either endian? Or allow only big endian? 
* @return the Trie version of the serialized form, or 0 if it is not * recognized as a serialized UTrie * @throws IOException on errors in reading from the input stream. */ - public static int getVersion(InputStream is, boolean anyEndianOk) throws IOException { + public static int getVersion(InputStream is, boolean littleEndianOk) throws IOException { if (! is.markSupported()) { throw new IllegalArgumentException("Input stream must support mark()."); } @@ -239,7 +228,7 @@ public abstract class Trie2 implements Iterable { if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='2') { return 2; } - if (anyEndianOk) { + if (littleEndianOk) { if (sig[0]=='e' && sig[1]=='i' && sig[2]=='r' && sig[3]=='T') { return 1; } @@ -525,7 +514,7 @@ public abstract class Trie2 implements Iterable { * * @param text A text string to be iterated over. * @param index The starting iteration position within the input text. - * @return An iterator + * @return the CharSequenceIterator */ public CharSequenceIterator charSequenceIterator(CharSequence text, int index) { return new CharSequenceIterator(text, index); @@ -626,6 +615,14 @@ public abstract class Trie2 implements Iterable { /** + * Selectors for the width of a UTrie2 data value. + */ + enum ValueWidth { + BITS_16, + BITS_32 + } + + /** * Trie2 data structure in serialized form: * * UTrie2Header header; @@ -899,7 +896,7 @@ public abstract class Trie2 implements Iterable { // Iteration over code point values. val = get(nextStart); mappedVal = mapper.map(val); - endOfRange = rangeEnd(nextStart); + endOfRange = rangeEnd(nextStart, limitCP, val); // Loop once for each range in the Trie2 with the same raw (unmapped) value. // Loop continues so long as the mapped values are the same. 
for (;;) { @@ -910,7 +907,7 @@ public abstract class Trie2 implements Iterable { if (mapper.map(val) != mappedVal) { break; } - endOfRange = rangeEnd(endOfRange+1); + endOfRange = rangeEnd(endOfRange+1, limitCP, val); } } else { // Iteration over the alternate lead surrogate values. @@ -949,31 +946,7 @@ public abstract class Trie2 implements Iterable { throw new UnsupportedOperationException(); } - - /** - * Find the last character in a contiguous range of characters with the - * same Trie2 value as the input character. - * - * @param c The character to begin with. - * @return The last contiguous character with the same value. - */ - private int rangeEnd(int startingC) { - // TODO: add optimizations - int c; - int val = get(startingC); - int limit = Math.min(highStart, limitCP); - - for (c = startingC+1; c < limit; c++) { - if (get(c) != val) { - break; - } - } - if (c >= highStart) { - c = limitCP; - } - return c - 1; - } - + /** * Find the last lead surrogate in a contiguous range with the * same Trie2 value as the input character. @@ -981,6 +954,11 @@ public abstract class Trie2 implements Iterable { * Use the alternate Lead Surrogate values from the Trie2, * not the code-point values. * + * Note: Trie2_16 and Trie2_32 override this implementation with optimized versions, + * meaning that the implementation here is only being used with + * Trie2Writable. The code here is logically correct with any type + * of Trie2, however. + * * @param c The character to begin with. * @return The last contiguous character with the same value. 
 */ @@ -989,7 +967,6 @@ public abstract class Trie2 implements Iterable { return 0xdbff; } - // TODO: add optimizations int c; int val = getFromU16SingleLead(startingLS); for (c = startingLS+1; c <= 0x0dbff; c++) { @@ -1020,6 +997,28 @@ public abstract class Trie2 implements Iterable { private boolean doLeadSurrogates = true; } + /** + * Find the last character in a contiguous range of characters with the + * same Trie2 value as the input character. + * + * @param start The character to begin with. + * @return The last contiguous character with the same value. + */ + int rangeEnd(int start, int limitp, int val) { + int c; + int limit = Math.min(highStart, limitp); + + for (c = start+1; c < limit; c++) { + if (get(c) != val) { + break; + } + } + if (c >= highStart) { + c = limitp; + } + return c - 1; + } + // // Hashing implementation functions. FNV hash. Respected public domain algorithm. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java index d92d74db89f..989c907bd1a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java @@ -17,7 +17,7 @@ public class Trie2Writable extends Trie2 { /** - * Create a new, empty, writable Trie2. At build time, 32-bit data values are used. + * Create a new, empty, writable Trie2. 32-bit data values are used. * * @param initialValueP the initial value that is set for all code points * @param errorValueP the value for out-of-range code points and illegal UTF-8 @@ -136,9 +136,9 @@ public class Trie2Writable extends Trie2 { /** - * Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie. + * Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie2. * - * @param source the source Trie + * @param source the source Trie2. Its contents will be copied into the new Trie2. 
*/ public Trie2Writable(Trie2 source) { init(source.initialValue, source.errorValue); @@ -524,7 +524,6 @@ public class Trie2Writable extends Trie2 { public Trie2Writable setRange(Trie2.Range range, boolean overwrite) { fHash = 0; if (range.leadSurrogate) { - // TODO: optimize this. for (int c=range.startCodePoint; c<=range.endCodePoint; c++) { if (overwrite || getFromU16SingleLead((char)c) == this.initialValue) { setForLeadSurrogateCodeUnit((char)c, range.value); @@ -549,11 +548,8 @@ public class Trie2Writable extends Trie2 { * For code units outside of the lead surrogate range, this function * behaves identically to set(). * - * TODO: ICU4C restricts this function to lead surrogates only. - * Should ICU4J match, or should ICU4C be loosened? - * * @param codeUnit A UTF-16 code unit. - * @param value the value + * @param value the value to be stored in the Trie2. */ public Trie2Writable setForLeadSurrogateCodeUnit(char codeUnit, int value) { fHash = 0; @@ -563,7 +559,7 @@ public class Trie2Writable extends Trie2 { /** - * Get the value for a code point as stored in the trie. + * Get the value for a code point as stored in the Trie2. * * @param codePoint the code point * @return the value @@ -978,13 +974,10 @@ public class Trie2Writable extends Trie2 { } - - /* serialization ------------------------------------------------------------ */ - - /** * Produce an optimized, read-only Trie2_16 from this writable Trie. - * The data values must all fit as an unsigned 16 bit value. + * The data values outside of the range that will fit in a 16 bit + * unsigned value will be truncated. */ public Trie2_16 toTrie2_16() { Trie2_16 frozenTrie = new Trie2_16(); @@ -1153,7 +1146,7 @@ public class Trie2Writable extends Trie2 { } break; } - // The writable, but compressed, Trie stays around unless the caller drops its references to it. + // The writable, but compressed, Trie2 stays around unless the caller drops its references to it. 
} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java index 2297728e210..e7f03b73c2f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java @@ -158,4 +158,92 @@ public final class Trie2_16 extends Trie2 { bytesWritten += dataLength*2; return bytesWritten; } + + + /** + * Given a starting code point, find the last in a range of code points, + * all with the same value. + * + * This function is part of the implementation of iterating over the + * Trie2's contents. + * @param startingCP The code point at which to begin looking. + * @return The last code point with the same value as the starting code point. + */ + @Override + int rangeEnd(int startingCP, int limit, int value) { + int cp = startingCP; + int block = 0; + int index2Block = 0; + + // Loop runs once for each of + // - a partial data block + // - a reference to the null (default) data block. + // - a reference to the index2 null block + + outerLoop: + for (;;) { + if (cp >= limit) { + break; + } + if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + index2Block = 0; + block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; + } else if (cp < 0xffff) { + // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 + index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; + block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; + } else if (cp < highStart) { + // Supplemental code point, use two-level lookup. 
+ int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); + index2Block = index[ix]; + block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; + } else { + // Code point above highStart. + if (value == index[highValueIndex]) { + cp = limit; + } + break; + } + + if (index2Block == index2NullOffset) { + if (value != initialValue) { + break; + } + cp += UTRIE2_CP_PER_INDEX_1_ENTRY; + } else if (block == dataNullOffset) { + // The block at dataNullOffset has all values == initialValue. + // Because Trie2 iteration always proceeds in ascending order, we will always + // encounter a null block at its beginning, and can skip over + // a number of code points equal to the length of the block. + if (value != initialValue) { + break; + } + cp += UTRIE2_DATA_BLOCK_LENGTH; + } else { + // Current position refers to an ordinary data block. + // Walk over the data entries, checking the values. + int startIx = block + (cp & UTRIE2_DATA_MASK); + int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; + for (int ix = startIx; ix<limitIx; ix++) { + if (index[ix] != value) { + // We came to an entry with a different value. + // We are done. + cp += (ix - startIx); + break outerLoop; + } + } + // The ordinary data block contained our value until its end. + // Advance the current code point, and continue the outer loop. + cp += limitIx - startIx; + } + } + if (cp > limit) { + cp = limit; + } + + return cp - 1; + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java index 932a5ae4bd9..a86b3e65386 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java @@ -155,5 +155,94 @@ public class Trie2_32 extends Trie2 { bytesWritten += dataLength*4; return bytesWritten; } + + + /** + * Given a starting code point, find the last in a range of code points, + * all with the same value. + * + * This function is part of the implementation of iterating over the + * Trie2's contents. + * @param startingCP The code point at which to begin looking. + * @return The last code point with the same value as the starting code point. 
+ */ + @Override + int rangeEnd(int startingCP, int limit, int value) { + int cp = startingCP; + int block = 0; + int index2Block = 0; + + // Loop runs once for each of + // - a partial data block + // - a reference to the null (default) data block. + // - a reference to the index2 null block + + outerLoop: + for (;;) { + if (cp >= limit) { + break; + } + if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 32 bit data is stored in the separate data32 array. + index2Block = 0; + block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; + } else if (cp < 0xffff) { + // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 + index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; + block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; + } else if (cp < highStart) { + // Supplemental code point, use two-level lookup. + int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); + index2Block = index[ix]; + block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; + } else { + // Code point above highStart. + if (value == data32[highValueIndex]) { + cp = limit; + } + break; + } + + if (index2Block == index2NullOffset) { + if (value != initialValue) { + break; + } + cp += UTRIE2_CP_PER_INDEX_1_ENTRY; + } else if (block == dataNullOffset) { + // The block at dataNullOffset has all values == initialValue. + // Because Trie2 iteration always proceeds in ascending order, we will always + // encounter a null block at its beginning, and can skip over + // a number of code points equal to the length of the block. + if (value != initialValue) { + break; + } + cp += UTRIE2_DATA_BLOCK_LENGTH; + } else { + // Current position refers to an ordinary data block. + // Walk over the data entries, checking the values. 
+ int startIx = block + (cp & UTRIE2_DATA_MASK); + int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; + for (int ix = startIx; ix<limitIx; ix++) { + if (data32[ix] != value) { + // We came to an entry with a different value. + // We are done. + cp += (ix - startIx); + break outerLoop; + } + } + // The ordinary data block contained our value until its end. + // Advance the current code point, and continue the outer loop. + cp += limitIx - startIx; + } + } + if (cp > limit) { + cp = limit; + } + + return cp - 1; + } + } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java index 7dfdc22fc05..3bb973d892b 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java @@ -55,7 +55,7 @@ public class Trie2Test extends TestFmwk { ByteArrayOutputStream os = new ByteArrayOutputStream(); trie.toTrie2_16().serialize(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); - assertEquals(where(), 2, Trie2.getVersion(is, true)); + assertEquals(null, 2, Trie2.getVersion(is, true)); } catch (IOException e) { errln(where() + e.toString()); } @@ -67,18 +67,18 @@ public class Trie2Test extends TestFmwk { Trie2Writable trieWB = new Trie2Writable(0,0); Trie2 trieA = trieWA; Trie2 trieB = trieWB; - assertTrue(where(), trieA.equals(trieB)); - assertEquals(where(), trieA, trieB); - assertEquals(where(), trieA.hashCode(), trieB.hashCode()); + assertTrue("", trieA.equals(trieB)); + assertEquals("", trieA, trieB); + assertEquals("", trieA.hashCode(), trieB.hashCode()); trieWA.set(500, 2); - assertNotEquals(where(), trieA, trieB); + assertNotEquals("", trieA, trieB); // Note that the hash codes do not strictly need to be different, // but it's highly likely that something is wrong if they are the same. 
- assertNotEquals(where(), trieA.hashCode(), trieB.hashCode()); + assertNotEquals("", trieA.hashCode(), trieB.hashCode()); trieWB.set(500, 2); trieA = trieWA.toTrie2_16(); - assertEquals(where(), trieA, trieB); - assertEquals(where(), trieA.hashCode(), trieB.hashCode()); + assertEquals("", trieA, trieB); + assertEquals("", trieA.hashCode(), trieB.hashCode()); } // @@ -90,32 +90,32 @@ public class Trie2Test extends TestFmwk { it = trie.iterator(); Trie2.Range r = it.next(); - assertEquals(where(), 0, r.startCodePoint); - assertEquals(where(), 0x10ffff, r.endCodePoint); - assertEquals(where(), 17, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0, r.startCodePoint); + assertEquals("", 0x10ffff, r.endCodePoint); + assertEquals("", 17, r.value); + assertEquals("", false, r.leadSurrogate); r = it.next(); - assertEquals(where(), 0xd800, r.startCodePoint); - assertEquals(where(), 0xdbff, r.endCodePoint); - assertEquals(where(), 17, r.value); - assertEquals(where(), true, r.leadSurrogate); + assertEquals("", 0xd800, r.startCodePoint); + assertEquals("", 0xdbff, r.endCodePoint); + assertEquals("", 17, r.value); + assertEquals("", true, r.leadSurrogate); int i = 0; for (Trie2.Range rr: trie) { switch (i) { case 0: - assertEquals(where(), 0, rr.startCodePoint); - assertEquals(where(), 0x10ffff, rr.endCodePoint); - assertEquals(where(), 17, rr.value); - assertEquals(where(), false, rr.leadSurrogate); + assertEquals("", 0, rr.startCodePoint); + assertEquals("", 0x10ffff, rr.endCodePoint); + assertEquals("", 17, rr.value); + assertEquals("", false, rr.leadSurrogate); break; case 1: - assertEquals(where(), 0xd800, rr.startCodePoint); - assertEquals(where(), 0xdbff, rr.endCodePoint); - assertEquals(where(), 17, rr.value); - assertEquals(where(), true, rr.leadSurrogate); + assertEquals("", 0xd800, rr.startCodePoint); + assertEquals("", 0xdbff, rr.endCodePoint); + assertEquals("", 17, rr.value); + assertEquals("", true, rr.leadSurrogate); break; 
default: errln(where() + " Unexpected iteration result"); @@ -140,10 +140,10 @@ public class Trie2Test extends TestFmwk { }; Iterator it = trie.iterator(vm); Trie2.Range r = it.next(); - assertEquals(where(), 0, r.startCodePoint); - assertEquals(where(), 0x10ffff, r.endCodePoint); - assertEquals(where(), 42, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0, r.startCodePoint); + assertEquals("", 0x10ffff, r.endCodePoint); + assertEquals("", 42, r.value); + assertEquals("", false, r.leadSurrogate); } @@ -154,24 +154,24 @@ public class Trie2Test extends TestFmwk { trie.set(0x2f810, 10); Iterator it = trie.iteratorForLeadSurrogate((char)0xd87e); Trie2.Range r = it.next(); - assertEquals(where(), 0x2f800, r.startCodePoint); - assertEquals(where(), 0x2f80f, r.endCodePoint); - assertEquals(where(), 0xdefa17, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0x2f800, r.startCodePoint); + assertEquals("", 0x2f80f, r.endCodePoint); + assertEquals("", 0xdefa17, r.value); + assertEquals("", false, r.leadSurrogate); r = it.next(); - assertEquals(where(), 0x2f810, r.startCodePoint); - assertEquals(where(), 0x2f810, r.endCodePoint); - assertEquals(where(), 10, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0x2f810, r.startCodePoint); + assertEquals("", 0x2f810, r.endCodePoint); + assertEquals("", 10, r.value); + assertEquals("", false, r.leadSurrogate); r = it.next(); - assertEquals(where(), 0x2f811, r.startCodePoint); - assertEquals(where(), 0x2fbff, r.endCodePoint); - assertEquals(where(), 0xdefa17, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0x2f811, r.startCodePoint); + assertEquals("", 0x2fbff, r.endCodePoint); + assertEquals("", 0xdefa17, r.value); + assertEquals("", false, r.leadSurrogate); - assertFalse(where(), it.hasNext()); + assertFalse("", it.hasNext()); } // Iteration over a leading surrogate range with a ValueMapper. 
@@ -189,12 +189,12 @@ public class Trie2Test extends TestFmwk { }; Iterator it = trie.iteratorForLeadSurrogate((char)0xd87e, m); Trie2.Range r = it.next(); - assertEquals(where(), 0x2f800, r.startCodePoint); - assertEquals(where(), 0x2fbff, r.endCodePoint); - assertEquals(where(), 0xdefa17, r.value); - assertEquals(where(), false, r.leadSurrogate); + assertEquals("", 0x2f800, r.startCodePoint); + assertEquals("", 0x2fbff, r.endCodePoint); + assertEquals("", 0xdefa17, r.value); + assertEquals("", false, r.leadSurrogate); - assertFalse(where(), it.hasNext()); + assertFalse("", it.hasNext()); } // Trie2.serialize() @@ -206,23 +206,23 @@ public class Trie2Test extends TestFmwk { trie.set(0xffee, 300); Trie2_16 frozen16 = trie.toTrie2_16(); Trie2_32 frozen32 = trie.toTrie2_32(); - assertEquals(where(), trie, frozen16); - assertEquals(where(), trie, frozen32); - assertEquals(where(), frozen16, frozen32); + assertEquals("", trie, frozen16); + assertEquals("", trie, frozen32); + assertEquals("", frozen16, frozen32); ByteArrayOutputStream os = new ByteArrayOutputStream(); try { frozen16.serialize(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); Trie2 unserialized16 = Trie2.createFromSerialized(is); - assertEquals(where(), trie, unserialized16); - assertEquals(where(), Trie2_16.class, unserialized16.getClass()); + assertEquals("", trie, unserialized16); + assertEquals("", Trie2_16.class, unserialized16.getClass()); os.reset(); frozen32.serialize(os); is = new ByteArrayInputStream(os.toByteArray()); Trie2 unserialized32 = Trie2.createFromSerialized(is); - assertEquals(where(), trie, unserialized32); - assertEquals(where(), Trie2_32.class, unserialized32.getClass()); + assertEquals("", trie, unserialized32); + assertEquals("", Trie2_32.class, unserialized32.getClass()); } catch (IOException e) { errln(where() + " Unexpected exception: " + e); } @@ -243,28 +243,28 @@ public class Trie2Test extends TestFmwk { // Constructor from another Trie2 Trie2 t2 = 
new Trie2Writable(t1); - assertTrue(where(), t1.equals(t2)); + assertTrue("", t1.equals(t2)); // Set / Get Trie2Writable t1w = new Trie2Writable(10, 666); t1w.set(0x4567, 99); - assertEquals(where(), 10, t1w.get(0x4566)); - assertEquals(where(), 99, t1w.get(0x4567)); - assertEquals(where(), 666, t1w.get(-1)); - assertEquals(where(), 666, t1w.get(0x110000)); + assertEquals("", 10, t1w.get(0x4566)); + assertEquals("", 99, t1w.get(0x4567)); + assertEquals("", 666, t1w.get(-1)); + assertEquals("", 666, t1w.get(0x110000)); // SetRange t1w = new Trie2Writable(10, 666); t1w.setRange(13 /*start*/, 6666 /*end*/, 7788 /*value*/, false /*overwrite */); t1w.setRange(6000, 7000, 9900, true); - assertEquals(where(), 10, t1w.get(12)); - assertEquals(where(), 7788, t1w.get(13)); - assertEquals(where(), 7788, t1w.get(5999)); - assertEquals(where(), 9900, t1w.get(6000)); - assertEquals(where(), 9900, t1w.get(7000)); - assertEquals(where(), 10, t1w.get(7001)); - assertEquals(where(), 666, t1w.get(0x110000)); + assertEquals("", 10, t1w.get(12)); + assertEquals("", 7788, t1w.get(13)); + assertEquals("", 7788, t1w.get(5999)); + assertEquals("", 9900, t1w.get(6000)); + assertEquals("", 9900, t1w.get(7000)); + assertEquals("", 10, t1w.get(7001)); + assertEquals("", 666, t1w.get(0x110000)); // setRange from a Trie2.Range // (Ranges are more commonly created by iterating over a Trie2, @@ -276,19 +276,19 @@ public class Trie2Test extends TestFmwk { r.leadSurrogate = false; t1w = new Trie2Writable(0, 0xbad); t1w.setRange(r, true); - assertEquals(where(), 0, t1w.get(49)); - assertEquals(where(), 0x12345678, t1w.get(50)); - assertEquals(where(), 0x12345678, t1w.get(52)); - assertEquals(where(), 0, t1w.get(53)); + assertEquals(null, 0, t1w.get(49)); + assertEquals("", 0x12345678, t1w.get(50)); + assertEquals("", 0x12345678, t1w.get(52)); + assertEquals("", 0, t1w.get(53)); // setForLeadSurrogateCodeUnit / getFromU16SingleLead t1w = new Trie2Writable(10, 0xbad); - assertEquals(where(), 10, 
t1w.getFromU16SingleLead((char)0x0d801)); + assertEquals("", 10, t1w.getFromU16SingleLead((char)0x0d801)); t1w.setForLeadSurrogateCodeUnit((char)0xd801, 5000); t1w.set(0xd801, 6000); - assertEquals(where(), 5000, t1w.getFromU16SingleLead((char)0x0d801)); - assertEquals(where(), 6000, t1w.get(0x0d801)); + assertEquals("", 5000, t1w.getFromU16SingleLead((char)0x0d801)); + assertEquals("", 6000, t1w.get(0x0d801)); // get(). Is covered by nearly every other test. @@ -298,12 +298,12 @@ public class Trie2Test extends TestFmwk { t1w.set(42, 5555); t1w.set(0x1ff00, 224); Trie2_16 t1_16 = t1w.toTrie2_16(); - assertTrue(where(), t1w.equals(t1_16)); + assertTrue("", t1w.equals(t1_16)); // alter the writable Trie2 and then re-freeze. t1w.set(152, 129); t1_16 = t1w.toTrie2_16(); - assertTrue(where(), t1w.equals(t1_16)); - assertEquals(where(), 129, t1w.get(152)); + assertTrue("", t1w.equals(t1_16)); + assertEquals("", 129, t1w.get(152)); // Trie2_32 getAsFrozen_32() // @@ -311,13 +311,13 @@ public class Trie2Test extends TestFmwk { t1w.set(42, 5555); t1w.set(0x1ff00, 224); Trie2_32 t1_32 = t1w.toTrie2_32(); - assertTrue(where(), t1w.equals(t1_32)); + assertTrue("", t1w.equals(t1_32)); // alter the writable Trie2 and then re-freeze. t1w.set(152, 129); - assertNotEquals(where(), t1_32, t1w); + assertNotEquals("", t1_32, t1w); t1_32 = t1w.toTrie2_32(); - assertTrue(where(), t1w.equals(t1_32)); - assertEquals(where(), 129, t1w.get(152)); + assertTrue("", t1w.equals(t1_32)); + assertEquals("", 129, t1w.get(152)); // serialize(OutputStream os, ValueWidth width) @@ -336,22 +336,22 @@ public class Trie2Test extends TestFmwk { int serializedLen = t1w.toTrie2_16().serialize(os); // Fragile test. Serialized length could change with changes to compaction. // But it should not change unexpectedly. 
- assertEquals(where(), 3508, serializedLen); + assertEquals("", 3508, serializedLen); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); Trie2 t1ws16 = Trie2.createFromSerialized(is); - assertEquals(where(), t1ws16.getClass(), Trie2_16.class); - assertEquals(where(), t1w, t1ws16); + assertEquals("", t1ws16.getClass(), Trie2_16.class); + assertEquals("", t1w, t1ws16); // Serialize to 32 bits os.reset(); serializedLen = t1w.toTrie2_32().serialize(os); // Fragile test. Serialized length could change with changes to compaction. // But it should not change unexpectedly. - assertEquals(where(), 4332, serializedLen); + assertEquals("", 4332, serializedLen); is = new ByteArrayInputStream(os.toByteArray()); Trie2 t1ws32 = Trie2.createFromSerialized(is); - assertEquals(where(), t1ws32.getClass(), Trie2_32.class); - assertEquals(where(), t1w, t1ws32); + assertEquals("", t1ws32.getClass(), Trie2_32.class); + assertEquals("", t1w, t1ws32); } catch (IOException e) { errln(where() + e.toString()); } @@ -377,14 +377,14 @@ public class Trie2Test extends TestFmwk { for (i=0; it.hasNext(); i++) { ir = it.next(); int expectedCP = Character.codePointAt(text, i); - assertEquals(where() + " i="+i, expectedCP, ir.codePoint); - assertEquals(where() + " i="+i, i, ir.index); - assertEquals(where() + " i="+i, vals.charAt(i), ir.value); + assertEquals("" + " i="+i, expectedCP, ir.codePoint); + assertEquals("" + " i="+i, i, ir.index); + assertEquals("" + " i="+i, vals.charAt(i), ir.value); if (expectedCP >= 0x10000) { i++; } } - assertEquals(where(), text.length(), i); + assertEquals("", text.length(), i); // Check reverse iteration, starting at an intermediate point. it.set(5); @@ -392,11 +392,11 @@ public class Trie2Test extends TestFmwk { ir = it.previous(); int expectedCP = Character.codePointBefore(text, i); i -= (expectedCP < 0x10000? 
1 : 2); - assertEquals(where() + " i="+i, expectedCP, ir.codePoint); - assertEquals(where() + " i="+i, i, ir.index); - assertEquals(where() + " i="+i, vals.charAt(i), ir.value); + assertEquals("" + " i="+i, expectedCP, ir.codePoint); + assertEquals("" + " i="+i, i, ir.index); + assertEquals("" + " i="+i, vals.charAt(i), ir.value); } - assertEquals(where(), 0, i); + assertEquals("", 0, i); } @@ -628,10 +628,6 @@ public class Trie2Test extends TestFmwk { int start, limit; int i, countSpecials; - boolean isFrozen = trie instanceof Trie2_16 || trie instanceof Trie2_32; - - String typeName= isFrozen ? "frozen trie" : "newTrie"; - countSpecials=0; /*getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);*/ errorValue = 0x0bad; initialValue = 0; @@ -647,9 +643,10 @@ public class Trie2Test extends TestFmwk { while(start=(char)0xd800 && cp<(char)0xdc00); - assertEquals(wb, range.value, trie.getFromU16SingleLead((char)cp)); + assertTrue(testName, cp>=(char)0xd800 && cp<(char)0xdc00); + assertEquals(testName, range.value, trie.getFromU16SingleLead((char)cp)); } else { - assertEquals(wc, range.value, trie.get(cp)); + assertEquals(testName, range.value, trie.get(cp)); } } } - if (false) System.out.println("\n\n"); - } // Was testTrieRanges in ICU4C. Renamed to not conflict with ICU4J test framework. @@ -742,8 +729,8 @@ public class Trie2Test extends TestFmwk { // Run the same tests against locally contructed Tries. 
Trie2Writable trieW = genTrieFromSetRanges(setRanges); trieGettersTest(testName, trieW, checkRanges); - assertEquals(where(), trieW, trie16); // Locally built tries must be - assertEquals(where(), trieW, trie32); // the same as those imported from ICU4C + assertEquals("", trieW, trie16); // Locally built tries must be + assertEquals("", trieW, trie32); // the same as those imported from ICU4C Trie2_32 trie32a = trieW.toTrie2_32(); @@ -751,6 +738,7 @@ public class Trie2Test extends TestFmwk { Trie2_16 trie16a = trieW.toTrie2_16(); trieGettersTest(testName, trie16a, checkRanges); + } // Was "TrieTest" in trie2test.c @@ -765,8 +753,6 @@ public class Trie2Test extends TestFmwk { } - - // TODO: push this where() function up into the test framework implementation of assert private String where() { StackTraceElement[] st = new Throwable().getStackTrace(); String w = "File: " + st[1].getFileName() + ", Line " + st[1].getLineNumber();