mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 05:25:34 +00:00
ICU-21368 unit-test & fix BytesTrie jump delta encoding
This commit is contained in:
parent
18b23fb372
commit
767598009d
7 changed files with 132 additions and 19 deletions
|
@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
|||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
return write(i);
|
||||
} else {
|
||||
char intBytes[5];
|
||||
return write(intBytes, internalEncodeDelta(i, intBytes));
|
||||
}
|
||||
char intBytes[5];
|
||||
int32_t length;
|
||||
}
|
||||
|
||||
int32_t
|
||||
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
intBytes[0]=(char)i;
|
||||
return 1;
|
||||
}
|
||||
int32_t length=1;
|
||||
if(i<=BytesTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=BytesTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
length=4;
|
||||
length=2;
|
||||
}
|
||||
intBytes[1]=(char)(i>>16);
|
||||
intBytes[length++]=(char)(i>>16);
|
||||
}
|
||||
intBytes[1]=(char)(i>>8);
|
||||
intBytes[length++]=(char)(i>>8);
|
||||
}
|
||||
intBytes[length++]=(char)i;
|
||||
return write(intBytes, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
|
@ -378,6 +380,7 @@ public:
|
|||
|
||||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/stringtriebuilder.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BytesTrieElement;
|
||||
|
@ -125,6 +127,8 @@ public:
|
|||
BytesTrieBuilder &clear();
|
||||
|
||||
private:
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
|
||||
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
|
||||
|
||||
|
@ -168,6 +172,7 @@ private:
|
|||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget);
|
||||
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
|
||||
|
||||
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
|
||||
BytesTrieElement *elements;
|
||||
|
|
|
@ -56,6 +56,7 @@ public:
|
|||
void TestTruncatingIteratorFromLinearMatchLong();
|
||||
void TestIteratorFromBytes();
|
||||
void TestFailedIterator();
|
||||
void TestDelta();
|
||||
|
||||
void checkData(const StringAndValue data[], int32_t dataLength);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
||||
|
@ -110,6 +111,7 @@ void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
|
|||
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
|
||||
TESTCASE_AUTO(TestIteratorFromBytes);
|
||||
TESTCASE_AUTO(TestFailedIterator);
|
||||
TESTCASE_AUTO(TestDelta);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -599,6 +601,45 @@ void BytesTrieTest::TestFailedIterator() {
|
|||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::TestDelta() {
|
||||
char intBytes0[5];
|
||||
char intBytes1[5];
|
||||
static constexpr int32_t sampleDeltas[] = {
|
||||
-1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
|
||||
-2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
|
||||
-3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
|
||||
-4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
|
||||
-5, 0x1000000, 0x1000001, 0x7fffffff
|
||||
};
|
||||
int32_t expectedLength = 0;
|
||||
for (int32_t delta : sampleDeltas) {
|
||||
if (delta < 0) {
|
||||
expectedLength = -delta;
|
||||
continue;
|
||||
}
|
||||
// Encoding twice into differently-initialized arrays
|
||||
// catches bytes that are not written to.
|
||||
memset(intBytes0, 0, sizeof(intBytes0));
|
||||
memset(intBytes1, 1, sizeof(intBytes1));
|
||||
int32_t length0 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes0);
|
||||
int32_t length1 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes1);
|
||||
assertTrue(UnicodeString(u"non-zero length to encode delta ") + delta, length0 > 0);
|
||||
assertEquals(UnicodeString(u"consistent length to encode delta ") + delta, length0, length1);
|
||||
assertEquals(UnicodeString(u"expected length to encode delta ") + delta,
|
||||
expectedLength, length0);
|
||||
for (int32_t i = 0; i < length0; ++i) {
|
||||
uint8_t b0 = intBytes0[i];
|
||||
uint8_t b1 = intBytes1[i];
|
||||
assertEquals(UnicodeString(u"differently encoded delta ") + delta +
|
||||
u" at byte index " + i, b0, b1);
|
||||
}
|
||||
const uint8_t *start = (const uint8_t *)intBytes0;
|
||||
const uint8_t *pos = BytesTrie::jumpByDelta(start);
|
||||
assertEquals(UnicodeString(u"roundtrip for delta ") + delta,
|
||||
delta, (int32_t)(pos - start) - length0);
|
||||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
||||
|
|
|
@ -794,8 +794,13 @@ public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
|
|||
return skipValue(pos, leadByte);
|
||||
}
|
||||
|
||||
// Reads a jump delta and jumps.
|
||||
private static int jumpByDelta(byte[] bytes, int pos) {
|
||||
/**
|
||||
* Reads a jump delta and jumps.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public static int jumpByDelta(byte[] bytes, int pos) {
|
||||
int delta=bytes[pos++]&0xff;
|
||||
if(delta<kMinTwoByteDeltaLead) {
|
||||
// nothing to do
|
||||
|
|
|
@ -35,8 +35,11 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
|
|||
s=sequence;
|
||||
len=length;
|
||||
}
|
||||
// Returns the byte at index i widened to a char; the 0xff mask keeps the
// result in 0..0xff instead of sign-extending negative byte values.
@Override
|
||||
public char charAt(int i) { return (char)(s[i]&0xff); }
|
||||
// Returns the stored length field (len), not the length of the backing array.
@Override
|
||||
public int length() { return len; }
|
||||
// Not implemented: always returns null regardless of start and end.
// NOTE(review): presumably no caller needs sub-sequences of this wrapper — confirm.
@Override
|
||||
public CharSequence subSequence(int start, int end) { return null; }
|
||||
|
||||
private byte[] s;
|
||||
|
@ -278,30 +281,41 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
|
|||
assert(i>=0);
|
||||
if(i<=BytesTrie.kMaxOneByteDelta) {
|
||||
return write(i);
|
||||
} else {
|
||||
return write(intBytes, internalEncodeDelta(i, intBytes));
|
||||
}
|
||||
int length;
|
||||
}
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final int internalEncodeDelta(int i, byte[] intBytes) {
|
||||
assert(i>=0);
|
||||
if(i<=BytesTrie.kMaxOneByteDelta) {
|
||||
intBytes[0]=(byte)i;
|
||||
return 1;
|
||||
}
|
||||
int length=1;
|
||||
if(i<=BytesTrie.kMaxTwoByteDelta) {
|
||||
intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=BytesTrie.kMaxThreeByteDelta) {
|
||||
intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead;
|
||||
intBytes[1]=(byte)(i>>24);
|
||||
length=4;
|
||||
length=2;
|
||||
}
|
||||
intBytes[1]=(byte)(i>>16);
|
||||
intBytes[length++]=(byte)(i>>16);
|
||||
}
|
||||
intBytes[1]=(byte)(i>>8);
|
||||
intBytes[length++]=(byte)(i>>8);
|
||||
}
|
||||
intBytes[length++]=(byte)i;
|
||||
return write(intBytes, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
// Byte serialization of the trie.
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.junit.Test;
|
||||
|
@ -531,6 +532,42 @@ public class BytesTrieTest extends TestFmwk {
|
|||
assertEquals("abc value", 300, copy.getValue());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestDelta() {
|
||||
byte[] intBytes0 = new byte[5];
|
||||
byte[] intBytes1 = new byte[5];
|
||||
int[] sampleDeltas = new int[] {
|
||||
-1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
|
||||
-2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
|
||||
-3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
|
||||
-4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
|
||||
-5, 0x1000000, 0x1000001, 0x7fffffff
|
||||
};
|
||||
int expectedLength = 0;
|
||||
for (int delta : sampleDeltas) {
|
||||
if (delta < 0) {
|
||||
expectedLength = -delta;
|
||||
continue;
|
||||
}
|
||||
// Encoding twice into differently-initialized arrays
|
||||
// catches bytes that are not written to.
|
||||
Arrays.fill(intBytes0, (byte)0);
|
||||
Arrays.fill(intBytes1, (byte)1);
|
||||
int length0 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes0);
|
||||
int length1 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes1);
|
||||
assertTrue("non-zero length to encode delta " + delta, length0 > 0);
|
||||
assertEquals("consistent length to encode delta " + delta, length0, length1);
|
||||
assertEquals("expected length to encode delta " + delta, expectedLength, length0);
|
||||
for (int i = 0; i < length0; ++i) {
|
||||
byte b0 = intBytes0[i];
|
||||
byte b1 = intBytes1[i];
|
||||
assertEquals("differently encoded delta " + delta + " at byte index " + i, b0, b1);
|
||||
}
|
||||
int pos = BytesTrie.jumpByDelta(intBytes0, 0);
|
||||
assertEquals("roundtrip for delta " + delta, delta, pos - length0);
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: checks the whole array by passing data.length as the
// element count to the two-argument checkData().
private void checkData(StringAndValue data[]) {
|
||||
checkData(data, data.length);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue