ICU-21368 unit-test & fix BytesTrie jump delta encoding

This commit is contained in:
Markus Scherer 2021-03-02 18:26:42 -08:00
parent 18b23fb372
commit 767598009d
7 changed files with 132 additions and 19 deletions

View file

@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
return write(i);
} else {
char intBytes[5];
return write(intBytes, internalEncodeDelta(i, intBytes));
}
char intBytes[5];
int32_t length;
}
int32_t
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
intBytes[0]=(char)i;
return 1;
}
int32_t length=1;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=4;
length=2;
}
intBytes[1]=(char)(i>>16);
intBytes[length++]=(char)(i>>16);
}
intBytes[1]=(char)(i>>8);
intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
return write(intBytes, length);
return length;
}
U_NAMESPACE_END

View file

@ -30,6 +30,8 @@
#include "unicode/uobject.h"
#include "unicode/ustringtrie.h"
class BytesTrieTest;
U_NAMESPACE_BEGIN
class ByteSink;
@ -378,6 +380,7 @@ public:
private:
friend class BytesTrieBuilder;
friend class ::BytesTrieTest;
/**
* Constructs a BytesTrie reader instance.

View file

@ -30,6 +30,8 @@
#include "unicode/stringpiece.h"
#include "unicode/stringtriebuilder.h"
class BytesTrieTest;
U_NAMESPACE_BEGIN
class BytesTrieElement;
@ -125,6 +127,8 @@ public:
BytesTrieBuilder &clear();
private:
friend class ::BytesTrieTest;
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
@ -168,6 +172,7 @@ private:
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
virtual int32_t writeDeltaTo(int32_t jumpTarget);
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
BytesTrieElement *elements;

View file

@ -56,6 +56,7 @@ public:
void TestTruncatingIteratorFromLinearMatchLong();
void TestIteratorFromBytes();
void TestFailedIterator();
void TestDelta();
void checkData(const StringAndValue data[], int32_t dataLength);
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
@ -110,6 +111,7 @@ void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
TESTCASE_AUTO(TestIteratorFromBytes);
TESTCASE_AUTO(TestFailedIterator);
TESTCASE_AUTO(TestDelta);
TESTCASE_AUTO_END;
}
@ -599,6 +601,45 @@ void BytesTrieTest::TestFailedIterator() {
}
}
void BytesTrieTest::TestDelta() {
char intBytes0[5];
char intBytes1[5];
static constexpr int32_t sampleDeltas[] = {
-1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
-2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
-3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
-4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
-5, 0x1000000, 0x1000001, 0x7fffffff
};
int32_t expectedLength = 0;
for (int32_t delta : sampleDeltas) {
if (delta < 0) {
expectedLength = -delta;
continue;
}
// Encoding twice into differently-initialized arrays
// catches bytes that are not written to.
memset(intBytes0, 0, sizeof(intBytes0));
memset(intBytes1, 1, sizeof(intBytes1));
int32_t length0 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes0);
int32_t length1 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes1);
assertTrue(UnicodeString(u"non-zero length to encode delta ") + delta, length0 > 0);
assertEquals(UnicodeString(u"consistent length to encode delta ") + delta, length0, length1);
assertEquals(UnicodeString(u"expected length to encode delta ") + delta,
expectedLength, length0);
for (int32_t i = 0; i < length0; ++i) {
uint8_t b0 = intBytes0[i];
uint8_t b1 = intBytes1[i];
assertEquals(UnicodeString(u"differently encoded delta ") + delta +
u" at byte index " + i, b0, b1);
}
const uint8_t *start = (const uint8_t *)intBytes0;
const uint8_t *pos = BytesTrie::jumpByDelta(start);
assertEquals(UnicodeString(u"roundtrip for delta ") + delta,
delta, (int32_t)(pos - start) - length0);
}
}
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
logln("checkData(dataLength=%d, fast)", (int)dataLength);
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);

View file

@ -794,8 +794,13 @@ public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
return skipValue(pos, leadByte);
}
// Reads a jump delta and jumps.
private static int jumpByDelta(byte[] bytes, int pos) {
/**
* Reads a jump delta and jumps.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int jumpByDelta(byte[] bytes, int pos) {
int delta=bytes[pos++]&0xff;
if(delta<kMinTwoByteDeltaLead) {
// nothing to do

View file

@ -35,8 +35,11 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
s=sequence;
len=length;
}
@Override
public char charAt(int i) { return (char)(s[i]&0xff); }
@Override
public int length() { return len; }
@Override
public CharSequence subSequence(int start, int end) { return null; }
private byte[] s;
@ -278,30 +281,41 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
assert(i>=0);
if(i<=BytesTrie.kMaxOneByteDelta) {
return write(i);
} else {
return write(intBytes, internalEncodeDelta(i, intBytes));
}
int length;
}
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final int internalEncodeDelta(int i, byte[] intBytes) {
assert(i>=0);
if(i<=BytesTrie.kMaxOneByteDelta) {
intBytes[0]=(byte)i;
return 1;
}
int length=1;
if(i<=BytesTrie.kMaxTwoByteDelta) {
intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=BytesTrie.kMaxThreeByteDelta) {
intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead;
intBytes[1]=(byte)(i>>24);
length=4;
length=2;
}
intBytes[1]=(byte)(i>>16);
intBytes[length++]=(byte)(i>>16);
}
intBytes[1]=(byte)(i>>8);
intBytes[length++]=(byte)(i>>8);
}
intBytes[length++]=(byte)i;
return write(intBytes, length);
return length;
}
// Byte serialization of the trie.

View file

@ -13,6 +13,7 @@
package com.ibm.icu.dev.test.util;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.NoSuchElementException;
import org.junit.Test;
@ -531,6 +532,42 @@ public class BytesTrieTest extends TestFmwk {
assertEquals("abc value", 300, copy.getValue());
}
@Test
public void TestDelta() {
byte[] intBytes0 = new byte[5];
byte[] intBytes1 = new byte[5];
int[] sampleDeltas = new int[] {
-1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
-2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
-3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
-4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
-5, 0x1000000, 0x1000001, 0x7fffffff
};
int expectedLength = 0;
for (int delta : sampleDeltas) {
if (delta < 0) {
expectedLength = -delta;
continue;
}
// Encoding twice into differently-initialized arrays
// catches bytes that are not written to.
Arrays.fill(intBytes0, (byte)0);
Arrays.fill(intBytes1, (byte)1);
int length0 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes0);
int length1 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes1);
assertTrue("non-zero length to encode delta " + delta, length0 > 0);
assertEquals("consistent length to encode delta " + delta, length0, length1);
assertEquals("expected length to encode delta " + delta, expectedLength, length0);
for (int i = 0; i < length0; ++i) {
byte b0 = intBytes0[i];
byte b1 = intBytes1[i];
assertEquals("differently encoded delta " + delta + " at byte index " + i, b0, b1);
}
int pos = BytesTrie.jumpByDelta(intBytes0, 0);
assertEquals("roundtrip for delta " + delta, delta, pos - length0);
}
}
private void checkData(StringAndValue data[]) {
checkData(data, data.length);
}