ICU-7189 Fixed missing collation case bytes problem in ucol_nextSortKeyPart.

X-SVN-Rev: 26787
This commit is contained in:
Yoshito Umaoka 2009-10-14 21:45:30 +00:00
parent 29d8944fc5
commit 155dfc0605
5 changed files with 114 additions and 3 deletions

View file

@ -6280,6 +6280,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
uint8_t caseBits = 0;
for(;;) {
U_ASSERT(caseShift >= 0 && caseShift <= UCOL_CASE_SHIFT_START);
if(i == count) {
goto saveState;
}
@ -6328,6 +6329,11 @@ ucol_nextSortKeyPart(const UCollator *coll,
// this copies the case level logic from the
// sort key generation code
if(CE != 0) {
if (caseShift == 0) {
dest[i++] = caseByte;
caseShift = UCOL_CASE_SHIFT_START;
caseByte = UCOL_CASE_BYTE_START;
}
if(coll->caseFirst == UCOL_UPPER_FIRST) {
if((caseBits & 0xC0) == 0) {
caseByte |= 1 << (--caseShift);

View file

@ -14,6 +14,7 @@
#include "unicode/sortkey.h"
#include "regcoll.h"
#include "sfwdchit.h"
#include "testutil.h"
#define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
@ -1075,6 +1076,93 @@ void CollationRegressionTest::Test4146160(/* char* par */)
delete mc;
#endif
}
// Ticket 7189
//
// nextSortKeyPart incorrect for EO_S1 collation
/**
 * Assembles a sort key for text by calling ucol_nextSortKeyPart() repeatedly
 * and concatenating the returned parts into keyBuf.
 *
 * Every call asks for at most 8 key bytes, so any key longer than that is
 * produced across several calls that share the same iterator and state.
 * Each part is fetched into a local scratch buffer first so that the
 * capacity of keyBuf can be checked before anything is written to it.
 *
 * @param coll      collator handed to ucol_nextSortKeyPart()
 * @param text      UTF-16 text handed to uiter_setString()
 * @param len       length of text as understood by uiter_setString()
 *                  (the callers in this file pass -1 with NUL-terminated text)
 * @param keyBuf    destination for the concatenated key bytes
 * @param keyBufLen capacity of keyBuf in bytes
 * @param status    ICU error code; set to U_BUFFER_OVERFLOW_ERROR when the
 *                  key does not fit into keyBufLen bytes, otherwise whatever
 *                  ucol_nextSortKeyPart() reports
 * @return total number of key bytes stored in keyBuf, or -1 if status
 *         indicates a failure
 */
static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t keyBufLen, UErrorCode& status) {
    // Number of key bytes requested from ucol_nextSortKeyPart() per call.
    enum { PART_CAPACITY = 8 };

    UCharIterator uiter;
    uint32_t state[2] = {0, 0};
    uint8_t part[PART_CAPACITY];
    int32_t keyLen = 0;

    uiter_setString(&uiter, text, len);

    for (;;) {
        int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, part, PART_CAPACITY, &status);
        if (U_FAILURE(status)) {
            return -1;
        }
        if (keyPartLen == 0) {
            // A zero-length part ends the loop; nothing more to append.
            break;
        }
        // keyLen never exceeds keyBufLen here, so the subtraction cannot overflow.
        if (keyPartLen > keyBufLen - keyLen) {
            status = U_BUFFER_OVERFLOW_ERROR;
            return -1;
        }
        for (int32_t j = 0; j < keyPartLen; ++j) {
            keyBuf[keyLen++] = part[j];
        }
    }
    return keyLen;
}
void CollationRegressionTest::TestT7189()
{
UErrorCode status = U_ZERO_ERROR;
UCollator *coll;
uint32_t i;
static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
// "Achter De Hoven"
{0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00},
// "ABC"
{0x41, 0x42, 0x43, 0x00},
// "HELLO world!"
{0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00}
};
static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
// "Achter de Hoven"
{0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00},
// "abc"
{0x61, 0x62, 0x63, 0x00},
// "hello world!"
{0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00}
};
// Open the collator
coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
if (U_FAILURE(status)) {
errln("Failed to create a collator for short string EO_S1");
return;
}
for (i = 0; i < sizeof(text1)/(CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
uint8_t key1[100], key2[100];
int32_t len1, len2;
len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
if (U_FAILURE(status)) {
errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
break;
}
len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
if (U_FAILURE(status)) {
errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
break;
}
if (len1 == len2 && memcmp(key1, key2, len1) == 0) {
errln(UnicodeString("Failed: Identical key\n") +
" text1: " + text1[i] + "\n" +
" text2: " + text2[i] + "\n" +
" key : " + TestUtility::hex(key1, len1));
} else {
logln(UnicodeString("Keys produced -\n") +
" text1: " + text1[i] + "\n" +
" key1 : " + TestUtility::hex(key1, len1) + "\n" +
" text2: " + text2[i] + "\n" +
" key2 : " + TestUtility::hex(key2, len2));
}
}
ucol_close(coll);
}
void CollationRegressionTest::compareArray(Collator &c,
const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
int32_t testCount)
@ -1206,6 +1294,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
case 31: name = "TestT7189"; if (exec) TestT7189(); break;
default: name = ""; break;
}
} else {

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2001, International Business Machines Corporation and
* Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -224,6 +224,11 @@ public:
//
void Test4146160(/* char* par */);
// Ticket 7189
//
// nextSortKeyPart incorrect for EO_S1 collation.
// Builds sort keys piecewise with ucol_nextSortKeyPart for pairs of strings
// that differ only in letter case and reports an error if the two keys of a
// pair come out identical.
void TestT7189();
private:
//------------------------------------------------------------------------
// Internal utilities

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Copyright (C) 2001-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -35,3 +35,12 @@ UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
}
return result;
}
/**
 * Renders a byte array as a string of hexadecimal digits.
 *
 * Each byte contributes two characters taken from the HEX table: first the
 * digit for its upper four bits, then the digit for its lower four bits.
 * No separator is inserted between bytes.
 *
 * @param bytes the bytes to render; only bytes[0] through bytes[len-1] are read
 * @param len   number of bytes to render; nothing is read when len <= 0
 * @return the concatenated hex digits (empty when len <= 0)
 */
UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
    UnicodeString digits;
    int32_t pos = 0;
    while (pos < len) {
        const uint8_t octet = bytes[pos];
        const int32_t highNibble = (octet >> 4) & 0x0F;
        const int32_t lowNibble = octet & 0x0F;
        digits.append(HEX[highNibble]);
        digits.append(HEX[lowNibble]);
        ++pos;
    }
    return digits;
}

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2001-2006, International Business Machines
* Copyright (C) 2001-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -26,6 +26,8 @@ public:
static UnicodeString hex(const UnicodeString& s, UChar sep);
// Returns the first len bytes of bytes as hexadecimal digits, two per byte
// (upper four bits first), with no separator between bytes.
static UnicodeString hex(const uint8_t* bytes, int32_t len);
private:
TestUtility() {} // Prevent instantiation