ICU-3984 updates based on code review and fix of memory leak during collation reordering rule parsing

X-SVN-Rev: 28999
This commit is contained in:
Stuart Gill 2010-11-04 20:12:39 +00:00
parent 174695eeac
commit b1a76e6a88
18 changed files with 25793 additions and 25644 deletions

View file

@ -812,12 +812,12 @@
<span class=""><span>4.6</span></span></td>
</tr>
<tr class="row1">
<td class="file">coll.h</td><td class="proto">uint32_t Collator::getReorderCodes(int32_t*, const uint32_t, UErrorCode&amp;) const</td><td class="">None<br>
<td class="file">coll.h</td><td class="proto">int32_t Collator::getReorderCodes(int32_t*, const int32_t, UErrorCode&amp;) const</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row0">
<td class="file">coll.h</td><td class="proto">void Collator::setReorderCodes(const int32_t*, const uint32_t, UErrorCode&amp;)</td><td class="">None<br>
<td class="file">coll.h</td><td class="proto">void Collator::setReorderCodes(const int32_t*, const int32_t, UErrorCode&amp;)</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
@ -1136,12 +1136,12 @@
<span class=""><span>4.6</span></span></td>
</tr>
<tr class="row0">
<td class="file">tblcoll.h</td><td class="proto">uint32_t RuleBasedCollator::getReorderCodes(int32_t*, const uint32_t, UErrorCode&amp;) const</td><td class="">None<br>
<td class="file">tblcoll.h</td><td class="proto">int32_t RuleBasedCollator::getReorderCodes(int32_t*, const int32_t, UErrorCode&amp;) const</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">tblcoll.h</td><td class="proto">void RuleBasedCollator::setReorderCodes(const int32_t*, const uint32_t, UErrorCode&amp;)</td><td class="">None<br>
<td class="file">tblcoll.h</td><td class="proto">void RuleBasedCollator::setReorderCodes(const int32_t*, const int32_t, UErrorCode&amp;)</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
@ -1248,52 +1248,52 @@
<b class="bigwarn" title="A new API was introduced that was not tagged.">(untagged)</b></span></td>
</tr>
<tr class="row0">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_CURRENCY</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_CURRENCY</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_DIGIT</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_DIGIT</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row0">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_FIRST</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_FIRST</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_IGNORE</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_IGNORE</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row0">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_LIMIT</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_LIMIT</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_PUNCTUATION</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_PUNCTUATION</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row0">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_SPACE</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_SPACE</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_SYMBOL</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_SYMBOL</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row0">
<td class="file">ucol.h</td><td class="proto">uint32_t ucol_getReorderCodes(const UCollator*, int32_t*, uint32_t, UErrorCode*)</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto">int32_t ucol_getReorderCodes(const UCollator*, int32_t*, int32_t, UErrorCode*)</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>
<tr class="row1">
<td class="file">ucol.h</td><td class="proto">void ucol_setReorderCodes(UCollator*, const int32_t*, uint32_t, UErrorCode*)</td><td class="">None<br>
<td class="file">ucol.h</td><td class="proto">void ucol_setReorderCodes(UCollator*, const int32_t*, int32_t, UErrorCode*)</td><td class="">None<br>
</td><td>Internal<br>
<span class=""><span>.</span></span></td>
</tr>

View file

@ -133,8 +133,8 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
header.magic=ds->readUInt32(inHeader->magic);
if(!(
header.magic==UCOL_HEADER_MAGIC &&
inHeader->formatVersion[0]==3 &&
inHeader->formatVersion[1]>=0
inHeader->formatVersion[0]==3 /*&&
inHeader->formatVersion[1]>=0*/
)) {
return FALSE;
}
@ -194,8 +194,8 @@ ucol_swapBinary(const UDataSwapper *ds,
header.magic=ds->readUInt32(inHeader->magic);
if(!(
header.magic==UCOL_HEADER_MAGIC &&
inHeader->formatVersion[0]==3 &&
inHeader->formatVersion[1]>=0
inHeader->formatVersion[0]==3 /*&&
inHeader->formatVersion[1]>=0*/
)) {
udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
header.magic,
@ -351,8 +351,8 @@ ucol_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x43 &&
pInfo->dataFormat[2]==0x6f &&
pInfo->dataFormat[3]==0x6c &&
pInfo->formatVersion[0]==3 &&
pInfo->formatVersion[1]>=0
pInfo->formatVersion[0]==3 /*&&
pInfo->formatVersion[1]>=0*/
)) {
udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],

View file

@ -692,7 +692,7 @@
#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
#define ucol_allocWeights U_ICU_ENTRY_POINT_RENAME(ucol_allocWeights)
#define ucol_assembleTailoringTable U_ICU_ENTRY_POINT_RENAME(ucol_assembleTailoringTable)
#define ucol_buildScriptReorderTable U_ICU_ENTRY_POINT_RENAME(ucol_buildScriptReorderTable)
#define ucol_buildPermutationTable U_ICU_ENTRY_POINT_RENAME(ucol_buildPermutationTable)
#define ucol_calcSortKey U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKey)
#define ucol_calcSortKeySimpleTertiary U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKeySimpleTertiary)
#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)

File diff suppressed because it is too large Load diff

View file

@ -833,19 +833,23 @@ Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
return Locale::createFromName(loc);
}
uint32_t Collator::getReorderCodes(int32_t *dest,
const uint32_t destCapacity,
int32_t Collator::getReorderCodes(int32_t *dest,
int32_t destCapacity,
UErrorCode& status) const
{
status = U_UNSUPPORTED_ERROR;
if (U_SUCCESS(status)) {
status = U_UNSUPPORTED_ERROR;
}
return 0;
}
void Collator::setReorderCodes(const int32_t *reorderCodes,
const uint32_t reorderCodesLength,
int32_t reorderCodesLength,
UErrorCode& status)
{
status = U_UNSUPPORTED_ERROR;
if (U_SUCCESS(status)) {
status = U_UNSUPPORTED_ERROR;
}
}
// UCollator private data members ----------------------------------------

View file

@ -587,15 +587,15 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
}
uint32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
const uint32_t destCapacity,
int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
const int32_t destCapacity,
UErrorCode& status) const
{
return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
}
void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
const uint32_t reorderCodesLength,
const int32_t reorderCodesLength,
UErrorCode& status)
{
ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);

View file

@ -771,14 +771,6 @@ void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
result->numericCollation = (UColAttributeValue)opts->numericCollation;
result->reorderCodesLength = opts->reorderCodesLength;
if (result->reorderCodesLength > 0) {
result->reorderCodes = (int32_t*) uprv_malloc(result->reorderCodesLength * sizeof(int32_t));
uprv_memcpy(result->reorderCodes, opts->reorderCodes, result->reorderCodesLength * sizeof(int32_t));
} else {
result->reorderCodes = NULL;
}
result->caseFirstisDefault = TRUE;
result->caseLevelisDefault = TRUE;
result->frenchCollationisDefault = TRUE;
@ -4363,9 +4355,11 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
/* no need to permute since the actual code values don't matter
if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
primary1 = coll->leadBytePermutationTable[primary1];
}
*/
if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|| (!notIsContinuation && wasShifted)))
@ -4796,6 +4790,7 @@ ucol_calcSortKey(const UCollator *coll,
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
uint8_t originalPrimary1 = primary1;
if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
primary1 = coll->leadBytePermutationTable[primary1];
}
@ -4845,7 +4840,7 @@ ucol_calcSortKey(const UCollator *coll,
/* one byter, not compressed */
*primaries++ = primary1;
leadPrimary = 0;
} else if(isCompressible(coll, primary1)) {
} else if(isCompressible(coll, originalPrimary1)) {
/* compress */
*primaries++ = leadPrimary = primary1;
if(primaries <= primarySafeEnd) {
@ -5390,6 +5385,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
uint8_t originalPrimary1 = primary1;
if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
primary1 = coll->leadBytePermutationTable[primary1];
}
@ -5410,7 +5406,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
/* one byter, not compressed */
*primaries++ = primary1;
leadPrimary = 0;
} else if(isCompressible(coll, primary1)) {
} else if(isCompressible(coll, originalPrimary1)) {
/* compress */
*primaries++ = leadPrimary = primary1;
*primaries++ = primary2;
@ -7125,21 +7121,24 @@ ucol_getStrength(const UCollator *coll)
return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
}
U_INTERNAL uint32_t U_EXPORT2
U_INTERNAL int32_t U_EXPORT2
ucol_getReorderCodes(const UCollator *coll,
int32_t *dest,
uint32_t destCapacity,
int32_t destCapacity,
UErrorCode *pErrorCode) {
if (pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
if (U_FAILURE(*pErrorCode)) {
return 0;
}
if (coll->reorderCodes == NULL) {
if (destCapacity != 0) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return 0;
}
if (coll->reorderCodesLength > destCapacity) {
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
}
for (uint32_t i = 0; (i < coll->reorderCodesLength) && (i < destCapacity); i++) {
for (int32_t i = 0; (i < coll->reorderCodesLength) && (i < destCapacity); i++) {
dest[i] = coll->reorderCodes[i];
}
return coll->reorderCodesLength;
@ -7148,17 +7147,28 @@ ucol_getReorderCodes(const UCollator *coll,
U_INTERNAL void U_EXPORT2
ucol_setReorderCodes(UCollator *coll,
const int32_t *reorderCodes,
uint32_t reorderCodesLength,
UErrorCode *pErrorCode ){
if (coll->reorderCodes != NULL) {
uprv_free(coll->reorderCodes);
int32_t reorderCodesLength,
UErrorCode *pErrorCode) {
if (U_FAILURE(*pErrorCode)) {
return;
}
if (reorderCodes == NULL) {
if (reorderCodesLength != 0) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
uprv_free(coll->reorderCodes);
coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t));
for (uint32_t i = 0; i < reorderCodesLength; i++) {
if (coll->reorderCodes == NULL) {
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t i = 0; i < reorderCodesLength; i++) {
coll->reorderCodes[i] = reorderCodes[i];
}
coll->reorderCodesLength = reorderCodesLength;
ucol_buildScriptReorderTable(coll, pErrorCode);
ucol_buildPermutationTable(coll, pErrorCode);
}

View file

@ -25,6 +25,7 @@
#include "unicode/udata.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "normalizer2impl.h"
#include "ucol_bld.h"
@ -33,6 +34,7 @@
#include "ucln_in.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
static const InverseUCATableHeader* _staticInvUCA = NULL;
static UDataMemory* invUCA_DATA_MEM = NULL;
@ -1400,8 +1402,8 @@ ucol_findReorderingEntry(const char* name) {
char buffer[32];
toUpper(name, buffer, 32);
for (uint32_t entry = 0; ReorderingTokenNames[entry] != NULL; entry++) {
if (strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
return entry + UCOL_REORDERCODE_FIRST;
if (uprv_strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
return entry + UCOL_REORDER_CODE_FIRST;
}
}
return USCRIPT_INVALID_CODE;

View file

@ -776,8 +776,6 @@ typedef struct {
/*UColAttributeValue*/ int32_t strength; /* attribute for strength */
/*UColAttributeValue*/ int32_t hiraganaQ; /* attribute for special Hiragana */
/*UColAttributeValue*/ int32_t numericCollation; /* attribute for numeric collation */
/* reorder code */ int32_t* reorderCodes;
uint32_t reorderCodesLength;
uint32_t reserved[15]; /* for future use */
} UColOptionSet;
@ -1026,7 +1024,7 @@ struct UCollator {
UVersionInfo dataVersion; /* Data info of UCA table */
int32_t* reorderCodes;
uint32_t reorderCodesLength;
int32_t reorderCodesLength;
uint8_t* leadBytePermutationTable;
};
@ -1096,7 +1094,9 @@ U_CAPI const UChar* U_EXPORT2 ucol_tok_getRulesFromBundle(
const char* type,
int32_t* pLength,
UErrorCode* status);
U_CAPI void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status);
U_CAPI void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status);
#ifdef XP_CPLUSPLUS
/*

View file

@ -28,6 +28,7 @@
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
#include "unicode/caniter.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "ucol_bld.h"
@ -48,6 +49,8 @@
U_NAMESPACE_USE
static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status);
// static UCA. There is only one. Collators don't use it.
// It is referenced only in ucol_initUCA and ucol_cleanup
static UCollator* _staticUCA = NULL;
@ -162,233 +165,6 @@ tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
}
/**
 * Looks up the lead bytes stored for a reorder code in the UCA image's
 * scriptToLeadByte table.
 *
 * Layout as read by this code: the first uint16_t at the table offset is the
 * number of index entries; the index itself begins two uint16_t's after the
 * table start and consists of (reorder code, offset) pairs. An offset with
 * bit 0x8000 set is not an offset at all: its low 15 bits are the single lead
 * byte. Otherwise it is a uint16_t offset into the data area that follows the
 * index, where a count is followed by that many lead bytes.
 *
 * @param coll            collator whose UCA image is searched
 * @param reorderCode     reorder code to look up
 * @param returnLeadBytes receives the lead bytes
 * @param returnLength    capacity of returnLeadBytes, in elements
 * @return number of lead bytes written (truncated to returnLength); 0 when the
 *         code is not in the index or there is no room for a single result
 */
int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnLength) {
    uint8_t *tableStart = (uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte;
    uint16_t pairCount = *((uint16_t *)tableStart);
    uint16_t *pairs = (uint16_t *)(tableStart + 2 * sizeof(uint16_t));

    // TODO - replace with a binary search
    for (int pair = 0; pair < pairCount; pair++) {
        uint16_t *entry = pairs + (pair * 2);
        if (reorderCode != entry[0]) {
            continue;
        }

        uint16_t offsetOrValue = entry[1];
        if ((offsetOrValue & 0x8000) == 0x8000) {
            // the "offset" carries the one lead byte inline
            if (returnLength < 1) {
                return 0;
            }
            returnLeadBytes[0] = offsetOrValue & ~0x8000;
            return 1;
        }

        // the data area starts right after the last index pair
        uint16_t *dataArea = (uint16_t *)((uint8_t *)pairs + pairCount * (2 * sizeof(uint16_t)));
        uint16_t *record = dataArea + offsetOrValue;
        uint16_t copied = record[0];
        if (copied > returnLength) {
            copied = (uint16_t)returnLength;
        }
        uprv_memcpy(returnLeadBytes, record + 1, copied * sizeof(uint16_t));
        return copied;
    }
    return 0;
}
/**
 * Looks up the reorder codes stored for a lead byte in the UCA image's
 * leadByteToScript table.
 *
 * The first uint16_t at the table offset is read as the index length; the
 * index begins two uint16_t's after the table start and is addressed directly
 * by lead byte. An index value with bit 0x8000 set carries a single reorder
 * code in its low 15 bits; otherwise it is a uint16_t offset into the data
 * area, where a count is followed by that many reorder codes.
 *
 * NOTE(review): the data area is located at index + length * 2 uint16_t's even
 * though the index is read one uint16_t per lead byte - confirm against the
 * data builder that this stride is intended.
 *
 * @param coll               collator whose UCA image is searched
 * @param leadByte           lead byte to look up
 * @param returnReorderCodes receives the reorder codes
 * @param returnLength       capacity of returnReorderCodes, in elements
 * @return number of reorder codes written (truncated to returnLength); 0 when
 *         leadByte is beyond the index or there is no room for a single result
 */
int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnLength) {
    uint8_t *tableStart = (uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript;
    int indexLength = *((uint16_t *)tableStart);
    uint16_t *index = (uint16_t *)(tableStart + 2 * sizeof(uint16_t));

    if (leadByte >= indexLength) {
        return 0;
    }

    if ((index[leadByte] & 0x8000) == 0x8000) {
        // the "offset" carries the one reorder code inline
        if (returnLength < 1) {
            return 0;
        }
        returnReorderCodes[0] = index[leadByte] & ~0x8000;
        return 1;
    }

    uint16_t *dataArea = (uint16_t *)((uint8_t *)index + indexLength * (2 * sizeof(uint16_t)));
    uint16_t *record = dataArea + index[leadByte];
    uint16_t copied = record[0];
    if (copied > returnLength) {
        copied = (uint16_t)returnLength;
    }
    uprv_memcpy(returnReorderCodes, record + 1, copied * sizeof(uint16_t));
    return copied;
}
void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
uint16_t leadBytesSize = 256;
uint16_t leadBytes[256];
uint32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST);
int32_t* internalReorderCodes;
// The lowest byte that hasn't been assigned a mapping
int toBottom = 0x03;
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
int toTop = 0xe4;
// are we filling from the bottom?
bool fromTheBottom = true;
// lead bytes that have alread been assigned to the permutation table
bool newLeadByteUsed[256];
// permutation table slots that have already been filled
bool permutationSlotFilled[256];
// nothing to do
if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
return;
}
if (coll->leadBytePermutationTable == NULL) {
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
}
// prefill the reordering codes with the leading entries
internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST); codeIndex++) {
internalReorderCodes[codeIndex] = UCOL_REORDERCODE_FIRST + codeIndex;
}
for (uint32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
internalReorderCodes[codeIndex + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)] = reorderCodesCode;
if (reorderCodesCode >= UCOL_REORDERCODE_FIRST && reorderCodesCode < UCOL_REORDERCODE_LIMIT) {
internalReorderCodes[reorderCodesCode - UCOL_REORDERCODE_FIRST] = UCOL_REORDERCODE_IGNORE;
}
}
for (int i = 0; i < 256; i++) {
if (i < toBottom || i > toTop) {
permutationSlotFilled[i] = true;
newLeadByteUsed[i] = true;
coll->leadBytePermutationTable[i] = i;
} else {
permutationSlotFilled[i] = false;
newLeadByteUsed[i] = false;
coll->leadBytePermutationTable[i] = 0;
}
}
/* Start from the front of the list and place each script we encounter at the
* earliest possible locatation in the permutation table. If we encounter
* UNKNOWN, start processing from the back, and place each script in the last
* possible location. At each step, we also need to make sure that any scripts
* that need to not be moved are copied to their same location in the final table.
*/
for (int reorderCodesIndex = 0; reorderCodesIndex < internalReorderCodesLength; reorderCodesIndex++) {
int32_t next = internalReorderCodes[reorderCodesIndex];
if (next == UCOL_REORDERCODE_IGNORE) {
continue;
}
if (next == USCRIPT_UNKNOWN) {
if (fromTheBottom == false) {
// double turnaround
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
fromTheBottom = false;
continue;
}
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
if (fromTheBottom) {
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
// don't place a lead byte twice in the permutation table
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
newLeadByteUsed[toBottom] = true;
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
toBottom++;
}
} else {
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
// don't place a lead byte twice in the permutation table
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
newLeadByteUsed[toTop] = true;
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
toTop--;
}
}
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
}
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
}
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
}
#endif
/* Copy everything that's left over */
int reorderCode = 0;
for (int i = 0; i < 256; i++) {
if (!permutationSlotFilled[i]) {
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
reorderCode++;
}
coll->leadBytePermutationTable[i] = reorderCode;
permutationSlotFilled[i] = true;
newLeadByteUsed[reorderCode] = true;
}
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
}
#endif
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
// force a regen of the latin one table since it is affected by the script reordering
coll->latinOneRegenTable = TRUE;
ucol_updateInternalState(coll, status);
}
// API in ucol_imp.h
@ -685,6 +461,7 @@ ucol_openRulesForImport( const UChar *rules,
}
uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
ucol_setOptionsFromHeader(result, opts, status);
ucol_setReorderCodesFromParser(result, &src, status);
result->freeOptionsOnClose = TRUE;
result->hasRealData = FALSE;
result->freeImageOnClose = FALSE;
@ -710,7 +487,7 @@ ucol_openRulesForImport( const UChar *rules,
result->actualLocale = NULL;
result->validLocale = NULL;
result->requestedLocale = NULL;
ucol_buildScriptReorderTable(result, status);
ucol_buildPermutationTable(result, status);
ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
} else {
@ -816,10 +593,10 @@ ucol_equals(const UCollator *source, const UCollator *target) {
return FALSE;
}
}
if(source->reorderCodesLength != target->reorderCodesLength){
if (source->reorderCodesLength != target->reorderCodesLength){
return FALSE;
}
for (int i = 0; i < source->reorderCodesLength; i++) {
for (i = 0; i < source->reorderCodesLength; i++) {
if(source->reorderCodes[i] != target->reorderCodes[i]) {
return FALSE;
}
@ -1246,4 +1023,273 @@ ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
return (USet *)tailored;
}
/*
* Collation Reordering
*/
/**
 * Replaces the collator's reorder code list with a private copy of the list
 * collected by the rule parser.
 *
 * Any list previously owned by the collator is released. When the parser
 * holds no reorder codes the collator is left with no list
 * (reorderCodes == NULL, reorderCodesLength == 0).
 *
 * @param coll   collator to update
 * @param parser rule parser whose reorderCodes / reorderCodesLength are copied
 * @param status in/out error code; nothing is done if it already indicates a
 *               failure. Set to U_MEMORY_ALLOCATION_ERROR if the copy cannot
 *               be allocated, in which case the collator is left with no list.
 */
static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return;
    }

    // Release the old list and clear the pointer so that no later free or
    // read can touch the released memory.
    if (coll->reorderCodes != NULL) {
        uprv_free(coll->reorderCodes);
        coll->reorderCodes = NULL;
    }
    coll->reorderCodesLength = 0;

    if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) {
        return;
    }

    // Publish the new list only once the copy has been made successfully.
    int32_t *copiedCodes = (int32_t*) uprv_malloc(parser->reorderCodesLength * sizeof(int32_t));
    if (copiedCodes == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(copiedCodes, parser->reorderCodes, parser->reorderCodesLength * sizeof(int32_t));

    coll->reorderCodes = copiedCodes;
    coll->reorderCodesLength = parser->reorderCodesLength;
}
/**
 * Fetches the lead bytes that the UCA image's scriptToLeadByte table records
 * for one reorder code.
 *
 * As read here, the table starts with a uint16_t entry count; the index
 * begins two uint16_t's after the table start and holds (reorder code,
 * offset) pairs. When bit 0x8000 of the offset is set, the remaining 15 bits
 * are the one and only lead byte. Otherwise the offset (in uint16_t units)
 * points into the data area that follows the index, at a count followed by
 * that many lead bytes.
 *
 * @param coll            collator whose UCA image is consulted
 * @param reorderCode     reorder code to find
 * @param returnLeadBytes output buffer for the lead bytes
 * @param returnCapacity  number of elements returnLeadBytes can hold
 * @return how many lead bytes were stored (never more than returnCapacity);
 *         0 if the code is absent or a single result does not fit
 */
static int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
    uint8_t *tableBytes = (uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte;
    uint16_t entryCount = *((uint16_t *)tableBytes);
    uint16_t *indexPairs = (uint16_t *)(tableBytes + 2 * sizeof(uint16_t));

    // TODO - replace with a binary search
    int found = -1;
    for (int candidate = 0; candidate < entryCount; candidate++) {
        if (reorderCode == indexPairs[candidate * 2]) {
            found = candidate;
            break;
        }
    }
    if (found < 0) {
        return 0;
    }

    uint16_t location = indexPairs[(found * 2) + 1];
    if ((location & 0x8000) == 0x8000) {
        // flagged value: the lead byte itself is stored in place of an offset
        if (returnCapacity >= 1) {
            returnLeadBytes[0] = location & ~0x8000;
            return 1;
        }
        return 0;
    }

    // the data area begins immediately after the index pairs
    uint16_t *dataStart = (uint16_t *)((uint8_t *)indexPairs + entryCount * (2 * sizeof(uint16_t)));
    uint16_t *countedList = dataStart + location;
    uint16_t resultCount = *countedList;
    if (resultCount > returnCapacity) {
        resultCount = (uint16_t)returnCapacity;
    }
    uprv_memcpy(returnLeadBytes, countedList + 1, resultCount * sizeof(uint16_t));
    return resultCount;
}
/**
 * Fetches the reorder codes that the UCA image's leadByteToScript table
 * records for one lead byte.
 *
 * As read here, the table starts with a uint16_t index length; the index
 * begins two uint16_t's after the table start and is subscripted by the lead
 * byte itself. When bit 0x8000 of an index value is set, the remaining 15 bits
 * are the single reorder code. Otherwise the value is an offset (in uint16_t
 * units) into the data area, pointing at a count followed by that many
 * reorder codes.
 *
 * NOTE(review): the data area is taken to start index length * 2 uint16_t's
 * past the index, although the index is read with one uint16_t per lead byte
 * - verify this stride against the code that writes the table.
 *
 * @param coll               collator whose UCA image is consulted
 * @param leadByte           lead byte to find
 * @param returnReorderCodes output buffer for the reorder codes
 * @param returnCapacity     number of elements returnReorderCodes can hold
 * @return how many reorder codes were stored (never more than
 *         returnCapacity); 0 if leadByte lies beyond the index or a single
 *         result does not fit
 */
static int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
    uint8_t *tableBytes = (uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript;
    int slotCount = *((uint16_t *)tableBytes);
    uint16_t *slots = (uint16_t *)(tableBytes + 2 * sizeof(uint16_t));

    if (leadByte >= slotCount) {
        return 0;
    }

    if ((slots[leadByte] & 0x8000) == 0x8000) {
        // flagged value: the reorder code itself is stored in place of an offset
        if (returnCapacity >= 1) {
            returnReorderCodes[0] = slots[leadByte] & ~0x8000;
            return 1;
        }
        return 0;
    }

    uint16_t *dataStart = (uint16_t *)((uint8_t *)slots + slotCount * (2 * sizeof(uint16_t)));
    uint16_t *countedList = dataStart + slots[leadByte];
    uint16_t resultCount = *countedList;
    if (resultCount > returnCapacity) {
        resultCount = (uint16_t)returnCapacity;
    }
    uprv_memcpy(returnReorderCodes, countedList + 1, resultCount * sizeof(uint16_t));
    return resultCount;
}
// Sentinel used to mark ignorable reorder code slots. ucol_buildPermutationTable
// writes it over the implicit entry of a special reorder code that the caller
// also listed explicitly, and skips every slot holding it when placing lead
// bytes. Defined as one past UCOL_REORDER_CODE_LIMIT.
static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1;
void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
uint16_t leadBytesSize = 256;
uint16_t leadBytes[256];
int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
int32_t* internalReorderCodes;
// The lowest byte that hasn't been assigned a mapping
int toBottom = 0x03;
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
int toTop = 0xe4;
// are we filling from the bottom?
bool fromTheBottom = true;
// lead bytes that have already been assigned to the permutation table
bool newLeadByteUsed[256];
// permutation table slots that have already been filled
bool permutationSlotFilled[256];
// nothing to do
if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
if (coll != NULL) {
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
}
return;
}
if (coll->leadBytePermutationTable == NULL) {
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
if (coll->leadBytePermutationTable == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
// prefill the reordering codes with the leading entries
internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
if (internalReorderCodes == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
return;
}
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) {
internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex;
}
for (int32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
internalReorderCodes[codeIndex + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)] = reorderCodesCode;
if (reorderCodesCode >= UCOL_REORDER_CODE_FIRST && reorderCodesCode < UCOL_REORDER_CODE_LIMIT) {
internalReorderCodes[reorderCodesCode - UCOL_REORDER_CODE_FIRST] = UCOL_REORDER_CODE_IGNORE;
}
}
for (int i = 0; i < 256; i++) {
if (i < toBottom || i > toTop) {
permutationSlotFilled[i] = true;
newLeadByteUsed[i] = true;
coll->leadBytePermutationTable[i] = i;
} else {
permutationSlotFilled[i] = false;
newLeadByteUsed[i] = false;
coll->leadBytePermutationTable[i] = 0;
}
}
/* Start from the front of the list and place each script we encounter at the
* earliest possible location in the permutation table. If we encounter
* UNKNOWN, start processing from the back, and place each script in the last
* possible location. At each step, we also need to make sure that any scripts
* that need to not be moved are copied to their same location in the final table.
*/
for (int reorderCodesIndex = 0; reorderCodesIndex < internalReorderCodesLength; reorderCodesIndex++) {
int32_t next = internalReorderCodes[reorderCodesIndex];
if (next == UCOL_REORDER_CODE_IGNORE) {
continue;
}
if (next == USCRIPT_UNKNOWN) {
if (fromTheBottom == false) {
// double turnaround
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
fromTheBottom = false;
continue;
}
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
if (fromTheBottom) {
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
// don't place a lead byte twice in the permutation table
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
newLeadByteUsed[toBottom] = true;
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
toBottom++;
}
} else {
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
// don't place a lead byte twice in the permutation table
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (coll->leadBytePermutationTable != NULL) {
uprv_free(coll->leadBytePermutationTable);
coll->leadBytePermutationTable = NULL;
}
coll->reorderCodesLength = 0;
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
return;
}
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
newLeadByteUsed[toTop] = true;
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
toTop--;
}
}
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
}
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
}
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
}
#endif
/* Copy everything that's left over */
int reorderCode = 0;
for (int i = 0; i < 256; i++) {
if (!permutationSlotFilled[i]) {
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
reorderCode++;
}
coll->leadBytePermutationTable[i] = reorderCode;
permutationSlotFilled[i] = true;
newLeadByteUsed[reorderCode] = true;
}
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
for (int i = 0; i < 256; i++) {
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
}
#endif
if (internalReorderCodes != NULL) {
uprv_free(internalReorderCodes);
}
// force a regen of the latin one table since it is affected by the script reordering
coll->latinOneRegenTable = TRUE;
ucol_updateInternalState(coll, status);
}
#endif /* #if !UCONFIG_NO_COLLATION */

View file

@ -22,6 +22,7 @@
#if !UCONFIG_NO_COLLATION
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
@ -659,8 +660,8 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
*status = U_INVALID_FORMAT_ERROR;
}
src->opts->reorderCodesLength = codeCount;
src->opts->reorderCodes = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
src->reorderCodesLength = codeCount;
src->reorderCodes = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
current = src->current;
// eat leading whitespace
@ -678,11 +679,11 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
} else {
u_UCharsToChars(current, conversion, tokenLength);
conversion[tokenLength] = '\0';
src->opts->reorderCodes[codeIndex] = ucol_findReorderingEntry(conversion);
if (src->opts->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
src->opts->reorderCodes[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
src->reorderCodes[codeIndex] = ucol_findReorderingEntry(conversion);
if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
src->reorderCodes[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
}
if (src->opts->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
}
@ -2456,6 +2457,9 @@ void ucol_tok_closeTokenList(UColTokenParser *src) {
if(src->opts != NULL) {
uprv_free(src->opts);
}
if (src->reorderCodes != NULL) {
uprv_free(src->reorderCodes);
}
}
#endif /* #if !UCONFIG_NO_COLLATION */

View file

@ -135,6 +135,11 @@ typedef struct {
UBool inRange; /* Are we in a range? */
UChar32 currentRangeCp; /* Current code point in the range. */
UChar32 lastRangeCp; /* The last code point in the range. */
/* reorder codes for collation reordering */
int32_t* reorderCodes;
int32_t reorderCodesLength;
} UColTokenParser;
typedef struct {

View file

@ -606,8 +606,8 @@ public:
* @see ucol_getReorderCodes
* @internal
*/
virtual uint32_t getReorderCodes(int32_t *dest,
const uint32_t destCapacity,
virtual int32_t getReorderCodes(int32_t *dest,
const int32_t destCapacity,
UErrorCode& status) const;
/**
@ -618,7 +618,7 @@ public:
* @internal
*/
virtual void setReorderCodes(const int32_t* reorderCodes,
const uint32_t reorderCodesLength,
const int32_t reorderCodesLength,
UErrorCode& status) ;
/**

View file

@ -675,8 +675,8 @@ public:
* @see ucol_getReorderCodes
* @internal
*/
virtual uint32_t getReorderCodes(int32_t* dest,
const uint32_t destCapacity,
virtual int32_t getReorderCodes(int32_t* dest,
const int32_t destCapacity,
UErrorCode& status) const;
/**
@ -687,8 +687,8 @@ public:
* @internal
*/
virtual void setReorderCodes(const int32_t* reorderCodes,
const uint32_t reorderCodesLength,
UErrorCode& status);
const int32_t reorderCodesLength,
UErrorCode& status);
private:

View file

@ -16,7 +16,6 @@
#include "unicode/localpointer.h"
#include "unicode/parseerr.h"
#include "unicode/uloc.h"
#include "unicode/uscript.h"
#include "unicode/uset.h"
/**
@ -138,14 +137,13 @@ typedef enum {
* @internal
*/
typedef enum {
UCOL_REORDERCODE_FIRST = 0x1000,
UCOL_REORDERCODE_SPACE = 0x1000,
UCOL_REORDERCODE_PUNCTUATION = 0x1001,
UCOL_REORDERCODE_SYMBOL = 0x1002,
UCOL_REORDERCODE_CURRENCY = 0x1003,
UCOL_REORDERCODE_DIGIT = 0x1004,
UCOL_REORDERCODE_LIMIT = 0x1005,
UCOL_REORDERCODE_IGNORE = 0x7FFF
UCOL_REORDER_CODE_SPACE = 0x1000,
UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
UCOL_REORDER_CODE_SYMBOL = 0x1002,
UCOL_REORDER_CODE_CURRENCY = 0x1003,
UCOL_REORDER_CODE_DIGIT = 0x1004,
UCOL_REORDER_CODE_LIMIT = 0x1005
} UColReorderCode;
/**
@ -547,10 +545,10 @@ ucol_setStrength(UCollator *coll,
* @see ucol_setReorderCodes
* @internal
*/
U_INTERNAL uint32_t U_EXPORT2
U_INTERNAL int32_t U_EXPORT2
ucol_getReorderCodes(const UCollator* coll,
int32_t* dest,
uint32_t destCapacity,
int32_t destCapacity,
UErrorCode *pErrorCode);
/**
@ -565,7 +563,7 @@ ucol_getReorderCodes(const UCollator* coll,
U_INTERNAL void U_EXPORT2
ucol_setReorderCodes(UCollator* coll,
const int32_t* reorderCodes,
uint32_t reorderCodesLength,
int32_t reorderCodesLength,
UErrorCode *pErrorCode);
/**

View file

@ -29,7 +29,6 @@
* equivalent to word 'one'.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@ -284,14 +283,9 @@ static void doTestVariant(UCollator* myCollation, const UChar source[], const UC
uiter_setString(&tIter, target, tLen);
compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
if(compareResultIter != result) {
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
}
compareResultIter = ucol_strcoll(myCollation, source, sLen, target, tLen);
if(compareResultIter != result) {
log_err("different results in strcoll comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
}
/* convert the strings to UTF-8 and do try comparing with char iterator */
if(getTestOption(QUICK_OPTION) <= 0) { /*!QUICK*/
char utf8Source[256], utf8Target[256];

View file

@ -36,6 +36,7 @@
#include "unicode/parseerr.h"
#include "unicode/ucnv.h"
#include "unicode/ures.h"
#include "unicode/uscript.h"
#include "uparse.h"
#include "putilimp.h"
@ -5903,6 +5904,89 @@ static void TestBeforeRuleWithScriptReordering(void)
ucol_close(myCollation);
}
/*
* This test ensures that characters placed before a character in a different script have the same lead byte
* in their collation key before and after script reordering.
*/
static void TestNonLeadBytesDuringCollationReordering(void)
{
UParseError error;
UErrorCode status = U_ZERO_ERROR;
UCollator *myCollation;
int32_t reorderCodes[1] = {USCRIPT_GREEK};
UCollationResult collResult;
uint8_t baseKey[256];
uint32_t baseKeyLength;
uint8_t reorderKey[256];
uint32_t reorderKeyLength;
UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
int i;
log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
/* build collator tertiary */
myCollation = ucol_open("", &status);
ucol_setStrength(myCollation, UCOL_TERTIARY);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
return;
}
baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
return;
}
reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
if (baseKeyLength != reorderKeyLength) {
log_err("Key lengths not the same during reordering.\n", collResult);
return;
}
for (i = 1; i < baseKeyLength; i++) {
if (baseKey[i] != reorderKey[i]) {
log_err("Collation key bytes not the same at position %d.\n", i);
return;
}
}
ucol_close(myCollation);
/* build collator quaternary */
myCollation = ucol_open("", &status);
ucol_setStrength(myCollation, UCOL_QUATERNARY);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
return;
}
baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
return;
}
reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
if (baseKeyLength != reorderKeyLength) {
log_err("Key lengths not the same during reordering.\n", collResult);
return;
}
for (i = 1; i < baseKeyLength; i++) {
if (baseKey[i] != reorderKey[i]) {
log_err("Collation key bytes not the same at position %d.\n", i);
return;
}
}
ucol_close(myCollation);
}
/*
* Utility function to test one collation reordering test case.
* @param testcases Array of test cases.
@ -5910,20 +5994,20 @@ static void TestBeforeRuleWithScriptReordering(void)
* @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
* @param n_rules Size of the array str_rules.
*/
static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], uint32_t reorderTokensLen)
static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
{
int testCaseNum;
UErrorCode status = U_ZERO_ERROR;
UCollator *myCollation;
int i;
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
myCollation = ucol_open("", &status);
if (U_FAILURE(status)) {
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
return;
}
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
@ -5999,9 +6083,9 @@ static void TestNonScriptReorder(void)
};
const int32_t apiRules[] = {
USCRIPT_GREEK, UCOL_REORDERCODE_SYMBOL, UCOL_REORDERCODE_DIGIT, USCRIPT_LATIN,
UCOL_REORDERCODE_PUNCTUATION, UCOL_REORDERCODE_SPACE, USCRIPT_UNKNOWN,
UCOL_REORDERCODE_CURRENCY
USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
UCOL_REORDER_CODE_CURRENCY
};
const static OneTestCase privateUseCharacterStrings[] = {
@ -6349,9 +6433,10 @@ void addMiscCollTest(TestNode** root)
TEST(TestImport);
TEST(TestImportWithType);
TEST(TestBeforeRuleWithScriptReordering);
TEST(TestNonLeadBytesDuringCollationReordering);
TEST(TestGreekFirstReorder);
TEST(TestGreekLastReorder);
TEST(TestBeforeRuleWithScriptReordering);
TEST(TestNonScriptReorder);
TEST(TestHaniReorder);
}

View file

@ -31,6 +31,7 @@
#include "rbt_pars.h"
#include "genrb.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
#include "unicode/putil.h"
#include <stdio.h>
@ -899,7 +900,7 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
int32_t len = 0;
uint8_t *data = NULL;
UCollator *coll = NULL;
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)];
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
uint32_t reorderCodeCount;
int32_t reorderCodeIndex;
UParseError parseError;
@ -934,7 +935,7 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
uprv_free(data);
reorderCodeCount = ucol_getReorderCodes(
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST), &intStatus);
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {