mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-3984 updates based on code review and fix of memory leak during collation reordering rule parsing
X-SVN-Rev: 28999
This commit is contained in:
parent
174695eeac
commit
b1a76e6a88
18 changed files with 25793 additions and 25644 deletions
|
@ -812,12 +812,12 @@
|
|||
<span class=""><span>4.6</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">coll.h</td><td class="proto">uint32_t Collator::getReorderCodes(int32_t*, const uint32_t, UErrorCode&) const</td><td class="">None<br>
|
||||
<td class="file">coll.h</td><td class="proto">int32_t Collator::getReorderCodes(int32_t*, const int32_t, UErrorCode&) const</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">coll.h</td><td class="proto">void Collator::setReorderCodes(const int32_t*, const uint32_t, UErrorCode&)</td><td class="">None<br>
|
||||
<td class="file">coll.h</td><td class="proto">void Collator::setReorderCodes(const int32_t*, const int32_t, UErrorCode&)</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
|
@ -1136,12 +1136,12 @@
|
|||
<span class=""><span>4.6</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">tblcoll.h</td><td class="proto">uint32_t RuleBasedCollator::getReorderCodes(int32_t*, const uint32_t, UErrorCode&) const</td><td class="">None<br>
|
||||
<td class="file">tblcoll.h</td><td class="proto">int32_t RuleBasedCollator::getReorderCodes(int32_t*, const int32_t, UErrorCode&) const</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">tblcoll.h</td><td class="proto">void RuleBasedCollator::setReorderCodes(const int32_t*, const uint32_t, UErrorCode&)</td><td class="">None<br>
|
||||
<td class="file">tblcoll.h</td><td class="proto">void RuleBasedCollator::setReorderCodes(const int32_t*, const int32_t, UErrorCode&)</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
|
@ -1248,52 +1248,52 @@
|
|||
<b class="bigwarn" title="A new API was introduced that was not tagged.">(untagged)</b></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_CURRENCY</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_CURRENCY</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_DIGIT</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_DIGIT</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_FIRST</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_FIRST</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_IGNORE</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_IGNORE</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_LIMIT</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_LIMIT</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_PUNCTUATION</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_PUNCTUATION</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_SPACE</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_SPACE</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDERCODE_SYMBOL</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto"><tt>enum</tt> UColReorderCode::UCOL_REORDER_CODE_SYMBOL</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row0">
|
||||
<td class="file">ucol.h</td><td class="proto">uint32_t ucol_getReorderCodes(const UCollator*, int32_t*, uint32_t, UErrorCode*)</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto">int32_t ucol_getReorderCodes(const UCollator*, int32_t*, int32_t, UErrorCode*)</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
<tr class="row1">
|
||||
<td class="file">ucol.h</td><td class="proto">void ucol_setReorderCodes(UCollator*, const int32_t*, uint32_t, UErrorCode*)</td><td class="">None<br>
|
||||
<td class="file">ucol.h</td><td class="proto">void ucol_setReorderCodes(UCollator*, const int32_t*, int32_t, UErrorCode*)</td><td class="">None<br>
|
||||
</td><td>Internal<br>
|
||||
<span class=""><span>.</span></span></td>
|
||||
</tr>
|
||||
|
|
|
@ -133,8 +133,8 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
|
|||
header.magic=ds->readUInt32(inHeader->magic);
|
||||
if(!(
|
||||
header.magic==UCOL_HEADER_MAGIC &&
|
||||
inHeader->formatVersion[0]==3 &&
|
||||
inHeader->formatVersion[1]>=0
|
||||
inHeader->formatVersion[0]==3 /*&&
|
||||
inHeader->formatVersion[1]>=0*/
|
||||
)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -194,8 +194,8 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
header.magic=ds->readUInt32(inHeader->magic);
|
||||
if(!(
|
||||
header.magic==UCOL_HEADER_MAGIC &&
|
||||
inHeader->formatVersion[0]==3 &&
|
||||
inHeader->formatVersion[1]>=0
|
||||
inHeader->formatVersion[0]==3 /*&&
|
||||
inHeader->formatVersion[1]>=0*/
|
||||
)) {
|
||||
udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
|
||||
header.magic,
|
||||
|
@ -351,8 +351,8 @@ ucol_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==0x43 &&
|
||||
pInfo->dataFormat[2]==0x6f &&
|
||||
pInfo->dataFormat[3]==0x6c &&
|
||||
pInfo->formatVersion[0]==3 &&
|
||||
pInfo->formatVersion[1]>=0
|
||||
pInfo->formatVersion[0]==3 /*&&
|
||||
pInfo->formatVersion[1]>=0*/
|
||||
)) {
|
||||
udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
|
|
@ -692,7 +692,7 @@
|
|||
#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
|
||||
#define ucol_allocWeights U_ICU_ENTRY_POINT_RENAME(ucol_allocWeights)
|
||||
#define ucol_assembleTailoringTable U_ICU_ENTRY_POINT_RENAME(ucol_assembleTailoringTable)
|
||||
#define ucol_buildScriptReorderTable U_ICU_ENTRY_POINT_RENAME(ucol_buildScriptReorderTable)
|
||||
#define ucol_buildPermutationTable U_ICU_ENTRY_POINT_RENAME(ucol_buildPermutationTable)
|
||||
#define ucol_calcSortKey U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKey)
|
||||
#define ucol_calcSortKeySimpleTertiary U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKeySimpleTertiary)
|
||||
#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -833,19 +833,23 @@ Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
|
|||
return Locale::createFromName(loc);
|
||||
}
|
||||
|
||||
uint32_t Collator::getReorderCodes(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
int32_t Collator::getReorderCodes(int32_t *dest,
|
||||
int32_t destCapacity,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
if (U_SUCCESS(status)) {
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Collator::setReorderCodes(const int32_t *reorderCodes,
|
||||
const uint32_t reorderCodesLength,
|
||||
int32_t reorderCodesLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
if (U_SUCCESS(status)) {
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// UCollator private data members ----------------------------------------
|
||||
|
|
|
@ -587,15 +587,15 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
|
|||
ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
|
||||
}
|
||||
|
||||
uint32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
|
||||
}
|
||||
|
||||
void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
|
||||
const uint32_t reorderCodesLength,
|
||||
const int32_t reorderCodesLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
|
||||
|
|
|
@ -771,14 +771,6 @@ void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
|
|||
result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
|
||||
result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
|
||||
result->numericCollation = (UColAttributeValue)opts->numericCollation;
|
||||
result->reorderCodesLength = opts->reorderCodesLength;
|
||||
if (result->reorderCodesLength > 0) {
|
||||
result->reorderCodes = (int32_t*) uprv_malloc(result->reorderCodesLength * sizeof(int32_t));
|
||||
uprv_memcpy(result->reorderCodes, opts->reorderCodes, result->reorderCodesLength * sizeof(int32_t));
|
||||
} else {
|
||||
result->reorderCodes = NULL;
|
||||
}
|
||||
|
||||
result->caseFirstisDefault = TRUE;
|
||||
result->caseLevelisDefault = TRUE;
|
||||
result->frenchCollationisDefault = TRUE;
|
||||
|
@ -4363,9 +4355,11 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
|
|||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
/* no need to permute since the actual code values don't matter
|
||||
if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
|
||||
primary1 = coll->leadBytePermutationTable[primary1];
|
||||
}
|
||||
*/
|
||||
|
||||
if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|
||||
|| (!notIsContinuation && wasShifted)))
|
||||
|
@ -4796,6 +4790,7 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
uint8_t originalPrimary1 = primary1;
|
||||
if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
|
||||
primary1 = coll->leadBytePermutationTable[primary1];
|
||||
}
|
||||
|
@ -4845,7 +4840,7 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
/* one byter, not compressed */
|
||||
*primaries++ = primary1;
|
||||
leadPrimary = 0;
|
||||
} else if(isCompressible(coll, primary1)) {
|
||||
} else if(isCompressible(coll, originalPrimary1)) {
|
||||
/* compress */
|
||||
*primaries++ = leadPrimary = primary1;
|
||||
if(primaries <= primarySafeEnd) {
|
||||
|
@ -5390,6 +5385,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
|||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
uint8_t originalPrimary1 = primary1;
|
||||
if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
|
||||
primary1 = coll->leadBytePermutationTable[primary1];
|
||||
}
|
||||
|
@ -5410,7 +5406,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
|||
/* one byter, not compressed */
|
||||
*primaries++ = primary1;
|
||||
leadPrimary = 0;
|
||||
} else if(isCompressible(coll, primary1)) {
|
||||
} else if(isCompressible(coll, originalPrimary1)) {
|
||||
/* compress */
|
||||
*primaries++ = leadPrimary = primary1;
|
||||
*primaries++ = primary2;
|
||||
|
@ -7125,21 +7121,24 @@ ucol_getStrength(const UCollator *coll)
|
|||
return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
|
||||
}
|
||||
|
||||
U_INTERNAL uint32_t U_EXPORT2
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucol_getReorderCodes(const UCollator *coll,
|
||||
int32_t *dest,
|
||||
uint32_t destCapacity,
|
||||
int32_t destCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
if (pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if (coll->reorderCodes == NULL) {
|
||||
if (destCapacity != 0) {
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (coll->reorderCodesLength > destCapacity) {
|
||||
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
for (uint32_t i = 0; (i < coll->reorderCodesLength) && (i < destCapacity); i++) {
|
||||
for (int32_t i = 0; (i < coll->reorderCodesLength) && (i < destCapacity); i++) {
|
||||
dest[i] = coll->reorderCodes[i];
|
||||
}
|
||||
return coll->reorderCodesLength;
|
||||
|
@ -7148,17 +7147,28 @@ ucol_getReorderCodes(const UCollator *coll,
|
|||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setReorderCodes(UCollator *coll,
|
||||
const int32_t *reorderCodes,
|
||||
uint32_t reorderCodesLength,
|
||||
UErrorCode *pErrorCode ){
|
||||
if (coll->reorderCodes != NULL) {
|
||||
uprv_free(coll->reorderCodes);
|
||||
int32_t reorderCodesLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if (reorderCodes == NULL) {
|
||||
if (reorderCodesLength != 0) {
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return;
|
||||
}
|
||||
uprv_free(coll->reorderCodes);
|
||||
coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t));
|
||||
for (uint32_t i = 0; i < reorderCodesLength; i++) {
|
||||
if (coll->reorderCodes == NULL) {
|
||||
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
for (int32_t i = 0; i < reorderCodesLength; i++) {
|
||||
coll->reorderCodes[i] = reorderCodes[i];
|
||||
}
|
||||
coll->reorderCodesLength = reorderCodesLength;
|
||||
ucol_buildScriptReorderTable(coll, pErrorCode);
|
||||
ucol_buildPermutationTable(coll, pErrorCode);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "unicode/udata.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ucol_bld.h"
|
||||
|
@ -33,6 +34,7 @@
|
|||
#include "ucln_in.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
static const InverseUCATableHeader* _staticInvUCA = NULL;
|
||||
static UDataMemory* invUCA_DATA_MEM = NULL;
|
||||
|
@ -1400,8 +1402,8 @@ ucol_findReorderingEntry(const char* name) {
|
|||
char buffer[32];
|
||||
toUpper(name, buffer, 32);
|
||||
for (uint32_t entry = 0; ReorderingTokenNames[entry] != NULL; entry++) {
|
||||
if (strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
|
||||
return entry + UCOL_REORDERCODE_FIRST;
|
||||
if (uprv_strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
|
||||
return entry + UCOL_REORDER_CODE_FIRST;
|
||||
}
|
||||
}
|
||||
return USCRIPT_INVALID_CODE;
|
||||
|
|
|
@ -776,8 +776,6 @@ typedef struct {
|
|||
/*UColAttributeValue*/ int32_t strength; /* attribute for strength */
|
||||
/*UColAttributeValue*/ int32_t hiraganaQ; /* attribute for special Hiragana */
|
||||
/*UColAttributeValue*/ int32_t numericCollation; /* attribute for numeric collation */
|
||||
/* reorder code */ int32_t* reorderCodes;
|
||||
uint32_t reorderCodesLength;
|
||||
uint32_t reserved[15]; /* for future use */
|
||||
} UColOptionSet;
|
||||
|
||||
|
@ -1026,7 +1024,7 @@ struct UCollator {
|
|||
|
||||
UVersionInfo dataVersion; /* Data info of UCA table */
|
||||
int32_t* reorderCodes;
|
||||
uint32_t reorderCodesLength;
|
||||
int32_t reorderCodesLength;
|
||||
uint8_t* leadBytePermutationTable;
|
||||
};
|
||||
|
||||
|
@ -1096,7 +1094,9 @@ U_CAPI const UChar* U_EXPORT2 ucol_tok_getRulesFromBundle(
|
|||
const char* type,
|
||||
int32_t* pLength,
|
||||
UErrorCode* status);
|
||||
U_CAPI void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status);
|
||||
|
||||
U_CAPI void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status);
|
||||
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
/*
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "unicode/coll.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/caniter.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
#include "ucol_bld.h"
|
||||
|
@ -48,6 +49,8 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status);
|
||||
|
||||
// static UCA. There is only one. Collators don't use it.
|
||||
// It is referenced only in ucol_initUCA and ucol_cleanup
|
||||
static UCollator* _staticUCA = NULL;
|
||||
|
@ -162,233 +165,6 @@ tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
|
|||
return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
|
||||
}
|
||||
|
||||
int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnLength) {
|
||||
uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte));
|
||||
uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte + 2 *sizeof(uint16_t));
|
||||
|
||||
// TODO - replace with a binary search
|
||||
// reorder code index is 2 uint16_t's - reorder code + offset
|
||||
for (int i = 0; i < reorderCodeIndexLength; i++) {
|
||||
if (reorderCode == reorderCodeIndex[i*2]) {
|
||||
uint16_t dataOffset = reorderCodeIndex[(i*2) + 1];
|
||||
if ((dataOffset & 0x8000) == 0x8000) {
|
||||
// offset isn't offset but instead is a single data element
|
||||
if (returnLength >= 1) {
|
||||
returnLeadBytes[0] = dataOffset & ~0x8000;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)reorderCodeIndex + reorderCodeIndexLength * (2 * sizeof(uint16_t)));
|
||||
uint16_t leadByteCount = *(dataOffsetBase + dataOffset);
|
||||
leadByteCount = leadByteCount > returnLength ? returnLength : leadByteCount;
|
||||
uprv_memcpy(returnLeadBytes, dataOffsetBase + dataOffset + 1, leadByteCount * sizeof(uint16_t));
|
||||
return leadByteCount;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnLength) {
|
||||
int leadByteIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript));
|
||||
uint16_t* leadByteIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript + 2 *sizeof(uint16_t));
|
||||
if (leadByte >= leadByteIndexLength) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((leadByteIndex[leadByte] & 0x8000) == 0x8000) {
|
||||
// offset isn't offset but instead is a single data element
|
||||
if (returnLength >= 1) {
|
||||
returnReorderCodes[0] = leadByteIndex[leadByte] & ~0x8000;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)leadByteIndex + leadByteIndexLength * (2 * sizeof(uint16_t)));
|
||||
uint16_t reorderCodeCount = *(dataOffsetBase + leadByteIndex[leadByte]);
|
||||
reorderCodeCount = reorderCodeCount > returnLength ? returnLength : reorderCodeCount;
|
||||
uprv_memcpy(returnReorderCodes, dataOffsetBase + leadByteIndex[leadByte] + 1, reorderCodeCount * sizeof(uint16_t));
|
||||
return reorderCodeCount;
|
||||
}
|
||||
|
||||
void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
|
||||
uint16_t leadBytesSize = 256;
|
||||
uint16_t leadBytes[256];
|
||||
uint32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST);
|
||||
int32_t* internalReorderCodes;
|
||||
|
||||
// The lowest byte that hasn't been assigned a mapping
|
||||
int toBottom = 0x03;
|
||||
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
|
||||
int toTop = 0xe4;
|
||||
|
||||
// are we filling from the bottom?
|
||||
bool fromTheBottom = true;
|
||||
|
||||
// lead bytes that have already been assigned to the permutation table
|
||||
bool newLeadByteUsed[256];
|
||||
// permutation table slots that have already been filled
|
||||
bool permutationSlotFilled[256];
|
||||
|
||||
// nothing to do
|
||||
if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (coll->leadBytePermutationTable == NULL) {
|
||||
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
|
||||
}
|
||||
|
||||
// prefill the reordering codes with the leading entries
|
||||
internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
|
||||
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST); codeIndex++) {
|
||||
internalReorderCodes[codeIndex] = UCOL_REORDERCODE_FIRST + codeIndex;
|
||||
}
|
||||
for (uint32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
|
||||
uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
|
||||
internalReorderCodes[codeIndex + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)] = reorderCodesCode;
|
||||
if (reorderCodesCode >= UCOL_REORDERCODE_FIRST && reorderCodesCode < UCOL_REORDERCODE_LIMIT) {
|
||||
internalReorderCodes[reorderCodesCode - UCOL_REORDERCODE_FIRST] = UCOL_REORDERCODE_IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (i < toBottom || i > toTop) {
|
||||
permutationSlotFilled[i] = true;
|
||||
newLeadByteUsed[i] = true;
|
||||
coll->leadBytePermutationTable[i] = i;
|
||||
} else {
|
||||
permutationSlotFilled[i] = false;
|
||||
newLeadByteUsed[i] = false;
|
||||
coll->leadBytePermutationTable[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start from the front of the list and place each script we encounter at the
|
||||
* earliest possible location in the permutation table. If we encounter
|
||||
* UNKNOWN, start processing from the back, and place each script in the last
|
||||
* possible location. At each step, we also need to make sure that any scripts
|
||||
* that need to not be moved are copied to their same location in the final table.
|
||||
*/
|
||||
for (int reorderCodesIndex = 0; reorderCodesIndex < internalReorderCodesLength; reorderCodesIndex++) {
|
||||
int32_t next = internalReorderCodes[reorderCodesIndex];
|
||||
if (next == UCOL_REORDERCODE_IGNORE) {
|
||||
continue;
|
||||
}
|
||||
if (next == USCRIPT_UNKNOWN) {
|
||||
if (fromTheBottom == false) {
|
||||
// double turnaround
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
fromTheBottom = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
|
||||
if (fromTheBottom) {
|
||||
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
|
||||
newLeadByteUsed[toBottom] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toBottom++;
|
||||
}
|
||||
} else {
|
||||
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
|
||||
newLeadByteUsed[toTop] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toTop--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Copy everything that's left over */
|
||||
int reorderCode = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (!permutationSlotFilled[i]) {
|
||||
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
|
||||
reorderCode++;
|
||||
}
|
||||
coll->leadBytePermutationTable[i] = reorderCode;
|
||||
permutationSlotFilled[i] = true;
|
||||
newLeadByteUsed[reorderCode] = true;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
|
||||
// force a regen of the latin one table since it is affected by the script reordering
|
||||
coll->latinOneRegenTable = TRUE;
|
||||
ucol_updateInternalState(coll, status);
|
||||
}
|
||||
|
||||
// API in ucol_imp.h
|
||||
|
||||
|
@ -685,6 +461,7 @@ ucol_openRulesForImport( const UChar *rules,
|
|||
}
|
||||
uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
|
||||
ucol_setOptionsFromHeader(result, opts, status);
|
||||
ucol_setReorderCodesFromParser(result, &src, status);
|
||||
result->freeOptionsOnClose = TRUE;
|
||||
result->hasRealData = FALSE;
|
||||
result->freeImageOnClose = FALSE;
|
||||
|
@ -710,7 +487,7 @@ ucol_openRulesForImport( const UChar *rules,
|
|||
result->actualLocale = NULL;
|
||||
result->validLocale = NULL;
|
||||
result->requestedLocale = NULL;
|
||||
ucol_buildScriptReorderTable(result, status);
|
||||
ucol_buildPermutationTable(result, status);
|
||||
ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
|
||||
ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
|
||||
} else {
|
||||
|
@ -816,10 +593,10 @@ ucol_equals(const UCollator *source, const UCollator *target) {
|
|||
return FALSE;
|
||||
}
|
||||
}
|
||||
if(source->reorderCodesLength != target->reorderCodesLength){
|
||||
if (source->reorderCodesLength != target->reorderCodesLength){
|
||||
return FALSE;
|
||||
}
|
||||
for (int i = 0; i < source->reorderCodesLength; i++) {
|
||||
for (i = 0; i < source->reorderCodesLength; i++) {
|
||||
if(source->reorderCodes[i] != target->reorderCodes[i]) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1246,4 +1023,273 @@ ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
|
|||
return (USet *)tailored;
|
||||
}
|
||||
|
||||
/*
|
||||
* Collation Reordering
|
||||
*/
|
||||
|
||||
/**
 * Replaces the collator's reorder codes with a private copy of the codes
 * held by the rule parser.
 *
 * Any codes the collator held before are released first. If the parser
 * holds no codes (zero length or NULL array) the collator is left with an
 * empty list: reorderCodes == NULL and reorderCodesLength == 0.
 *
 * @param coll   collator to update; its reorderCodes buffer is freed and
 *               replaced by a newly allocated one.
 * @param parser source of the reorder codes; only read.
 * @param status in/out error code. Nothing is done if it already indicates
 *               a failure. Set to U_MEMORY_ALLOCATION_ERROR if the copy
 *               cannot be allocated, in which case the collator is left
 *               with an empty list.
 */
static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return;
    }

    // Release the previous list and reset BOTH fields, so that neither the
    // early return nor the allocation-failure path below can leave a
    // dangling pointer or a length that does not match the buffer.
    coll->reorderCodesLength = 0;
    if (coll->reorderCodes != NULL) {
        uprv_free(coll->reorderCodes);
        coll->reorderCodes = NULL;
    }

    if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) {
        return;
    }

    const int32_t codeCount = parser->reorderCodesLength;
    int32_t *copiedCodes = (int32_t*) uprv_malloc(codeCount * sizeof(int32_t));
    if (copiedCodes == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(copiedCodes, parser->reorderCodes, codeCount * sizeof(int32_t));

    // Publish the length only once the buffer really exists.
    coll->reorderCodes = copiedCodes;
    coll->reorderCodesLength = codeCount;
}
|
||||
|
||||
/**
 * Looks up the lead bytes recorded for a reorder code in the UCA image.
 *
 * Layout as read by this function, starting at image + scriptToLeadByte:
 *   - one uint16_t: number of index entries
 *   - one uint16_t that is skipped here
 *   - the index: per entry two uint16_t values, (reorder code, offset)
 *   - the data area, starting directly after the index
 * If bit 0x8000 of an index offset is set, the remaining 15 bits are the one
 * and only lead byte. Otherwise the offset (in uint16_t units) points into
 * the data area at a count followed by that many lead bytes.
 *
 * @param coll            collator whose UCA image is searched.
 * @param reorderCode     code to look up.
 * @param returnLeadBytes destination buffer for the lead bytes.
 * @param returnCapacity  number of uint16_t slots available in the buffer.
 * @return number of lead bytes written: 0 if the code is not in the index
 *         or the capacity is not positive; results longer than the capacity
 *         are truncated to it.
 */
static int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
    // A negative capacity would be narrowed into the uint16_t count below
    // and turn the clamp into a huge copy; nothing can be stored anyway.
    if (returnCapacity <= 0) {
        return 0;
    }

    uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte));
    uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte + 2 *sizeof(uint16_t));

    // TODO - replace with a binary search
    // reorder code index is 2 uint16_t's - reorder code + offset
    for (int i = 0; i < reorderCodeIndexLength; i++) {
        if (reorderCode != reorderCodeIndex[i*2]) {
            continue;
        }

        uint16_t dataOffset = reorderCodeIndex[(i*2) + 1];
        if ((dataOffset & 0x8000) == 0x8000) {
            // offset isn't offset but instead is a single data element
            returnLeadBytes[0] = dataOffset & ~0x8000;
            return 1;
        }

        // The data area begins right behind the last index pair.
        uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)reorderCodeIndex + reorderCodeIndexLength * (2 * sizeof(uint16_t)));
        uint16_t leadByteCount = *(dataOffsetBase + dataOffset);
        if (leadByteCount > returnCapacity) {
            // returnCapacity is positive and smaller than a uint16_t value here.
            leadByteCount = (uint16_t)returnCapacity;
        }
        uprv_memcpy(returnLeadBytes, dataOffsetBase + dataOffset + 1, leadByteCount * sizeof(uint16_t));
        return leadByteCount;
    }
    return 0;
}
|
||||
|
||||
/**
 * Looks up the reorder codes recorded for a primary lead byte in the UCA image.
 *
 * Layout as read by this function, starting at image + leadByteToScript:
 *   - one uint16_t: number of index entries
 *   - one uint16_t that is skipped here
 *   - the index: one uint16_t per lead byte, addressed directly by leadByte
 *   - the data area
 * If bit 0x8000 of an index entry is set, the remaining 15 bits are the one
 * and only reorder code. Otherwise the entry is an offset (in uint16_t units)
 * into the data area, where a count is followed by that many reorder codes.
 *
 * @param coll               collator whose UCA image is searched.
 * @param leadByte           lead byte to look up.
 * @param returnReorderCodes destination buffer for the reorder codes.
 * @param returnCapacity     number of 16-bit slots available in the buffer.
 * @return number of reorder codes written: 0 if leadByte is outside the
 *         index or the capacity is not positive; results longer than the
 *         capacity are truncated to it.
 */
static int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
    int leadByteIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript));
    uint16_t* leadByteIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript + 2 *sizeof(uint16_t));

    // Reject indexes on either side of the table; a negative lead byte would
    // otherwise read memory in front of the index.
    if (leadByte < 0 || leadByte >= leadByteIndexLength) {
        return 0;
    }
    // A negative capacity would be narrowed into the uint16_t count below
    // and turn the clamp into a huge copy; nothing can be stored anyway.
    if (returnCapacity <= 0) {
        return 0;
    }

    const uint16_t indexEntry = leadByteIndex[leadByte];
    if ((indexEntry & 0x8000) == 0x8000) {
        // offset isn't offset but instead is a single data element
        returnReorderCodes[0] = indexEntry & ~0x8000;
        return 1;
    }

    // NOTE(review): the data area is located 2 uint16_t per index entry past
    // the index start, although the index is read as 1 uint16_t per entry -
    // confirm against the data file layout. Kept exactly as before.
    uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)leadByteIndex + leadByteIndexLength * (2 * sizeof(uint16_t)));
    uint16_t reorderCodeCount = *(dataOffsetBase + indexEntry);
    if (reorderCodeCount > returnCapacity) {
        // returnCapacity is positive and smaller than a uint16_t value here.
        reorderCodeCount = (uint16_t)returnCapacity;
    }
    uprv_memcpy(returnReorderCodes, dataOffsetBase + indexEntry + 1, reorderCodeCount * sizeof(uint16_t));
    return reorderCodeCount;
}
|
||||
|
||||
// used to mark ignorable reorder code slots
|
||||
// Sentinel value one past UCOL_REORDER_CODE_LIMIT, i.e. outside the
// [UCOL_REORDER_CODE_FIRST, UCOL_REORDER_CODE_LIMIT) range of reorder codes.
static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1;
|
||||
|
||||
void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
|
||||
uint16_t leadBytesSize = 256;
|
||||
uint16_t leadBytes[256];
|
||||
int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
|
||||
int32_t* internalReorderCodes;
|
||||
|
||||
// The lowest byte that hasn't been assigned a mapping
|
||||
int toBottom = 0x03;
|
||||
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
|
||||
int toTop = 0xe4;
|
||||
|
||||
// are we filling from the bottom?
|
||||
bool fromTheBottom = true;
|
||||
|
||||
// lead bytes that have alread been assigned to the permutation table
|
||||
bool newLeadByteUsed[256];
|
||||
// permutation table slots that have already been filled
|
||||
bool permutationSlotFilled[256];
|
||||
|
||||
// nothing to do
|
||||
if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
|
||||
if (coll != NULL) {
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (coll->leadBytePermutationTable == NULL) {
|
||||
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
|
||||
if (coll->leadBytePermutationTable == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// prefill the reordering codes with the leading entries
|
||||
internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
|
||||
if (internalReorderCodes == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) {
|
||||
internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex;
|
||||
}
|
||||
for (int32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
|
||||
uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
|
||||
internalReorderCodes[codeIndex + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)] = reorderCodesCode;
|
||||
if (reorderCodesCode >= UCOL_REORDER_CODE_FIRST && reorderCodesCode < UCOL_REORDER_CODE_LIMIT) {
|
||||
internalReorderCodes[reorderCodesCode - UCOL_REORDER_CODE_FIRST] = UCOL_REORDER_CODE_IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (i < toBottom || i > toTop) {
|
||||
permutationSlotFilled[i] = true;
|
||||
newLeadByteUsed[i] = true;
|
||||
coll->leadBytePermutationTable[i] = i;
|
||||
} else {
|
||||
permutationSlotFilled[i] = false;
|
||||
newLeadByteUsed[i] = false;
|
||||
coll->leadBytePermutationTable[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start from the front of the list and place each script we encounter at the
|
||||
* earliest possible locatation in the permutation table. If we encounter
|
||||
* UNKNOWN, start processing from the back, and place each script in the last
|
||||
* possible location. At each step, we also need to make sure that any scripts
|
||||
* that need to not be moved are copied to their same location in the final table.
|
||||
*/
|
||||
for (int reorderCodesIndex = 0; reorderCodesIndex < internalReorderCodesLength; reorderCodesIndex++) {
|
||||
int32_t next = internalReorderCodes[reorderCodesIndex];
|
||||
if (next == UCOL_REORDER_CODE_IGNORE) {
|
||||
continue;
|
||||
}
|
||||
if (next == USCRIPT_UNKNOWN) {
|
||||
if (fromTheBottom == false) {
|
||||
// double turnaround
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
fromTheBottom = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
|
||||
if (fromTheBottom) {
|
||||
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
|
||||
newLeadByteUsed[toBottom] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toBottom++;
|
||||
}
|
||||
} else {
|
||||
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->leadBytePermutationTable != NULL) {
|
||||
uprv_free(coll->leadBytePermutationTable);
|
||||
coll->leadBytePermutationTable = NULL;
|
||||
}
|
||||
coll->reorderCodesLength = 0;
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
|
||||
newLeadByteUsed[toTop] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toTop--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Copy everything that's left over */
|
||||
int reorderCode = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (!permutationSlotFilled[i]) {
|
||||
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
|
||||
reorderCode++;
|
||||
}
|
||||
coll->leadBytePermutationTable[i] = reorderCode;
|
||||
permutationSlotFilled[i] = true;
|
||||
newLeadByteUsed[reorderCode] = true;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (internalReorderCodes != NULL) {
|
||||
uprv_free(internalReorderCodes);
|
||||
}
|
||||
|
||||
// force a regen of the latin one table since it is affected by the script reordering
|
||||
coll->latinOneRegenTable = TRUE;
|
||||
ucol_updateInternalState(coll, status);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
@ -659,8 +660,8 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
|
|||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
src->opts->reorderCodesLength = codeCount;
|
||||
src->opts->reorderCodes = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
|
||||
src->reorderCodesLength = codeCount;
|
||||
src->reorderCodes = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
|
||||
current = src->current;
|
||||
|
||||
// eat leading whitespace
|
||||
|
@ -678,11 +679,11 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
|
|||
} else {
|
||||
u_UCharsToChars(current, conversion, tokenLength);
|
||||
conversion[tokenLength] = '\0';
|
||||
src->opts->reorderCodes[codeIndex] = ucol_findReorderingEntry(conversion);
|
||||
if (src->opts->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
src->opts->reorderCodes[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
|
||||
src->reorderCodes[codeIndex] = ucol_findReorderingEntry(conversion);
|
||||
if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
src->reorderCodes[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
|
||||
}
|
||||
if (src->opts->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -2456,6 +2457,9 @@ void ucol_tok_closeTokenList(UColTokenParser *src) {
|
|||
if(src->opts != NULL) {
|
||||
uprv_free(src->opts);
|
||||
}
|
||||
if (src->reorderCodes != NULL) {
|
||||
uprv_free(src->reorderCodes);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -135,6 +135,11 @@ typedef struct {
|
|||
UBool inRange; /* Are we in a range? */
|
||||
UChar32 currentRangeCp; /* Current code point in the range. */
|
||||
UChar32 lastRangeCp; /* The last code point in the range. */
|
||||
|
||||
/* reorder codes for collation reordering */
|
||||
int32_t* reorderCodes;
|
||||
int32_t reorderCodesLength;
|
||||
|
||||
} UColTokenParser;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -606,8 +606,8 @@ public:
|
|||
* @see ucol_getReorderCodes
|
||||
* @internal
|
||||
*/
|
||||
virtual uint32_t getReorderCodes(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
virtual int32_t getReorderCodes(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
|
@ -618,7 +618,7 @@ public:
|
|||
* @internal
|
||||
*/
|
||||
virtual void setReorderCodes(const int32_t* reorderCodes,
|
||||
const uint32_t reorderCodesLength,
|
||||
const int32_t reorderCodesLength,
|
||||
UErrorCode& status) ;
|
||||
|
||||
/**
|
||||
|
|
|
@ -675,8 +675,8 @@ public:
|
|||
* @see ucol_getReorderCodes
|
||||
* @internal
|
||||
*/
|
||||
virtual uint32_t getReorderCodes(int32_t* dest,
|
||||
const uint32_t destCapacity,
|
||||
virtual int32_t getReorderCodes(int32_t* dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
|
@ -687,8 +687,8 @@ public:
|
|||
* @internal
|
||||
*/
|
||||
virtual void setReorderCodes(const int32_t* reorderCodes,
|
||||
const uint32_t reorderCodesLength,
|
||||
UErrorCode& status);
|
||||
const int32_t reorderCodesLength,
|
||||
UErrorCode& status);
|
||||
|
||||
|
||||
private:
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "unicode/localpointer.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
/**
|
||||
|
@ -138,14 +137,13 @@ typedef enum {
|
|||
* @internal
|
||||
*/
|
||||
typedef enum {
|
||||
UCOL_REORDERCODE_FIRST = 0x1000,
|
||||
UCOL_REORDERCODE_SPACE = 0x1000,
|
||||
UCOL_REORDERCODE_PUNCTUATION = 0x1001,
|
||||
UCOL_REORDERCODE_SYMBOL = 0x1002,
|
||||
UCOL_REORDERCODE_CURRENCY = 0x1003,
|
||||
UCOL_REORDERCODE_DIGIT = 0x1004,
|
||||
UCOL_REORDERCODE_LIMIT = 0x1005,
|
||||
UCOL_REORDERCODE_IGNORE = 0x7FFF
|
||||
UCOL_REORDER_CODE_SPACE = 0x1000,
|
||||
UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
|
||||
UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
|
||||
UCOL_REORDER_CODE_SYMBOL = 0x1002,
|
||||
UCOL_REORDER_CODE_CURRENCY = 0x1003,
|
||||
UCOL_REORDER_CODE_DIGIT = 0x1004,
|
||||
UCOL_REORDER_CODE_LIMIT = 0x1005
|
||||
} UColReorderCode;
|
||||
|
||||
/**
|
||||
|
@ -547,10 +545,10 @@ ucol_setStrength(UCollator *coll,
|
|||
* @see ucol_setReorderCodes
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL uint32_t U_EXPORT2
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucol_getReorderCodes(const UCollator* coll,
|
||||
int32_t* dest,
|
||||
uint32_t destCapacity,
|
||||
int32_t destCapacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
@ -565,7 +563,7 @@ ucol_getReorderCodes(const UCollator* coll,
|
|||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setReorderCodes(UCollator* coll,
|
||||
const int32_t* reorderCodes,
|
||||
uint32_t reorderCodesLength,
|
||||
int32_t reorderCodesLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
* equivalent to word 'one'.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
@ -284,14 +283,9 @@ static void doTestVariant(UCollator* myCollation, const UChar source[], const UC
|
|||
uiter_setString(&tIter, target, tLen);
|
||||
compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
|
||||
if(compareResultIter != result) {
|
||||
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
}
|
||||
|
||||
compareResultIter = ucol_strcoll(myCollation, source, sLen, target, tLen);
|
||||
if(compareResultIter != result) {
|
||||
log_err("different results in strcoll comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
}
|
||||
|
||||
|
||||
/* convert the strings to UTF-8 and do try comparing with char iterator */
|
||||
if(getTestOption(QUICK_OPTION) <= 0) { /*!QUICK*/
|
||||
char utf8Source[256], utf8Target[256];
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "unicode/parseerr.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "uparse.h"
|
||||
#include "putilimp.h"
|
||||
|
||||
|
@ -5903,6 +5904,89 @@ static void TestBeforeRuleWithScriptReordering(void)
|
|||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
/*
|
||||
* This test ensures that characters placed before a character in a different script have the same lead byte
|
||||
* in their collation key before and after script reordering.
|
||||
*/
|
||||
static void TestNonLeadBytesDuringCollationReordering(void)
|
||||
{
|
||||
UParseError error;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *myCollation;
|
||||
int32_t reorderCodes[1] = {USCRIPT_GREEK};
|
||||
UCollationResult collResult;
|
||||
|
||||
uint8_t baseKey[256];
|
||||
uint32_t baseKeyLength;
|
||||
uint8_t reorderKey[256];
|
||||
uint32_t reorderKeyLength;
|
||||
|
||||
UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
|
||||
|
||||
int i;
|
||||
|
||||
|
||||
log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
|
||||
|
||||
/* build collator tertiary */
|
||||
myCollation = ucol_open("", &status);
|
||||
ucol_setStrength(myCollation, UCOL_TERTIARY);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
|
||||
|
||||
ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
|
||||
|
||||
if (baseKeyLength != reorderKeyLength) {
|
||||
log_err("Key lengths not the same during reordering.\n", collResult);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 1; i < baseKeyLength; i++) {
|
||||
if (baseKey[i] != reorderKey[i]) {
|
||||
log_err("Collation key bytes not the same at position %d.\n", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ucol_close(myCollation);
|
||||
|
||||
/* build collator quaternary */
|
||||
myCollation = ucol_open("", &status);
|
||||
ucol_setStrength(myCollation, UCOL_QUATERNARY);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
|
||||
|
||||
ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
|
||||
|
||||
if (baseKeyLength != reorderKeyLength) {
|
||||
log_err("Key lengths not the same during reordering.\n", collResult);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 1; i < baseKeyLength; i++) {
|
||||
if (baseKey[i] != reorderKey[i]) {
|
||||
log_err("Collation key bytes not the same at position %d.\n", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility function to test one collation reordering test case.
|
||||
* @param testcases Array of test cases.
|
||||
|
@ -5910,20 +5994,20 @@ static void TestBeforeRuleWithScriptReordering(void)
|
|||
* @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
|
||||
* @param n_rules Size of the array str_rules.
|
||||
*/
|
||||
static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], uint32_t reorderTokensLen)
|
||||
static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
|
||||
{
|
||||
int testCaseNum;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *myCollation;
|
||||
|
||||
int i;
|
||||
|
||||
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
|
||||
myCollation = ucol_open("", &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
||||
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
|
||||
ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
|
||||
|
@ -5999,9 +6083,9 @@ static void TestNonScriptReorder(void)
|
|||
};
|
||||
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_GREEK, UCOL_REORDERCODE_SYMBOL, UCOL_REORDERCODE_DIGIT, USCRIPT_LATIN,
|
||||
UCOL_REORDERCODE_PUNCTUATION, UCOL_REORDERCODE_SPACE, USCRIPT_UNKNOWN,
|
||||
UCOL_REORDERCODE_CURRENCY
|
||||
USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
|
||||
UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
|
||||
UCOL_REORDER_CODE_CURRENCY
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
|
@ -6349,9 +6433,10 @@ void addMiscCollTest(TestNode** root)
|
|||
TEST(TestImport);
|
||||
TEST(TestImportWithType);
|
||||
|
||||
TEST(TestBeforeRuleWithScriptReordering);
|
||||
TEST(TestNonLeadBytesDuringCollationReordering);
|
||||
TEST(TestGreekFirstReorder);
|
||||
TEST(TestGreekLastReorder);
|
||||
TEST(TestBeforeRuleWithScriptReordering);
|
||||
TEST(TestNonScriptReorder);
|
||||
TEST(TestHaniReorder);
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "rbt_pars.h"
|
||||
#include "genrb.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/putil.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
@ -899,7 +900,7 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
|
|||
int32_t len = 0;
|
||||
uint8_t *data = NULL;
|
||||
UCollator *coll = NULL;
|
||||
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)];
|
||||
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
|
||||
uint32_t reorderCodeCount;
|
||||
int32_t reorderCodeIndex;
|
||||
UParseError parseError;
|
||||
|
@ -934,7 +935,7 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
|
|||
uprv_free(data);
|
||||
|
||||
reorderCodeCount = ucol_getReorderCodes(
|
||||
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST), &intStatus);
|
||||
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
|
||||
if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
|
||||
struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
|
||||
for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
|
||||
|
|
Loading…
Add table
Reference in a new issue