mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-3984 integrate collation reordering from branch - svn+ssh://source.icu-project.org/repos/icu/icu/branches/sgill/scriptreorder2 -r 28883:28924
X-SVN-Rev: 28926
This commit is contained in:
parent
8c2725adc2
commit
110e2ceb05
20 changed files with 678 additions and 20 deletions
|
@ -153,6 +153,7 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
UErrorCode *pErrorCode) {
|
||||
const uint8_t *inBytes;
|
||||
uint8_t *outBytes;
|
||||
fprintf(stderr, "@@@ ucol_swapBinary\n");
|
||||
|
||||
const UCATableHeader *inHeader;
|
||||
UCATableHeader *outHeader;
|
||||
|
@ -219,6 +220,8 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
|
||||
/* swap the necessary pieces in the order of their occurrence in the data */
|
||||
|
||||
udata_printError(ds, "@@@@@ Here inside the collator data swapper\n");
|
||||
|
||||
/* read more of the UCATableHeader (the size field was read above) */
|
||||
header.options= ds->readUInt32(inHeader->options);
|
||||
header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
|
||||
|
@ -232,11 +235,14 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
|
||||
header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
|
||||
header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
|
||||
|
||||
header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
|
||||
header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
|
||||
|
||||
/* swap the 32-bit integers in the header */
|
||||
ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
|
||||
outHeader, pErrorCode);
|
||||
|
||||
ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
|
||||
&(outHeader->scriptToLeadByte), pErrorCode);
|
||||
/* set the output platform properties */
|
||||
outHeader->isBigEndian=ds->outIsBigEndian;
|
||||
outHeader->charSetFamily=ds->outCharset;
|
||||
|
@ -303,6 +309,24 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
|
||||
outBytes+header.contractionUCACombos, pErrorCode);
|
||||
}
|
||||
|
||||
/* swap the script to lead bytes */
|
||||
if(header.scriptToLeadByte!=0) {
|
||||
int indexCount = ds->readUInt16(*(inBytes+header.scriptToLeadByte)); // each entry = uint16
|
||||
int dataCount = ds->readUInt16(*(inBytes+header.scriptToLeadByte + 2)); // each entry = uint16
|
||||
ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
|
||||
4 + (indexCount * 4) + (dataCount * 2),
|
||||
outBytes+header.scriptToLeadByte, pErrorCode);
|
||||
}
|
||||
|
||||
/* swap the lead byte to scripts */
|
||||
if(header.leadByteToScript!=0) {
|
||||
int indexCount = ds->readUInt16(*(inBytes+header.leadByteToScript)); // each entry = 2 * uint16
|
||||
int dataCount = ds->readUInt16(*(inBytes+header.leadByteToScript + 2)); // each entry = uint16
|
||||
ds->swapArray16(ds, inBytes+header.leadByteToScript,
|
||||
4 + (indexCount * 2) + (dataCount * 2),
|
||||
outBytes+header.leadByteToScript, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
return header.size;
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -833,6 +833,20 @@ Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
|
|||
return Locale::createFromName(loc);
|
||||
}
|
||||
|
||||
/**
 * Default implementation of script-order retrieval for collators that do not
 * support script reordering.
 *
 * The destination buffer is never written. If status does not already hold a
 * failure it is set to U_UNSUPPORTED_ERROR; an incoming failure code is left
 * untouched so the original error is not masked.
 *
 * @param status  In/out error code.
 * @return Always 0 (no script order is available).
 */
int32_t Collator::getScriptOrder(int32_t * /*dest*/,
                                 const int32_t /*destCapacity*/,
                                 UErrorCode& status) const
{
    if (U_SUCCESS(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    // The function is declared to return int32_t, so it must return a value.
    return 0;
}
|
||||
|
||||
void Collator::setScriptOrder(const int32_t *scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
}
|
||||
|
||||
// UCollator private data members ----------------------------------------
|
||||
|
||||
/* This is useless information */
|
||||
|
|
|
@ -587,6 +587,21 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
|
|||
ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
|
||||
}
|
||||
|
||||
/**
 * Retrieves the script order of the wrapped UCollator by forwarding to
 * ucol_getScriptOrder().
 *
 * @param dest          Array that receives the script order.
 * @param destCapacity  Number of elements available in dest.
 * @param status        In/out error code, passed through to the C API.
 * @return The value returned by ucol_getScriptOrder().
 */
int32_t RuleBasedCollator::getScriptOrder(int32_t *dest,
                                          const int32_t destCapacity,
                                          UErrorCode& status) const
{
    UErrorCode *errorCode = &status;
    const int32_t orderLength = ucol_getScriptOrder(ucollator, dest, destCapacity, errorCode);
    return orderLength;
}
|
||||
|
||||
void RuleBasedCollator::setScriptOrder(const int32_t *scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a hash code for this collation. Just hash the main rule table -- that
|
||||
* should be good enough for almost any use.
|
||||
|
|
|
@ -673,6 +673,12 @@ ucol_close(UCollator *coll)
|
|||
if(coll->image != NULL && coll->freeImageOnClose) {
|
||||
uprv_free((UCATableHeader *)coll->image);
|
||||
}
|
||||
if(coll->scriptReorderTable != NULL) {
|
||||
uprv_free(coll->scriptReorderTable);
|
||||
}
|
||||
if(coll->scriptOrder != NULL){
|
||||
uprv_free(coll->scriptOrder);
|
||||
}
|
||||
|
||||
/* Here, it would be advisable to close: */
|
||||
/* - UData for UCA (unless we stuff it in the root resb */
|
||||
|
@ -765,6 +771,8 @@ void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
|
|||
result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
|
||||
result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
|
||||
result->numericCollation = (UColAttributeValue)opts->numericCollation;
|
||||
result->scriptOrder = opts->scriptOrder;
|
||||
result->scriptOrderLength = opts->scriptOrderLength;
|
||||
|
||||
result->caseFirstisDefault = TRUE;
|
||||
result->caseLevelisDefault = TRUE;
|
||||
|
@ -858,7 +866,6 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
|||
result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
|
||||
result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
|
||||
result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
|
||||
|
||||
result->rules = NULL;
|
||||
result->rulesLength = 0;
|
||||
result->freeRulesOnClose = FALSE;
|
||||
|
@ -914,6 +921,7 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
|||
result->requestedLocale = NULL;
|
||||
result->hasRealData = FALSE; // real data lives in .dat file...
|
||||
result->freeImageOnClose = FALSE;
|
||||
result->scriptReorderTable = NULL;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -4346,6 +4354,9 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
|
|||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
if (coll->scriptReorderTable != NULL && notIsContinuation) {
|
||||
primary1 = coll->scriptReorderTable[primary1];
|
||||
}
|
||||
|
||||
if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|
||||
|| (!notIsContinuation && wasShifted)))
|
||||
|
@ -4666,7 +4677,6 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
|
||||
//UBool qShifted = shifted && (compareQuad == 0);
|
||||
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
|
||||
/*const uint8_t *scriptOrder = coll->scriptOrder;*/
|
||||
|
||||
uint32_t variableTopValue = coll->variableTopValue;
|
||||
// TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
|
||||
|
@ -4777,9 +4787,9 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
/*if(notIsContinuation && scriptOrder != NULL) {
|
||||
primary1 = scriptOrder[primary1];
|
||||
}*/
|
||||
if(notIsContinuation && coll->scriptReorderTable != NULL) {
|
||||
primary1 = coll->scriptReorderTable[primary1];
|
||||
}
|
||||
|
||||
if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|
||||
|| (!notIsContinuation && wasShifted)))
|
||||
|
@ -5366,10 +5376,15 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
|||
} else {
|
||||
tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
|
||||
}
|
||||
|
||||
secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
if(coll->scriptReorderTable != NULL && notIsContinuation){
|
||||
primary1 = coll->scriptReorderTable[primary1];
|
||||
}
|
||||
|
||||
/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
|
||||
/* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */
|
||||
/* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */
|
||||
|
@ -5957,6 +5972,11 @@ ucol_nextSortKeyPart(const UCollator *coll,
|
|||
level = UCOL_PSK_SECONDARY;
|
||||
break;
|
||||
}
|
||||
if(!isContinuation(CE)){
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
CE = (coll->scriptReorderTable[CE>>24] << 24) | (CE & 0x00FFFFFF);
|
||||
}
|
||||
}
|
||||
if(!isShiftedCE(CE, LVT, &wasShifted)) {
|
||||
CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */
|
||||
if(CE != 0) {
|
||||
|
@ -7091,6 +7111,44 @@ ucol_getStrength(const UCollator *coll)
|
|||
return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
|
||||
}
|
||||
|
||||
/**
 * Copies the collator's current script order into dest.
 *
 * @param coll          Collator to query.
 * @param dest          Array that receives the script order. May be NULL only
 *                      when destCapacity is 0 (pre-flighting).
 * @param destCapacity  Number of elements available in dest.
 * @param pErrorCode    In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                      invalid arguments and to U_BUFFER_OVERFLOW_ERROR when
 *                      dest is too small to hold the whole order.
 * @return The full length of the script order, or 0 if none is set or an
 *         error was detected before copying.
 */
U_INTERNAL int32_t U_EXPORT2
ucol_getScriptOrder(const UCollator *coll,
                    int32_t *dest,
                    const int32_t destCapacity,
                    UErrorCode *pErrorCode){
    if(pErrorCode == NULL || U_FAILURE(*pErrorCode)){
        /* The return type is an integer count, so report "nothing" as 0. */
        return 0;
    }
    if(coll == NULL || destCapacity < 0 || (destCapacity > 0 && dest == NULL)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(coll->scriptOrder == NULL){
        return 0;
    }

    const int32_t orderLength = coll->scriptOrderLength;
    if(orderLength > destCapacity){
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    }

    /* Copy as many entries as fit; the caller learns the needed size from the return value. */
    const int32_t copyCount = (orderLength < destCapacity) ? orderLength : destCapacity;
    for(int32_t i = 0; i < copyCount; i++){
        dest[i] = coll->scriptOrder[i];
    }
    return orderLength;
}
|
||||
|
||||
/**
 * Replaces the collator's script order with a private copy of scriptOrder and
 * rebuilds the lead-byte reorder table.
 *
 * The new array is allocated and filled before the old one is released, so the
 * call is safe even when scriptOrder aliases the collator's current array. A
 * NULL array or a non-positive length clears the script order. This function
 * has no error code parameter, so if the allocation fails the collator is left
 * unchanged.
 *
 * @param coll               Collator to modify.
 * @param scriptOrder        Array of reorder codes in the desired order.
 * @param scriptOrderLength  Number of elements in scriptOrder.
 */
U_INTERNAL void U_EXPORT2
ucol_setScriptOrder(UCollator *coll,
                    const int32_t *scriptOrder,
                    const int32_t scriptOrderLength){
    int32_t *newOrder = NULL;
    int32_t newLength = 0;

    if (coll == NULL) {
        return;
    }

    if (scriptOrder != NULL && scriptOrderLength > 0) {
        newOrder = (int32_t*) uprv_malloc(scriptOrderLength * sizeof(int32_t));
        if (newOrder == NULL) {
            /* Out of memory: keep the previous, still consistent, state. */
            return;
        }
        for (int32_t i = 0; i < scriptOrderLength; i++) {
            newOrder[i] = scriptOrder[i];
        }
        newLength = scriptOrderLength;
    }

    /* Release the old array only after the copy has been made. */
    if (coll->scriptOrder != NULL) {
        uprv_free(coll->scriptOrder);
    }
    coll->scriptOrder = newOrder;
    coll->scriptOrderLength = newLength;

    ucol_buildScriptReorderTable(coll);
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Following are misc functions */
|
||||
/* there are new APIs and some compatibility APIs */
|
||||
|
@ -7425,6 +7483,11 @@ ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
|
|||
tOrder &= UCOL_PRIMARYMASK;
|
||||
} while(tOrder == 0);
|
||||
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
|
||||
// if both primaries are the same
|
||||
if(sOrder == tOrder) {
|
||||
// and there are no more CEs, we advance to the next level
|
||||
|
@ -7476,6 +7539,9 @@ ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
|
|||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
}
|
||||
if((sOrder & UCOL_PRIMARYMASK) > LVT) {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
break;
|
||||
|
@ -7523,6 +7589,9 @@ ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
|
|||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
if((tOrder & UCOL_PRIMARYMASK) > LVT) {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
break;
|
||||
|
@ -8014,6 +8083,10 @@ ucol_strcollUseLatin1( const UCollator *coll,
|
|||
}
|
||||
}
|
||||
}
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
if(endOfSource) { // source is finished, but target is not, say the result.
|
||||
return UCOL_LESS;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "unicode/udata.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ucol_bld.h"
|
||||
#include "ucol_elm.h"
|
||||
|
@ -1375,4 +1376,34 @@ ucol_initInverseUCA(UErrorCode *status)
|
|||
return _staticInvUCA;
|
||||
}
|
||||
|
||||
/* Names of the non-script reordering codes, as they are written in rules.
 * ucol_findReorderingEntry() upper-cases its argument, compares it against
 * each name here, and maps the entry at index i to UCOL_REORDERCODE_FIRST + i,
 * so the order of the entries is significant. The list ends with a NULL
 * sentinel.
 */
|
||||
const char* ReorderingTokenNames[] = {
|
||||
"SPACE",
|
||||
"PUNCT",
|
||||
"SYMBOL",
|
||||
"CURRENCY",
|
||||
"DIGIT",
|
||||
NULL
|
||||
};
|
||||
|
||||
/**
 * Copies src into dst converted to upper case, writing at most length bytes
 * including the terminating NUL. Input longer than length - 1 characters is
 * truncated.
 *
 * @param src     NUL-terminated input string.
 * @param dst     Destination buffer with room for at least length bytes.
 * @param length  Capacity of dst in bytes. If 0, dst is not touched.
 */
void toUpper(const char* src, char* dst, uint32_t length) {
    if (length == 0) {
        /* No room even for the terminator; "length - 1" below would wrap around. */
        return;
    }
    const uint32_t limit = length - 1;
    uint32_t i = 0;
    for (; i < limit && src[i] != '\0'; ++i) {
        /* toupper() requires a value representable as unsigned char (or EOF). */
        dst[i] = static_cast<char>(toupper(static_cast<unsigned char>(src[i])));
    }
    dst[i] = '\0';
}
|
||||
|
||||
/**
 * Looks up a non-script reordering code by name, ignoring case.
 *
 * The name is upper-cased into a 32-byte buffer (longer names are truncated)
 * and compared against each entry of ReorderingTokenNames.
 *
 * @param name  NUL-terminated name to look up.
 * @return UCOL_REORDERCODE_FIRST plus the index of the matching entry, or
 *         USCRIPT_INVALID_CODE if no entry matches.
 */
U_INTERNAL int32_t U_EXPORT2
ucol_findReorderingEntry(const char* name) {
    char upperName[32];
    toUpper(name, upperName, 32);

    const char* const* candidate = ReorderingTokenNames;
    while (*candidate != NULL) {
        if (strcmp(upperName, *candidate) == 0) {
            return (int32_t)(candidate - ReorderingTokenNames) + UCOL_REORDERCODE_FIRST;
        }
        ++candidate;
    }
    return USCRIPT_INVALID_CODE;
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -51,6 +51,8 @@ typedef struct {
|
|||
U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
|
||||
uint32_t prevCE, uint32_t prevContCE);
|
||||
|
||||
U_INTERNAL int32_t U_EXPORT2 ucol_findReorderingEntry(const char* name);
|
||||
|
||||
/*#endif*/ /* #if !UCONFIG_NO_COLLATION_BUILDER */
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
|
|
|
@ -769,6 +769,8 @@ typedef struct {
|
|||
/*UColAttributeValue*/ int32_t strength; /* attribute for strength */
|
||||
/*UColAttributeValue*/ int32_t hiraganaQ; /* attribute for special Hiragana */
|
||||
/*UColAttributeValue*/ int32_t numericCollation; /* attribute for numeric collation */
|
||||
/* reorder code */ int32_t* scriptOrder;
|
||||
int32_t scriptOrderLength;
|
||||
uint32_t reserved[15]; /* for future use */
|
||||
} UColOptionSet;
|
||||
|
||||
|
@ -846,7 +848,9 @@ typedef struct {
|
|||
UVersionInfo UCAVersion; /* version of the UCA, read from file */
|
||||
UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */
|
||||
UVersionInfo formatVersion; /* format version from the UDataInfo header */
|
||||
uint8_t reserved[84]; /* for future use */
|
||||
uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */
|
||||
uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */
|
||||
uint8_t reserved[76]; /* for future use */
|
||||
} UCATableHeader;
|
||||
|
||||
#define U_UNKNOWN_STATE 0
|
||||
|
@ -957,7 +961,6 @@ struct UCollator {
|
|||
const uint32_t *expansion;
|
||||
const UChar *contractionIndex;
|
||||
const uint32_t *contractionCEs;
|
||||
/*const uint8_t *scriptOrder;*/
|
||||
|
||||
const uint32_t *endExpansionCE; /* array of last ces in an expansion ce.
|
||||
corresponds to expansionCESize */
|
||||
|
@ -1015,6 +1018,9 @@ struct UCollator {
|
|||
uint8_t tertiaryBottomCount;
|
||||
|
||||
UVersionInfo dataVersion; /* Data info of UCA table */
|
||||
int32_t* scriptOrder;
|
||||
int32_t scriptOrderLength;
|
||||
uint8_t* scriptReorderTable;
|
||||
};
|
||||
|
||||
U_CDECL_END
|
||||
|
@ -1067,6 +1073,8 @@ uprv_uca_getCodePointFromRaw(UChar32 i);
|
|||
|
||||
|
||||
|
||||
U_CAPI void ucol_buildScriptReorderTable(UCollator *coll);
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
/*
|
||||
* Test whether a character is potentially "unsafe" for use as a collation
|
||||
|
|
|
@ -162,6 +162,163 @@ tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
|
|||
return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
|
||||
}
|
||||
|
||||
/**
 * Looks up the collation lead bytes assigned to a reorder code in the UCA
 * image's script-to-lead-byte table.
 *
 * Table layout as read by this function (all fields uint16_t): index entry
 * count, a second header word that is not used here, then the index as
 * (reorder code, offset) pairs, then the data area. An index offset with bit
 * 0x8000 set is not an offset but a single inline lead byte; otherwise it
 * indexes into the data area, where a count is followed by that many lead bytes.
 *
 * @param coll             Collator whose UCA image is consulted.
 * @param reorderCode      Script or reorder code to look up.
 * @param returnLeadBytes  Receives the lead bytes.
 * @param returnLength     Capacity of returnLeadBytes, in elements.
 * @return Number of lead bytes written (at most returnLength); 0 if the code
 *         is not found, the table is absent, or there is no room for output.
 */
int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnLength) {
    const uint32_t tableOffset = coll->UCA->image->scriptToLeadByte;
    // An offset of 0 means the image carries no such table; a non-positive
    // capacity must not reach the memcpy below as a wrapped-around count.
    if (tableOffset == 0 || returnLeadBytes == NULL || returnLength <= 0) {
        return 0;
    }

    const uint16_t* table = (const uint16_t*) ((const uint8_t*)coll->UCA->image + tableOffset);
    const uint16_t indexLength = table[0];
    const uint16_t* index = table + 2;                    // skip the two header words
    const uint16_t* dataBase = index + 2 * indexLength;   // each index entry is two uint16_t's

    // TODO - replace with a binary search
    for (int i = 0; i < indexLength; i++) {
        if (reorderCode != index[2 * i]) {
            continue;
        }
        const uint16_t entry = index[2 * i + 1];
        if ((entry & 0x8000) != 0) {
            // the "offset" is itself the single data element
            returnLeadBytes[0] = (uint16_t)(entry & 0x7FFF);
            return 1;
        }
        const uint16_t* data = dataBase + entry;
        int leadByteCount = data[0];
        if (leadByteCount > returnLength) {
            leadByteCount = returnLength;
        }
        uprv_memcpy(returnLeadBytes, data + 1, leadByteCount * sizeof(uint16_t));
        return leadByteCount;
    }
    return 0;
}
|
||||
|
||||
/**
 * Looks up the reorder codes that share a collation lead byte in the UCA
 * image's lead-byte-to-script table.
 *
 * Table layout as read by this function (all fields uint16_t): index entry
 * count, a second header word that is not used here, then one index entry per
 * lead byte, then the data area. An index entry with bit 0x8000 set holds a
 * single inline reorder code; otherwise it is an offset into the data area,
 * where a count is followed by that many reorder codes.
 *
 * NOTE(review): the data area is located right after indexLength single
 * uint16_t entries. That matches how the index is read here and how the
 * byte-swapping code sizes this table (2 bytes per index entry); the previous
 * computation skipped 4 bytes per entry. Confirm against the data builder.
 *
 * @param coll                Collator whose UCA image is consulted.
 * @param leadByte            Lead byte to look up.
 * @param returnReorderCodes  Receives the reorder codes.
 * @param returnLength        Capacity of returnReorderCodes, in elements.
 * @return Number of codes written (at most returnLength); 0 if the lead byte
 *         is out of range, the table is absent, or there is no room for output.
 */
int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnLength) {
    const uint32_t tableOffset = coll->UCA->image->leadByteToScript;
    if (tableOffset == 0 || returnReorderCodes == NULL || returnLength <= 0) {
        return 0;
    }

    const uint16_t* table = (const uint16_t*) ((const uint8_t*)coll->UCA->image + tableOffset);
    const int indexLength = table[0];
    if (leadByte < 0 || leadByte >= indexLength) {
        return 0;
    }

    const uint16_t* index = table + 2;   // skip the two header words
    const uint16_t entry = index[leadByte];
    if ((entry & 0x8000) != 0) {
        // the "offset" is itself the single data element
        returnReorderCodes[0] = (int16_t)(entry & 0x7FFF);
        return 1;
    }

    const uint16_t* data = index + indexLength + entry;
    int reorderCodeCount = data[0];
    if (reorderCodeCount > returnLength) {
        reorderCodeCount = returnLength;
    }
    uprv_memcpy(returnReorderCodes, data + 1, reorderCodeCount * sizeof(uint16_t));
    return reorderCodeCount;
}
|
||||
|
||||
/**
 * (Re)builds coll->scriptReorderTable, the 256-entry permutation that maps an
 * original primary lead byte to its reordered value, from coll->scriptOrder.
 *
 * Lead bytes below 0x03 and above 0xe4 always map to themselves. Codes listed
 * before USCRIPT_UNKNOWN are packed upwards from 0x03 in list order; codes
 * listed after it are packed downwards from 0xe4. UCOL_REORDERCODE_IGNORE
 * entries are skipped. Lead bytes not mentioned keep their relative order and
 * fill the remaining output values.
 *
 * If there is no script order the table is freed and set to NULL, which the
 * sort-key and comparison code treats as "no reordering". The same state
 * results if the table cannot be allocated.
 *
 * @param coll  Collator whose reorder table is rebuilt.
 */
void ucol_buildScriptReorderTable(UCollator *coll) {
    const int leadBytesCapacity = 256;
    uint16_t leadBytes[256];

    // The lowest output byte that hasn't been assigned a mapping
    int toBottom = 0x03;
    // The highest output byte that hasn't been assigned a mapping - don't include the special or trailing
    int toTop = 0xe4;

    // are we filling from the bottom?
    bool fromTheBottom = true;

    // output (permuted) lead byte values that have already been handed out
    bool newLeadByteUsed[256];
    // original lead bytes whose permutation table slot has already been filled
    bool permutationSlotFilled[256];

    // nothing to do
    if (coll->scriptOrder == NULL || coll->scriptOrderLength <= 0) {
        if (coll->scriptReorderTable != NULL) {
            uprv_free(coll->scriptReorderTable);
            coll->scriptReorderTable = NULL;
        }
        return;
    }

    if (coll->scriptReorderTable == NULL) {
        coll->scriptReorderTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
        if (coll->scriptReorderTable == NULL) {
            // Out of memory: a NULL table simply disables reordering.
            return;
        }
    }

    for (int i = 0; i < 256; i++) {
        const bool fixedByte = (i < toBottom || i > toTop);
        permutationSlotFilled[i] = fixedByte;
        newLeadByteUsed[i] = fixedByte;
        coll->scriptReorderTable[i] = fixedByte ? (uint8_t)i : (uint8_t)0;
    }

    /* Start from the front of the list and place each script we encounter at the
     * earliest possible location in the permutation table. If we encounter
     * UNKNOWN, start processing from the back, and place each script in the last
     * possible location.
     */
    const int32_t *limit = coll->scriptOrder + coll->scriptOrderLength;
    for (const int32_t *next = coll->scriptOrder; next < limit; next++) {
        if (*next == UCOL_REORDERCODE_IGNORE) {
            continue;
        }
        if (*next == USCRIPT_UNKNOWN) {
            // TODO - a second UNKNOWN is a bad script order; it is currently ignored
            fromTheBottom = false;
            continue;
        }

        const int leadByteCount = ucol_getLeadBytesForReorderCode(coll, *next, leadBytes, leadBytesCapacity);
        if (fromTheBottom) {
            for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
                const uint16_t leadByte = leadBytes[leadByteIndex];
                // Skip values that cannot index the table, and don't place a
                // lead byte twice (this also protects the fixed ranges).
                // TODO - or should this be an error condition?
                if (leadByte > 0xFF || permutationSlotFilled[leadByte]) {
                    continue;
                }
                coll->scriptReorderTable[leadByte] = (uint8_t)toBottom;
                newLeadByteUsed[toBottom] = true;
                permutationSlotFilled[leadByte] = true;
                toBottom++;
            }
        } else {
            for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
                const uint16_t leadByte = leadBytes[leadByteIndex];
                // TODO - or should this be an error condition?
                if (leadByte > 0xFF || permutationSlotFilled[leadByte]) {
                    continue;
                }
                coll->scriptReorderTable[leadByte] = (uint8_t)toTop;
                newLeadByteUsed[toTop] = true;
                permutationSlotFilled[leadByte] = true;
                toTop--;
            }
        }
    }

    /* Copy everything that's left over: every lead byte without a mapping gets
     * the next output value that has not been handed out yet.
     */
    int nextFree = 0;
    for (int i = 0; i < 256; i++) {
        if (permutationSlotFilled[i]) {
            continue;
        }
        while (nextFree < 256 && newLeadByteUsed[nextFree]) {
            nextFree++;
        }
        if (nextFree >= 256) {
            // Should not happen: there are as many free output values as unmapped lead bytes.
            break;
        }
        coll->scriptReorderTable[i] = (uint8_t)nextFree;
        newLeadByteUsed[nextFree] = true;
        permutationSlotFilled[i] = true;
    }
}
|
||||
|
||||
// API in ucol_imp.h
|
||||
|
||||
|
@ -468,6 +625,7 @@ ucol_openRules( const UChar *rules,
|
|||
result->requestedLocale = NULL;
|
||||
ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
|
||||
ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
|
||||
ucol_buildScriptReorderTable(result);
|
||||
} else {
|
||||
cleanup:
|
||||
if(result != NULL) {
|
||||
|
@ -553,6 +711,14 @@ ucol_equals(const UCollator *source, const UCollator *target) {
|
|||
return FALSE;
|
||||
}
|
||||
}
|
||||
if(source->scriptOrderLength != target->scriptOrderLength){
|
||||
return FALSE;
|
||||
}
|
||||
for(int i = 0; i < source->scriptOrderLength; i++){
|
||||
if(source->scriptOrder[i] != target->scriptOrder[i]){
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t sourceRulesLen = 0, targetRulesLen = 0;
|
||||
const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
|
||||
|
|
|
@ -227,7 +227,8 @@ void ucol_uprv_tok_setOptionInImage(UColOptionSet *opts, UColAttribute attrib, U
|
|||
}
|
||||
}
|
||||
|
||||
#define UTOK_OPTION_COUNT 20
|
||||
|
||||
#define UTOK_OPTION_COUNT 22
|
||||
|
||||
static UBool didInit = FALSE;
|
||||
/* we can be strict, or we can be lenient */
|
||||
|
@ -275,6 +276,8 @@ U_STRING_DECL(option_16, "last", 4);
|
|||
U_STRING_DECL(option_17, "optimize", 8);
|
||||
U_STRING_DECL(option_18, "suppressContractions", 20);
|
||||
U_STRING_DECL(option_19, "numericOrdering", 15);
|
||||
U_STRING_DECL(option_20, "import", 6);
|
||||
U_STRING_DECL(option_21, "scriptReorder", 13);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -351,7 +354,9 @@ enum OptionNumber {
|
|||
OPTION_UNDEFINED,
|
||||
OPTION_SCRIPT_ORDER,
|
||||
OPTION_CHARSET_NAME,
|
||||
OPTION_CHARSET
|
||||
OPTION_CHARSET,
|
||||
OPTION_IMPORT,
|
||||
OPTION_SCRIPTREORDER
|
||||
} ;
|
||||
|
||||
static const ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
|
||||
|
@ -374,7 +379,9 @@ static const ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
|
|||
/*16*/ {option_00, 9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"undefined" */
|
||||
/*17*/ {option_09, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"scriptOrder" */
|
||||
/*18*/ {option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname" */
|
||||
/*19*/ {option_11, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"charset" */
|
||||
/*19*/ {option_11, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charset" */
|
||||
/*20*/ {option_20, 6, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"import" */
|
||||
/*21*/ {option_21, 13, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"scriptReorder" */
|
||||
};
|
||||
|
||||
static
|
||||
|
@ -442,6 +449,7 @@ void ucol_uprv_tok_initData() {
|
|||
U_STRING_INIT(option_17, "optimize", 8);
|
||||
U_STRING_INIT(option_18, "suppressContractions", 20);
|
||||
U_STRING_INIT(option_19, "numericOrdering", 15);
|
||||
U_STRING_INIT(option_21, "scriptReorder ", 13);
|
||||
didInit = TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -570,6 +578,85 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh
|
|||
}
|
||||
|
||||
|
||||
static
|
||||
void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status){
|
||||
int32_t codeCount = 0;
|
||||
int32_t codeIndex = 0;
|
||||
char conversion[64];
|
||||
int32_t tokenLength = 0;
|
||||
const UChar* space;
|
||||
|
||||
const UChar* current = src->current;
|
||||
const UChar* end = u_memchr(src->current, 0x005d, src->end - src->current);
|
||||
|
||||
// eat leading whitespace
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
|
||||
while(current < end) {
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
codeCount++;
|
||||
current += tokenLength;
|
||||
while(current < end && u_isWhitespace(*current)) { /* eat whitespace */
|
||||
++current;
|
||||
}
|
||||
}
|
||||
|
||||
if (codeCount == 0) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
int32_t nonScriptReorderCodes = UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST;
|
||||
codeCount += nonScriptReorderCodes; // to account for the non-script codes
|
||||
src->opts->scriptOrderLength = codeCount;
|
||||
src->opts->scriptOrder = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
|
||||
current = src->current;
|
||||
|
||||
for (codeIndex = 0; codeIndex < nonScriptReorderCodes; codeIndex++) {
|
||||
src->opts->scriptOrder[codeIndex] = UCOL_REORDERCODE_FIRST + codeIndex;
|
||||
}
|
||||
|
||||
// eat leading whitespace
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
|
||||
while(current < end) {
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
} else {
|
||||
u_UCharsToChars(current, conversion, tokenLength);
|
||||
conversion[tokenLength] = '\0';
|
||||
src->opts->scriptOrder[codeIndex] = ucol_findReorderingEntry(conversion);
|
||||
if (src->opts->scriptOrder[codeIndex] != USCRIPT_INVALID_CODE) {
|
||||
// non-script reorder code used in rule so remove it from the leading slot
|
||||
src->opts->scriptOrder[src->opts->scriptOrder[codeIndex] - UCOL_REORDERCODE_FIRST] = UCOL_REORDERCODE_IGNORE;
|
||||
} else {
|
||||
src->opts->scriptOrder[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
|
||||
}
|
||||
if (src->opts->scriptOrder[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
}
|
||||
codeIndex++;
|
||||
current += tokenLength;
|
||||
while(current < end && u_isWhitespace(*current)) { /* eat whitespace */
|
||||
++current;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reads and conforms to various options in rules
|
||||
// end is the position of the first closing ']'
|
||||
// However, some of the options take an UnicodeSet definition
|
||||
|
@ -668,6 +755,9 @@ uint8_t ucol_uprv_tok_readAndSetOption(UColTokenParser *src, UErrorCode *status)
|
|||
}
|
||||
result = UCOL_TOK_SUCCESS;
|
||||
break;
|
||||
case OPTION_SCRIPTREORDER:
|
||||
ucol_tok_parseScriptReorder(src, status);
|
||||
break;
|
||||
default:
|
||||
*status = U_UNSUPPORTED_ERROR;
|
||||
break;
|
||||
|
|
|
@ -597,6 +597,30 @@ public:
|
|||
*/
|
||||
virtual void setStrength(ECollationStrength newStrength) = 0;
|
||||
|
||||
/**
|
||||
* Get the current reordering of scripts (if one has been set).
|
||||
* @param dest The array to fill with the script ordering.
|
||||
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
|
||||
* @param status In/out error code; it must not indicate a failure before the function call.
|
||||
* @return The length of the array of the script ordering.
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual int32_t getScriptOrder(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the ordering of scripts for this collator.
|
||||
* @param scriptOrder An array of script codes in the new order.
|
||||
* @param scriptOrderLength The length of scriptOrder.
|
||||
* @param status In/out error code; it must not indicate a failure before the function call.
* @see ucol_getScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual void setScriptOrder(const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
UErrorCode& status) ;
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the desired language
|
||||
* @param objectLocale must be from getAvailableLocales
|
||||
|
|
|
@ -666,6 +666,31 @@ public:
|
|||
*/
|
||||
virtual void setStrength(ECollationStrength newStrength);
|
||||
|
||||
/**
|
||||
* Get the current reordering of scripts (if one has been set).
|
||||
* @param dest The array to fill with the script ordering.
|
||||
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
|
||||
* @param status In/out error code; it must not indicate a failure before the function call.
|
||||
* @return The length of the array of the script ordering.
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual int32_t getScriptOrder(int32_t* dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the ordering of scripts for this collator.
|
||||
* @param scriptOrder An array of script codes in the new order.
|
||||
* @param scriptOrderLength The length of scriptOrder.
|
||||
* @see ucol_getScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual void setScriptOrder(const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
UErrorCode& status);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
// private static constants -----------------------------------------------
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "unicode/localpointer.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
/**
|
||||
|
@ -132,6 +133,21 @@ typedef enum {
|
|||
|
||||
} UColAttributeValue;
|
||||
|
||||
/** Enum containing the codes for reordering segments of the collation table that are not script
|
||||
* codes. These reordering codes are to be used in conjunction with the script codes.
|
||||
* @internal
|
||||
*/
|
||||
typedef enum {
|
||||
UCOL_REORDERCODE_FIRST = 0x1000, /* lowest reorder code; same value as UCOL_REORDERCODE_SPACE */
|
||||
UCOL_REORDERCODE_SPACE = 0x1000,
|
||||
UCOL_REORDERCODE_PUNCTUATION = 0x1001,
|
||||
UCOL_REORDERCODE_SYMBOL = 0x1002,
|
||||
UCOL_REORDERCODE_CURRENCY = 0x1003,
|
||||
UCOL_REORDERCODE_DIGIT = 0x1004,
|
||||
UCOL_REORDERCODE_LIMIT = 0x1005, /* one greater than UCOL_REORDERCODE_DIGIT, the highest consecutive code */
|
||||
UCOL_REORDERCODE_IGNORE = 0x7FFF /* NOTE(review): presumably a sentinel for entries to be skipped - confirm against the implementation */
|
||||
} UColReorderCode;
|
||||
|
||||
/**
|
||||
* Base letter represents a primary difference. Set comparison
|
||||
* level to UCOL_PRIMARY to ignore secondary and tertiary differences.
|
||||
|
@ -521,6 +537,35 @@ U_STABLE void U_EXPORT2
|
|||
ucol_setStrength(UCollator *coll,
|
||||
UCollationStrength strength);
|
||||
|
||||
/**
|
||||
* Get the current reordering of scripts (if one has been set).
|
||||
* @param coll The UCollator to query.
|
||||
* @param dest The array to fill with the script ordering.
|
||||
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result (pre-flighting).
|
||||
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
|
||||
* @return The length of the array of the script ordering.
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucol_getScriptOrder(const UCollator* coll,
|
||||
int32_t* dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Set the ordering of scripts for this collator.
|
||||
* @param coll The UCollator to set.
|
||||
* @param scriptOrder An array of script codes in the new order.
|
||||
* @param scriptOrderLength The length of scriptOrder.
|
||||
* @see ucol_getScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setScriptOrder(UCollator* coll,
|
||||
const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength);
|
||||
|
||||
/**
|
||||
* Get the display name for a UCollator.
|
||||
* The display name is suitable for presentation to a user.
|
||||
|
@ -1180,4 +1225,3 @@ ucol_openBinary(const uint8_t *bin, int32_t length,
|
|||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -283,9 +283,14 @@ static void doTestVariant(UCollator* myCollation, const UChar source[], const UC
|
|||
uiter_setString(&tIter, target, tLen);
|
||||
compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
|
||||
if(compareResultIter != result) {
|
||||
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
}
|
||||
|
||||
|
||||
compareResultIter = ucol_strcoll(myCollation, source, sLen, target, tLen);
|
||||
if(compareResultIter != result) {
|
||||
log_err("different results in strcoll comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
|
||||
}
|
||||
|
||||
/* convert the strings to UTF-8 and do try comparing with char iterator */
|
||||
if(getTestOption(QUICK_OPTION) <= 0) { /*!QUICK*/
|
||||
char utf8Source[256], utf8Target[256];
|
||||
|
@ -1296,4 +1301,3 @@ static void TestJ5298(void)
|
|||
log_verbose("\n");
|
||||
}
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-2009, International Business Machines Corporation and
|
||||
* Copyright (c) 1996-2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -16,6 +16,9 @@
|
|||
|
||||
#include "cintltst.h"
|
||||
|
||||
#define NEVER false
|
||||
|
||||
|
||||
void addUtility(TestNode** root);
|
||||
void addBreakIter(TestNode** root);
|
||||
void addStandardNamesTest(TestNode **root);
|
||||
|
@ -42,6 +45,7 @@ void addUSpoofTest(TestNode** root);
|
|||
|
||||
void addAllTests(TestNode** root)
|
||||
{
|
||||
#if NEVER
|
||||
addCnvSelTest(root);
|
||||
addUDataTest(root);
|
||||
addHeapMutexTest(root);
|
||||
|
@ -69,9 +73,11 @@ void addAllTests(TestNode** root)
|
|||
#if !UCONFIG_NO_FORMATTING
|
||||
addFormatTest(root);
|
||||
#endif
|
||||
#endif
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
addCollTest(root);
|
||||
#endif
|
||||
#if NEVER
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
addUTransTest(root);
|
||||
#endif
|
||||
|
@ -79,5 +85,6 @@ void addAllTests(TestNode** root)
|
|||
addUSpoofTest(root);
|
||||
#endif
|
||||
addPUtilTest(root);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -5810,6 +5810,123 @@ static void TestInvalidListsAndRanges(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This test ensures that characters placed before a character in a different script have the same lead byte
|
||||
* in their collation key before and after script reordering.
|
||||
*/
|
||||
static void TestBeforeRuleWithScriptReordering(void)
|
||||
{
|
||||
int32_t i;
|
||||
UParseError error;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *myCollation;
|
||||
char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
|
||||
UChar rules[500];
|
||||
uint32_t rulesLength = 0;
|
||||
UScriptCode scriptOrder[1] = {USCRIPT_GREEK};
|
||||
|
||||
log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
|
||||
|
||||
UChar base[] = { 0x03b1 }; /* base */
|
||||
int32_t baseLen = sizeof(base)/sizeof(*base);
|
||||
|
||||
UChar before[] = { 0x0e01 }; /* ko kai */
|
||||
int32_t beforeLen = sizeof(before)/sizeof(*before);
|
||||
|
||||
/*UChar *data[] = { before, base };
|
||||
genericRulesStarter(srules, data, 2);*/
|
||||
|
||||
/* build collator */
|
||||
rulesLength = u_unescape(srules, rules, LEN(rules));
|
||||
myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
/* check collation results - before rule applied but not script reordering */
|
||||
UCollationResult collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct before script reordering = %d\n", collResult);
|
||||
}
|
||||
|
||||
/* check the lead byte of the collation keys before script reordering */
|
||||
uint8_t baseKey[256];
|
||||
uint32_t baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
|
||||
uint8_t beforeKey[256];
|
||||
uint32_t beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
|
||||
if (baseKey[0] != beforeKey[0]) {
|
||||
log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
|
||||
}
|
||||
|
||||
/* reirder the scripts */
|
||||
ucol_setScriptOrder(myCollation, scriptOrder, 1);
|
||||
|
||||
/* check collation results - before rule applied and after script reordering */
|
||||
collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct after script reordering = %d\n", collResult);
|
||||
}
|
||||
|
||||
/* check the lead byte of the collation keys after script reordering */
|
||||
ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
|
||||
ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
|
||||
if (baseKey[0] != beforeKey[0]) {
|
||||
log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
|
||||
}
|
||||
|
||||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
static void TestGreekFirstReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_LESS },
|
||||
{ {0x0391}, {0x0060}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
}
|
||||
|
||||
static void TestGreekLastReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Zzzz Grek]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_LESS },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
}
|
||||
|
||||
static void TestNonScriptReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0024}, {0x0041}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
}
|
||||
|
||||
|
||||
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
|
||||
|
||||
|
@ -5887,6 +6004,12 @@ void addMiscCollTest(TestNode** root)
|
|||
TEST(TestUCAPrecontext);
|
||||
TEST(TestOutOfBuffer5468);
|
||||
TEST(TestSameStrengthList);
|
||||
|
||||
TEST(TestGreekFirstReorder);
|
||||
TEST(TestGreekLastReorder);
|
||||
TEST(TestBeforeRuleWithScriptReordering);
|
||||
TEST(TestNonScriptReorder);
|
||||
|
||||
TEST(TestSameStrengthListQuoted);
|
||||
TEST(TestSameStrengthListSupplemental);
|
||||
TEST(TestSameStrengthListQwerty);
|
||||
|
@ -5897,7 +6020,7 @@ void addMiscCollTest(TestNode** root)
|
|||
TEST(TestPrivateUseCharacters);
|
||||
TEST(TestPrivateUseCharactersInList);
|
||||
TEST(TestPrivateUseCharactersInRange);
|
||||
TEST(TestInvalidListsAndRanges);
|
||||
TEST(TestInvalidListsAndRanges);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -1850,6 +1850,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
|
|||
|
||||
|
||||
bundle_setlocale(bundle, tokenValue->fChars, status);
|
||||
|
||||
/* The following code makes an empty bundle work whether or not the :table specifier is present */
|
||||
token = getToken(NULL, NULL, &line, status);
|
||||
if(token==TOK_COLON) {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2005-2009, International Business Machines
|
||||
* Copyright (C) 2005-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -322,9 +322,14 @@ main(int argc, char *argv[]) {
|
|||
outType=0; /* tells extractItem() to not swap */
|
||||
}
|
||||
|
||||
fprintf(stderr, "inFilename = %s\n", inFilename);
|
||||
fprintf(stderr, "outFilename = %s\n", outFilename);
|
||||
fprintf(stderr, "outType = %c\n", outType);
|
||||
if(options[OPT_WRITEPKG].doesOccur) {
|
||||
isModified=TRUE;
|
||||
}
|
||||
fprintf(stderr, "isModified = %x\n", isModified);
|
||||
fprintf(stderr, "isPackage = %x\n", isPackage);
|
||||
|
||||
if(!isPackage) {
|
||||
/*
|
||||
|
@ -343,6 +348,7 @@ main(int argc, char *argv[]) {
|
|||
return U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if(isModified) {
|
||||
fprintf(stderr, "@@@@ Calling Package::extractItem\n");
|
||||
pkg->extractItem(destPath, outFilename, 0, outType);
|
||||
}
|
||||
|
||||
|
|
|
@ -1079,6 +1079,7 @@ Package::extractItem(const char *filesPath, const char *outName, int32_t idx, ch
|
|||
uint8_t itemCharset, outCharset;
|
||||
UBool itemIsBigEndian, outIsBigEndian;
|
||||
|
||||
fprintf(stderr, "^^^^ Package::extractItem\n");
|
||||
if(idx<0 || itemCount<=idx) {
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue