mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-3984 update the uca file version, add support for reading/writing reordering from/to resources, update FractionalUCA.txt to a format for the genuca tool
X-SVN-Rev: 28975
This commit is contained in:
parent
6a82cf71dd
commit
5dd9ea7768
14 changed files with 25407 additions and 25382 deletions
|
@ -133,8 +133,8 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
|
|||
header.magic=ds->readUInt32(inHeader->magic);
|
||||
if(!(
|
||||
header.magic==UCOL_HEADER_MAGIC &&
|
||||
inHeader->formatVersion[0]==2 &&
|
||||
inHeader->formatVersion[1]>=3
|
||||
inHeader->formatVersion[0]==3 &&
|
||||
inHeader->formatVersion[1]>=0
|
||||
)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -194,8 +194,8 @@ ucol_swapBinary(const UDataSwapper *ds,
|
|||
header.magic=ds->readUInt32(inHeader->magic);
|
||||
if(!(
|
||||
header.magic==UCOL_HEADER_MAGIC &&
|
||||
inHeader->formatVersion[0]==2 &&
|
||||
inHeader->formatVersion[1]>=3
|
||||
inHeader->formatVersion[0]==3 &&
|
||||
inHeader->formatVersion[1]>=0
|
||||
)) {
|
||||
udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
|
||||
header.magic,
|
||||
|
@ -350,8 +350,8 @@ ucol_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==0x43 &&
|
||||
pInfo->dataFormat[2]==0x6f &&
|
||||
pInfo->dataFormat[3]==0x6c &&
|
||||
pInfo->formatVersion[0]==2 &&
|
||||
pInfo->formatVersion[1]>=3
|
||||
pInfo->formatVersion[0]==3 &&
|
||||
pInfo->formatVersion[1]>=0
|
||||
)) {
|
||||
udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
|
|
@ -132,11 +132,11 @@
|
|||
* version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
|
||||
* version 7 - ICU 3.4 - with the UCA 4.1 Thai tag is no longer processed, complete ignorables
|
||||
* now break contractions
|
||||
* Backward compatible with the old rules.
|
||||
* This value may change in the subsequent releases of ICU
|
||||
* version 8 - ICU 4.6 - the addition of collation reordering. It should in general be
|
||||
* compatible since the data is at the end of the file. However,
|
||||
* if data parsers make assumptions about lengths this will cause problems.
|
||||
* Backward compatible with the old rules.
|
||||
* This value may change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_BUILDER_VERSION 8
|
||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -598,7 +598,7 @@ void RuleBasedCollator::setScriptOrder(const int32_t *scriptOrder,
|
|||
const uint32_t scriptOrderLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength);
|
||||
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength, &status);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -7121,10 +7121,10 @@ ucol_getStrength(const UCollator *coll)
|
|||
U_INTERNAL uint32_t U_EXPORT2
|
||||
ucol_getScriptOrder(const UCollator *coll,
|
||||
int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode *pErrorCode){
|
||||
uint32_t destCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
if (pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
if (coll->scriptOrder == NULL) {
|
||||
return 0;
|
||||
|
@ -7141,8 +7141,8 @@ ucol_getScriptOrder(const UCollator *coll,
|
|||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setScriptOrder(UCollator *coll,
|
||||
const int32_t *scriptOrder,
|
||||
const uint32_t scriptOrderLength) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint32_t scriptOrderLength,
|
||||
UErrorCode *pErrorCode ){
|
||||
if (coll->scriptOrder != NULL) {
|
||||
uprv_free(coll->scriptOrder);
|
||||
}
|
||||
|
@ -7151,8 +7151,7 @@ ucol_setScriptOrder(UCollator *coll,
|
|||
coll->scriptOrder[i] = scriptOrder[i];
|
||||
}
|
||||
coll->scriptOrderLength = scriptOrderLength;
|
||||
ucol_buildScriptReorderTable(coll, &status);
|
||||
// TODO: something with the status if error condition
|
||||
ucol_buildScriptReorderTable(coll, pErrorCode);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -143,7 +143,10 @@
|
|||
* same formatVersion as in ucadata.icu's UDataInfo header
|
||||
* (formatVersion 3.0)
|
||||
*
|
||||
* uint8_t reserved[84]; - currently unused
|
||||
* uint32_t offset to the reordering code to lead CE byte remapping table
|
||||
* uint32_t offset to the lead CE byte to reordering code mapping table
|
||||
*
|
||||
* uint8_t reserved[76]; - currently unused
|
||||
*
|
||||
* -------------------------------------------------------------
|
||||
*
|
||||
|
@ -178,8 +181,8 @@
|
|||
#define UCA_DATA_FORMAT_2 ((uint8_t)0x6f)
|
||||
#define UCA_DATA_FORMAT_3 ((uint8_t)0x6c)
|
||||
|
||||
#define UCA_FORMAT_VERSION_0 ((uint8_t)2)
|
||||
#define UCA_FORMAT_VERSION_1 ((uint8_t)3)
|
||||
#define UCA_FORMAT_VERSION_0 ((uint8_t)3)
|
||||
#define UCA_FORMAT_VERSION_1 ((uint8_t)0)
|
||||
#define UCA_FORMAT_VERSION_2 ((uint8_t)0)
|
||||
#define UCA_FORMAT_VERSION_3 ((uint8_t)0)
|
||||
|
||||
|
|
|
@ -292,10 +292,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
|
|||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\treturn - next == USCRIPT_UNKNOWN\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
fromTheBottom = false;
|
||||
|
@ -317,9 +313,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
|
|||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\treturn - fromTheBottom reuse lead byte\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -342,9 +335,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
|
|||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\treturn - fromTheTop reuse lead byte\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -443,7 +433,8 @@ ucol_open_internal(const char *loc,
|
|||
collations = NULL; // We just reused the collations object as collElem.
|
||||
|
||||
UResourceBundle *binary = NULL;
|
||||
|
||||
UResourceBundle *reorderRes = NULL;
|
||||
|
||||
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
|
||||
*status = U_USING_DEFAULT_WARNING;
|
||||
result = ucol_initCollator(UCA->image, result, UCA, status);
|
||||
|
@ -502,7 +493,18 @@ ucol_open_internal(const char *loc,
|
|||
result->hasRealData = FALSE;
|
||||
}
|
||||
result->freeImageOnClose = FALSE;
|
||||
|
||||
reorderRes = ures_getByKey(collElem, "%%ReorderCodes", NULL, &intStatus);
|
||||
if (U_SUCCESS(intStatus)) {
|
||||
int32_t reorderCodesLen = 0;
|
||||
const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status);
|
||||
ucol_setScriptOrder(result, reorderCodes, reorderCodesLen, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
goto clean;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else { // !U_SUCCESS(binaryStatus)
|
||||
if(U_SUCCESS(*status)) {
|
||||
*status = intStatus; // propagate underlying error
|
||||
|
@ -546,12 +548,14 @@ ucol_open_internal(const char *loc,
|
|||
ures_close(b);
|
||||
ures_close(collElem);
|
||||
ures_close(binary);
|
||||
ures_close(reorderRes);
|
||||
return result;
|
||||
|
||||
clean:
|
||||
ures_close(b);
|
||||
ures_close(collElem);
|
||||
ures_close(binary);
|
||||
ures_close(reorderRes);
|
||||
ucol_close(result);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -316,7 +316,7 @@ U_STRING_DECL(option_17, "optimize", 8);
|
|||
U_STRING_DECL(option_18, "suppressContractions", 20);
|
||||
U_STRING_DECL(option_19, "numericOrdering", 15);
|
||||
U_STRING_DECL(option_20, "import", 6);
|
||||
U_STRING_DECL(option_21, "scriptReorder", 13);
|
||||
U_STRING_DECL(option_21, "reorder", 7);
|
||||
|
||||
/*
|
||||
[last variable] last variable value
|
||||
|
@ -419,7 +419,7 @@ static const ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
|
|||
/*18*/ {option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname" */
|
||||
/*19*/ {option_11, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charset" */
|
||||
/*20*/ {option_20, 6, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"import" */
|
||||
/*21*/ {option_21, 13, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"scriptReorder" */
|
||||
/*21*/ {option_21, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"reorder" */
|
||||
};
|
||||
|
||||
static
|
||||
|
@ -488,7 +488,7 @@ void ucol_uprv_tok_initData() {
|
|||
U_STRING_INIT(option_18, "suppressContractions", 20);
|
||||
U_STRING_INIT(option_19, "numericOrdering", 15);
|
||||
U_STRING_INIT(option_20, "import ", 6);
|
||||
U_STRING_INIT(option_21, "scriptReorder ", 13);
|
||||
U_STRING_INIT(option_21, "reorder", 7);
|
||||
didInit = TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -625,7 +625,7 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh
|
|||
|
||||
|
||||
static
|
||||
void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status){
|
||||
void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
|
||||
int32_t codeCount = 0;
|
||||
int32_t codeIndex = 0;
|
||||
char conversion[64];
|
||||
|
|
|
@ -550,7 +550,7 @@ ucol_setStrength(UCollator *coll,
|
|||
U_INTERNAL uint32_t U_EXPORT2
|
||||
ucol_getScriptOrder(const UCollator* coll,
|
||||
int32_t* dest,
|
||||
const uint32_t destCapacity,
|
||||
uint32_t destCapacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
@ -558,13 +558,15 @@ ucol_getScriptOrder(const UCollator* coll,
|
|||
* @param coll The UCollator to set.
|
||||
* @param scriptOrder An array of script codes in the new order.
|
||||
* @param scriptOrderLength The length of scriptOrder.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
|
||||
* @see ucol_getScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setScriptOrder(UCollator* coll,
|
||||
const int32_t* scriptOrder,
|
||||
const uint32_t scriptOrderLength);
|
||||
uint32_t scriptOrderLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the display name for a UCollator.
|
||||
|
|
|
@ -5845,7 +5845,7 @@ static void TestBeforeRuleWithScriptReordering(void)
|
|||
/*UChar *data[] = { before, base };
|
||||
genericRulesStarter(srules, data, 2);*/
|
||||
|
||||
log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
|
||||
log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
|
||||
|
||||
|
||||
/* build collator */
|
||||
|
@ -5872,7 +5872,11 @@ static void TestBeforeRuleWithScriptReordering(void)
|
|||
}
|
||||
|
||||
/* reorder the scripts */
|
||||
ucol_setScriptOrder(myCollation, scriptOrder, 1);
|
||||
ucol_setScriptOrder(myCollation, scriptOrder, 1, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
/* check collation results - before rule applied and after script reordering */
|
||||
collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
|
||||
|
@ -5911,7 +5915,12 @@ static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32
|
|||
}
|
||||
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
||||
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
|
||||
ucol_setScriptOrder(myCollation, reorderTokens, reorderTokensLen);
|
||||
ucol_setScriptOrder(myCollation, reorderTokens, reorderTokensLen, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
|
||||
doTest(myCollation,
|
||||
testCases[testCaseNum].source,
|
||||
|
@ -5926,7 +5935,7 @@ static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32
|
|||
static void TestGreekFirstReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek]"
|
||||
"[reorder Grek]"
|
||||
};
|
||||
|
||||
const int32_t apiRules[] = {
|
||||
|
@ -5952,7 +5961,7 @@ static void TestGreekFirstReorder(void)
|
|||
static void TestGreekLastReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Zzzz Grek]"
|
||||
"[reorder Zzzz Grek]"
|
||||
};
|
||||
|
||||
const int32_t apiRules[] = {
|
||||
|
@ -5977,7 +5986,7 @@ static void TestGreekLastReorder(void)
|
|||
static void TestNonScriptReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
"[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
};
|
||||
|
||||
const int32_t apiRules[] = {
|
||||
|
@ -6004,7 +6013,7 @@ static void TestNonScriptReorder(void)
|
|||
static void TestHaniReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Hani]"
|
||||
"[reorder Hani]"
|
||||
};
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_HAN
|
||||
|
|
|
@ -899,6 +899,9 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
|
|||
int32_t len = 0;
|
||||
uint8_t *data = NULL;
|
||||
UCollator *coll = NULL;
|
||||
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)];
|
||||
uint32_t reorderCodeCount;
|
||||
int32_t reorderCodeIndex;
|
||||
UParseError parseError;
|
||||
|
||||
genrbdata.inputDir = state->inputdir;
|
||||
|
@ -929,6 +932,16 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
|
|||
struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
|
||||
table_add(result, collationBin, line, status);
|
||||
uprv_free(data);
|
||||
|
||||
reorderCodeCount = ucol_getScriptOrder(
|
||||
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST), &intStatus);
|
||||
if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
|
||||
struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
|
||||
for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
|
||||
intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
|
||||
}
|
||||
table_add(result, reorderCodeRes, line, status);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -322,14 +322,9 @@ main(int argc, char *argv[]) {
|
|||
outType=0; /* tells extractItem() to not swap */
|
||||
}
|
||||
|
||||
fprintf(stderr, "inFilename = %s\n", inFilename);
|
||||
fprintf(stderr, "outFilename = %s\n", outFilename);
|
||||
fprintf(stderr, "outType = %c\n", outType);
|
||||
if(options[OPT_WRITEPKG].doesOccur) {
|
||||
isModified=TRUE;
|
||||
}
|
||||
fprintf(stderr, "isModified = %x\n", isModified);
|
||||
fprintf(stderr, "isPackage = %x\n", isPackage);
|
||||
|
||||
if(!isPackage) {
|
||||
/*
|
||||
|
|
|
@ -1079,7 +1079,6 @@ Package::extractItem(const char *filesPath, const char *outName, int32_t idx, ch
|
|||
uint8_t itemCharset, outCharset;
|
||||
UBool itemIsBigEndian, outIsBigEndian;
|
||||
|
||||
fprintf(stderr, "^^^^ Package::extractItem\n");
|
||||
if(idx<0 || itemCount<=idx) {
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue