ICU-3984 update the uca file version, add support for reading/writing reordering from/to resources, update FractionalUCA.txt to a format for the genuca tool

X-SVN-Rev: 28975
This commit is contained in:
Stuart Gill 2010-11-02 02:21:57 +00:00
parent 6a82cf71dd
commit 5dd9ea7768
14 changed files with 25407 additions and 25382 deletions

View file

@ -133,8 +133,8 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
header.magic=ds->readUInt32(inHeader->magic);
if(!(
header.magic==UCOL_HEADER_MAGIC &&
inHeader->formatVersion[0]==2 &&
inHeader->formatVersion[1]>=3
inHeader->formatVersion[0]==3 &&
inHeader->formatVersion[1]>=0
)) {
return FALSE;
}
@ -194,8 +194,8 @@ ucol_swapBinary(const UDataSwapper *ds,
header.magic=ds->readUInt32(inHeader->magic);
if(!(
header.magic==UCOL_HEADER_MAGIC &&
inHeader->formatVersion[0]==2 &&
inHeader->formatVersion[1]>=3
inHeader->formatVersion[0]==3 &&
inHeader->formatVersion[1]>=0
)) {
udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
header.magic,
@ -350,8 +350,8 @@ ucol_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x43 &&
pInfo->dataFormat[2]==0x6f &&
pInfo->dataFormat[3]==0x6c &&
pInfo->formatVersion[0]==2 &&
pInfo->formatVersion[1]>=3
pInfo->formatVersion[0]==3 &&
pInfo->formatVersion[1]>=0
)) {
udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],

View file

@ -132,11 +132,11 @@
* version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
* version 7 - ICU 3.4 - with the UCA 4.1 Thai tag is no longer processed, complete ignorables
* now break contractions
* Backward compatible with the old rules.
* This value may change in the subsequent releases of ICU
* version 8 - ICU 4.6 - added collation reordering. The format should in general remain
* compatible because the new data is appended at the end of the file. However,
* data parsers that make assumptions about lengths may break.
* Backward compatible with the old rules.
* This value may change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define UCOL_BUILDER_VERSION 8

File diff suppressed because it is too large Load diff

View file

@ -598,7 +598,7 @@ void RuleBasedCollator::setScriptOrder(const int32_t *scriptOrder,
const uint32_t scriptOrderLength,
UErrorCode& status)
{
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength);
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength, &status);
}

View file

@ -7121,10 +7121,10 @@ ucol_getStrength(const UCollator *coll)
U_INTERNAL uint32_t U_EXPORT2
ucol_getScriptOrder(const UCollator *coll,
int32_t *dest,
const uint32_t destCapacity,
UErrorCode *pErrorCode){
uint32_t destCapacity,
UErrorCode *pErrorCode) {
if (pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return NULL;
return 0;
}
if (coll->scriptOrder == NULL) {
return 0;
@ -7141,8 +7141,8 @@ ucol_getScriptOrder(const UCollator *coll,
U_INTERNAL void U_EXPORT2
ucol_setScriptOrder(UCollator *coll,
const int32_t *scriptOrder,
const uint32_t scriptOrderLength) {
UErrorCode status = U_ZERO_ERROR;
uint32_t scriptOrderLength,
UErrorCode *pErrorCode ){
if (coll->scriptOrder != NULL) {
uprv_free(coll->scriptOrder);
}
@ -7151,8 +7151,7 @@ ucol_setScriptOrder(UCollator *coll,
coll->scriptOrder[i] = scriptOrder[i];
}
coll->scriptOrderLength = scriptOrderLength;
ucol_buildScriptReorderTable(coll, &status);
// TODO: something with the status if error condition
ucol_buildScriptReorderTable(coll, pErrorCode);
}

View file

@ -143,7 +143,10 @@
* same formatVersion as in ucadata.icu's UDataInfo header
* (formatVersion 3.0)
*
* uint8_t reserved[84]; - currently unused
* uint32_t offset to the reordering code to lead CE byte remapping table
* uint32_t offset to the lead CE byte to reordering code mapping table
*
* uint8_t reserved[76]; - currently unused
*
* -------------------------------------------------------------
*
@ -178,8 +181,8 @@
#define UCA_DATA_FORMAT_2 ((uint8_t)0x6f)
#define UCA_DATA_FORMAT_3 ((uint8_t)0x6c)
#define UCA_FORMAT_VERSION_0 ((uint8_t)2)
#define UCA_FORMAT_VERSION_1 ((uint8_t)3)
#define UCA_FORMAT_VERSION_0 ((uint8_t)3)
#define UCA_FORMAT_VERSION_1 ((uint8_t)0)
#define UCA_FORMAT_VERSION_2 ((uint8_t)0)
#define UCA_FORMAT_VERSION_3 ((uint8_t)0)

View file

@ -292,10 +292,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
if (internalScriptOrder != NULL) {
uprv_free(internalScriptOrder);
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\treturn - next == USCRIPT_UNKNOWN\n");
#endif
return;
}
fromTheBottom = false;
@ -317,9 +313,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
if (internalScriptOrder != NULL) {
uprv_free(internalScriptOrder);
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\treturn - fromTheBottom reuse lead byte\n");
#endif
return;
}
@ -342,9 +335,6 @@ void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
if (internalScriptOrder != NULL) {
uprv_free(internalScriptOrder);
}
#ifdef REORDER_DEBUG
fprintf(stdout, "\treturn - fromTheTop reuse lead byte\n");
#endif
return;
}
@ -443,7 +433,8 @@ ucol_open_internal(const char *loc,
collations = NULL; // We just reused the collations object as collElem.
UResourceBundle *binary = NULL;
UResourceBundle *reorderRes = NULL;
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
*status = U_USING_DEFAULT_WARNING;
result = ucol_initCollator(UCA->image, result, UCA, status);
@ -502,7 +493,18 @@ ucol_open_internal(const char *loc,
result->hasRealData = FALSE;
}
result->freeImageOnClose = FALSE;
reorderRes = ures_getByKey(collElem, "%%ReorderCodes", NULL, &intStatus);
if (U_SUCCESS(intStatus)) {
int32_t reorderCodesLen = 0;
const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status);
ucol_setScriptOrder(result, reorderCodes, reorderCodesLen, status);
if (U_FAILURE(*status)) {
goto clean;
}
}
}
} else { // !U_SUCCESS(binaryStatus)
if(U_SUCCESS(*status)) {
*status = intStatus; // propagate underlying error
@ -546,12 +548,14 @@ ucol_open_internal(const char *loc,
ures_close(b);
ures_close(collElem);
ures_close(binary);
ures_close(reorderRes);
return result;
clean:
ures_close(b);
ures_close(collElem);
ures_close(binary);
ures_close(reorderRes);
ucol_close(result);
return NULL;
}

View file

@ -316,7 +316,7 @@ U_STRING_DECL(option_17, "optimize", 8);
U_STRING_DECL(option_18, "suppressContractions", 20);
U_STRING_DECL(option_19, "numericOrdering", 15);
U_STRING_DECL(option_20, "import", 6);
U_STRING_DECL(option_21, "scriptReorder", 13);
U_STRING_DECL(option_21, "reorder", 7);
/*
[last variable] last variable value
@ -419,7 +419,7 @@ static const ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
/*18*/ {option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname" */
/*19*/ {option_11, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charset" */
/*20*/ {option_20, 6, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"import" */
/*21*/ {option_21, 13, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"scriptReorder" */
/*21*/ {option_21, 7, NULL, 0, UCOL_ATTRIBUTE_COUNT} /*"reorder" */
};
static
@ -488,7 +488,7 @@ void ucol_uprv_tok_initData() {
U_STRING_INIT(option_18, "suppressContractions", 20);
U_STRING_INIT(option_19, "numericOrdering", 15);
U_STRING_INIT(option_20, "import ", 6);
U_STRING_INIT(option_21, "scriptReorder ", 13);
U_STRING_INIT(option_21, "reorder", 7);
didInit = TRUE;
}
}
@ -625,7 +625,7 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh
static
void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status){
void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
int32_t codeCount = 0;
int32_t codeIndex = 0;
char conversion[64];

View file

@ -550,7 +550,7 @@ ucol_setStrength(UCollator *coll,
U_INTERNAL uint32_t U_EXPORT2
ucol_getScriptOrder(const UCollator* coll,
int32_t* dest,
const uint32_t destCapacity,
uint32_t destCapacity,
UErrorCode *pErrorCode);
/**
@ -558,13 +558,15 @@ ucol_getScriptOrder(const UCollator* coll,
* @param coll The UCollator to set.
* @param scriptOrder An array of script codes in the new order.
* @param scriptOrderLength The length of scriptOrder.
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
* @see ucol_getStrength
* @internal
*/
U_INTERNAL void U_EXPORT2
ucol_setScriptOrder(UCollator* coll,
const int32_t* scriptOrder,
const uint32_t scriptOrderLength);
uint32_t scriptOrderLength,
UErrorCode *pErrorCode);
/**
* Get the display name for a UCollator.

View file

@ -5845,7 +5845,7 @@ static void TestBeforeRuleWithScriptReordering(void)
/*UChar *data[] = { before, base };
genericRulesStarter(srules, data, 2);*/
log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
/* build collator */
@ -5872,7 +5872,11 @@ static void TestBeforeRuleWithScriptReordering(void)
}
/* reorder the scripts */
ucol_setScriptOrder(myCollation, scriptOrder, 1);
ucol_setScriptOrder(myCollation, scriptOrder, 1, &status);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
return;
}
/* check collation results - before rule applied and after script reordering */
collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
@ -5911,7 +5915,12 @@ static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32
}
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
ucol_setScriptOrder(myCollation, reorderTokens, reorderTokensLen);
ucol_setScriptOrder(myCollation, reorderTokens, reorderTokensLen, &status);
if(U_FAILURE(status)) {
log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
return;
}
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
doTest(myCollation,
testCases[testCaseNum].source,
@ -5926,7 +5935,7 @@ static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32
static void TestGreekFirstReorder(void)
{
const char* strRules[] = {
"[scriptReorder Grek]"
"[reorder Grek]"
};
const int32_t apiRules[] = {
@ -5952,7 +5961,7 @@ static void TestGreekFirstReorder(void)
static void TestGreekLastReorder(void)
{
const char* strRules[] = {
"[scriptReorder Zzzz Grek]"
"[reorder Zzzz Grek]"
};
const int32_t apiRules[] = {
@ -5977,7 +5986,7 @@ static void TestGreekLastReorder(void)
static void TestNonScriptReorder(void)
{
const char* strRules[] = {
"[scriptReorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
"[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
};
const int32_t apiRules[] = {
@ -6004,7 +6013,7 @@ static void TestNonScriptReorder(void)
static void TestHaniReorder(void)
{
const char* strRules[] = {
"[scriptReorder Hani]"
"[reorder Hani]"
};
const int32_t apiRules[] = {
USCRIPT_HAN

View file

@ -899,6 +899,9 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
int32_t len = 0;
uint8_t *data = NULL;
UCollator *coll = NULL;
int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)];
uint32_t reorderCodeCount;
int32_t reorderCodeIndex;
UParseError parseError;
genrbdata.inputDir = state->inputdir;
@ -929,6 +932,16 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
table_add(result, collationBin, line, status);
uprv_free(data);
reorderCodeCount = ucol_getScriptOrder(
coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST), &intStatus);
if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
}
table_add(result, reorderCodeRes, line, status);
}
}
else
{

View file

@ -322,14 +322,9 @@ main(int argc, char *argv[]) {
outType=0; /* tells extractItem() to not swap */
}
fprintf(stderr, "inFilename = %s\n", inFilename);
fprintf(stderr, "outFilename = %s\n", outFilename);
fprintf(stderr, "outType = %c\n", outType);
if(options[OPT_WRITEPKG].doesOccur) {
isModified=TRUE;
}
fprintf(stderr, "isModified = %x\n", isModified);
fprintf(stderr, "isPackage = %x\n", isPackage);
if(!isPackage) {
/*

View file

@ -1079,7 +1079,6 @@ Package::extractItem(const char *filesPath, const char *outName, int32_t idx, ch
uint8_t itemCharset, outCharset;
UBool itemIsBigEndian, outIsBigEndian;
fprintf(stderr, "^^^^ Package::extractItem\n");
if(idx<0 || itemCount<=idx) {
return;
}