mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-5225 Provide optimization option for size or speed of the alias table.
X-SVN-Rev: 19703
This commit is contained in:
parent
6a1f5ae01a
commit
5e15aecbe2
3 changed files with 158 additions and 27 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -94,8 +94,10 @@
|
|||
* the third dimension to the section 5. No other section should be referencing
|
||||
* this section.
|
||||
*
|
||||
* 7) Reserved at this time (There is no information). This _usually_ has a
|
||||
* size of 0. Future versions may add more information here.
|
||||
* 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
|
||||
* presence indicates that a section 9 exists. UConverterAliasOptions specifies
|
||||
* what type of string normalization is used among other potential things in the
|
||||
* future.
|
||||
*
|
||||
* 8) This is the string table. All strings are indexed on an even address.
|
||||
* There are two reasons for this. First many chip architectures locate strings
|
||||
|
@ -103,6 +105,10 @@
|
|||
* numbers, this string table can be 128KB in size instead of 64KB when we
|
||||
* only have strings starting on an even address.
|
||||
*
|
||||
* 9) When present this is a set of prenormalized strings from section 8. This
|
||||
* table contains normalized strings with the dashes and spaces stripped out,
|
||||
* and all strings lowercased. In the future, the options in section 7 may state
|
||||
* other types of normalization.
|
||||
*
|
||||
* Here is the concept of section 5 and 6. It's a 3D cube. Each tag
|
||||
* has a unique alias among all converters. That same alias can
|
||||
|
@ -173,15 +179,20 @@ enum {
|
|||
untaggedConvArrayIndex=4,
|
||||
taggedAliasArrayIndex=5,
|
||||
taggedAliasListsIndex=6,
|
||||
reservedIndex1=7,
|
||||
tableOptions=7,
|
||||
stringTableIndex=8,
|
||||
minTocLength=8, /* min. tocLength in the file, does not count the tocLengthIndex! */
|
||||
offsetsCount /* length of the swapper's temporary offsets[] */
|
||||
normalizedStringTableIndex=9,
|
||||
offsetsCount, /* length of the swapper's temporary offsets[] */
|
||||
minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
|
||||
};
|
||||
|
||||
static const UConverterAliasOptions defaultTableOptions = {
|
||||
UCNV_IO_UNNORMALIZED
|
||||
};
|
||||
static UConverterAlias gMainTable;
|
||||
|
||||
/*
 * Turn a string index into a pointer to the C string stored in the plain
 * (GET_STRING) or normalized (GET_NORMALIZED_STRING) string table.
 * idx counts uint16_t units from the start of the respective table.
 *
 * The whole expansion is parenthesized so that a postfix operator applied to
 * the result, e.g. GET_STRING(i)[0], acts on the resulting char pointer
 * instead of binding to the uint16_t pointer expression inside the cast.
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
|
||||
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context,
|
||||
|
@ -226,7 +237,6 @@ haveAliasData(UErrorCode *pErrorCode) {
|
|||
const uint16_t *table = NULL;
|
||||
uint32_t tableStart;
|
||||
uint32_t currOffset;
|
||||
uint32_t reservedSize1;
|
||||
|
||||
data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
|
@ -253,8 +263,12 @@ haveAliasData(UErrorCode *pErrorCode) {
|
|||
gMainTable.untaggedConvArraySize = ((const uint32_t *)(table))[4];
|
||||
gMainTable.taggedAliasArraySize = ((const uint32_t *)(table))[5];
|
||||
gMainTable.taggedAliasListsSize = ((const uint32_t *)(table))[6];
|
||||
reservedSize1 = ((const uint32_t *)(table))[7]; /* reserved */
|
||||
/*gStringTableSize = ((const uint32_t *)(table))[8];*/
|
||||
gMainTable.optionTableSize = ((const uint32_t *)(table))[7];
|
||||
gMainTable.stringTableSize = ((const uint32_t *)(table))[8];
|
||||
|
||||
if (((const uint32_t *)(table))[0] > 8) {
|
||||
gMainTable.normalizedStringTableSize = ((const uint32_t *)(table))[9];
|
||||
}
|
||||
|
||||
currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
|
||||
gMainTable.converterList = table + currOffset;
|
||||
|
@ -276,11 +290,25 @@ haveAliasData(UErrorCode *pErrorCode) {
|
|||
gMainTable.taggedAliasLists = table + currOffset;
|
||||
|
||||
currOffset += gMainTable.taggedAliasListsSize;
|
||||
/* reserved */
|
||||
if (gMainTable.optionTableSize > 0
|
||||
&& ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
|
||||
{
|
||||
/* Faster table */
|
||||
gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
|
||||
}
|
||||
else {
|
||||
/* Smaller table, or I can't handle this normalization mode!
|
||||
Use the original slower table lookup. */
|
||||
gMainTable.optionTable = &defaultTableOptions;
|
||||
}
|
||||
|
||||
currOffset += reservedSize1;
|
||||
currOffset += gMainTable.optionTableSize;
|
||||
gMainTable.stringTable = table + currOffset;
|
||||
|
||||
currOffset += gMainTable.stringTableSize;
|
||||
gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
|
||||
? gMainTable.stringTable : (table + currOffset));
|
||||
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
@ -411,6 +439,15 @@ findConverter(const char *alias, UErrorCode *pErrorCode) {
|
|||
uint32_t mid, start, limit;
|
||||
uint32_t lastMid;
|
||||
int result;
|
||||
char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
|
||||
|
||||
if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
|
||||
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
/* Lower case and remove ignorable characters. */
|
||||
ucnv_io_stripForCompare(strippedName, alias);
|
||||
|
||||
/* do a binary search for the alias */
|
||||
start = 0;
|
||||
|
@ -424,7 +461,12 @@ findConverter(const char *alias, UErrorCode *pErrorCode) {
|
|||
break; /* We haven't moved, and it wasn't found. */
|
||||
}
|
||||
lastMid = mid;
|
||||
result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
|
||||
if (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) {
|
||||
result = ucnv_compareNames(strippedName, GET_STRING(gMainTable.aliasList[mid]));
|
||||
}
|
||||
else {
|
||||
result = uprv_strcmp(strippedName, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
|
||||
}
|
||||
|
||||
if (result < 0) {
|
||||
limit = mid;
|
||||
|
@ -981,22 +1023,23 @@ ucnv_swapAliases(const UDataSwapper *ds,
|
|||
}
|
||||
|
||||
inTable=(const uint16_t *)((const char *)inData+headerSize);
|
||||
uprv_memset(toc, 0, sizeof(toc));
|
||||
toc[tocLengthIndex]=tocLength=ds->readUInt32(((const uint32_t *)inTable)[tocLengthIndex]);
|
||||
if(tocLength<minTocLength) {
|
||||
udata_printError(ds, "ucnv_swapAliases(): table of contents too short (%u sections)\n", tocLength);
|
||||
if(tocLength<minTocLength || offsetsCount<=tocLength) {
|
||||
udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* read the known part of the table of contents */
|
||||
for(i=converterListIndex; i<=minTocLength; ++i) {
|
||||
for(i=converterListIndex; i<=tocLength; ++i) {
|
||||
toc[i]=ds->readUInt32(((const uint32_t *)inTable)[i]);
|
||||
}
|
||||
|
||||
/* compute offsets */
|
||||
offsets[tocLengthIndex]=0;
|
||||
uprv_memset(offsets, 0, sizeof(offsets));
|
||||
offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
|
||||
for(i=tagListIndex; i<=stringTableIndex; ++i) {
|
||||
for(i=tagListIndex; i<=tocLength; ++i) {
|
||||
offsets[i]=offsets[i-1]+toc[i-1];
|
||||
}
|
||||
|
||||
|
@ -1024,6 +1067,11 @@ ucnv_swapAliases(const UDataSwapper *ds,
|
|||
/* swap strings */
|
||||
ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)toc[stringTableIndex],
|
||||
outTable+offsets[stringTableIndex], pErrorCode);
|
||||
/* swap normalized strings */
|
||||
if (toc[normalizedStringTableIndex] > 0) {
|
||||
ds->swapInvChars(ds, inTable+offsets[normalizedStringTableIndex], 2*(int32_t)toc[normalizedStringTableIndex],
|
||||
outTable+offsets[normalizedStringTableIndex], pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
|
||||
return 0;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -24,6 +24,16 @@
|
|||
#define UCNV_NUM_RESERVED_TAGS 2
|
||||
#define UCNV_NUM_HIDDEN_TAGS 1
|
||||
|
||||
/*
 * String normalization types that an alias table can declare in its options
 * section.
 *
 * UCNV_IO_NORM_TYPE_COUNT is not a normalization type itself: it is the
 * number of types defined here and is used as the exclusive upper bound when
 * a value read from a data file is validated.
 *
 * Deliberately a plain anonymous enum without "typedef": no type name is
 * declared, only the constants.
 */
enum {
    UCNV_IO_UNNORMALIZED,
    UCNV_IO_STD_NORMALIZED,
    UCNV_IO_NORM_TYPE_COUNT
};
|
||||
|
||||
/* Contents of the options section (section 7) of the converter alias table. */
typedef struct {
    /* One of the UCNV_IO_* normalization type constants. */
    uint16_t stringNormalizationType;
} UConverterAliasOptions;
|
||||
|
||||
typedef struct UConverterAlias {
|
||||
const uint16_t *converterList;
|
||||
const uint16_t *tagList;
|
||||
|
@ -31,7 +41,9 @@ typedef struct UConverterAlias {
|
|||
const uint16_t *untaggedConvArray;
|
||||
const uint16_t *taggedAliasArray;
|
||||
const uint16_t *taggedAliasLists;
|
||||
const UConverterAliasOptions *optionTable;
|
||||
const uint16_t *stringTable;
|
||||
const uint16_t *normalizedStringTable;
|
||||
|
||||
uint32_t converterListSize;
|
||||
uint32_t tagListSize;
|
||||
|
@ -39,7 +51,9 @@ typedef struct UConverterAlias {
|
|||
uint32_t untaggedConvArraySize;
|
||||
uint32_t taggedAliasArraySize;
|
||||
uint32_t taggedAliasListsSize;
|
||||
/*uint32_t stringTableSize;*/
|
||||
uint32_t optionTableSize;
|
||||
uint32_t stringTableSize;
|
||||
uint32_t normalizedStringTableSize;
|
||||
} UConverterAlias;
|
||||
|
||||
/**
|
||||
|
@ -59,10 +73,10 @@ typedef struct UConverterAlias {
|
|||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
U_CFUNC char * U_EXPORT2
|
||||
U_CAPI char * U_EXPORT2
|
||||
ucnv_io_stripASCIIForCompare(char *dst, const char *name);
|
||||
|
||||
U_CFUNC char * U_EXPORT2
|
||||
U_CAPI char * U_EXPORT2
|
||||
ucnv_io_stripEBCDICForCompare(char *dst, const char *name);
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2004, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -84,7 +84,7 @@ static const UDataInfo dataInfo={
|
|||
0,
|
||||
|
||||
{0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
|
||||
{3, 0, 0, 0}, /* formatVersion */
|
||||
{3, 0, 1, 0}, /* formatVersion */
|
||||
{1, 4, 2, 0} /* dataVersion */
|
||||
};
|
||||
|
||||
|
@ -137,6 +137,10 @@ static UBool standardTagsUsed = FALSE;
|
|||
static UBool verbose = FALSE;
|
||||
static int lineNum = 1;
|
||||
|
||||
static UConverterAliasOptions tableOptions = {
|
||||
UCNV_IO_UNNORMALIZED
|
||||
};
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
|
@ -192,7 +196,8 @@ enum
|
|||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR
|
||||
SOURCEDIR,
|
||||
OPTIMIZE
|
||||
};
|
||||
|
||||
static UOption options[]={
|
||||
|
@ -201,7 +206,8 @@ static UOption options[]={
|
|||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR
|
||||
UOPTION_SOURCEDIR,
|
||||
UOPTION_DEF( "optimize", 'O', UOPT_REQUIRES_ARG),
|
||||
};
|
||||
|
||||
extern int
|
||||
|
@ -234,6 +240,7 @@ main(int argc, char* argv[]) {
|
|||
"\t-c or --copyright include a copyright notice\n"
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-s or --sourcedir source directory, followed by the path\n",
|
||||
"\t-O or --optimize optimize the table for \"size\" or \"speed\"\n",
|
||||
argv[0]);
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
@ -242,6 +249,19 @@ main(int argc, char* argv[]) {
|
|||
verbose = TRUE;
|
||||
}
|
||||
|
||||
if(options[OPTIMIZE].doesOccur) {
|
||||
if (strcmp(options[OPTIMIZE].value, "size") == 0) {
|
||||
tableOptions.stringNormalizationType = UCNV_IO_UNNORMALIZED;
|
||||
}
|
||||
else if (strcmp(options[OPTIMIZE].value, "speed") == 0) {
|
||||
tableOptions.stringNormalizationType = UCNV_IO_STD_NORMALIZED;
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Invalid value for optimization\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(argc>=2) {
|
||||
path=argv[1];
|
||||
} else {
|
||||
|
@ -905,6 +925,26 @@ createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, ui
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Build the normalized counterpart of a block of NUL-terminated strings.
 *
 * Every string keeps the offset it has in origStringBlock, so an offset into
 * the original block addresses the matching normalized string as well. Each
 * non-empty string is passed through ucnv_io_stripForCompare(); when the
 * result is non-empty, the bytes from its end up to the end of the original
 * string's slot are zero-filled.
 *
 * @param normalizedStrings  Output buffer; stringBlockLength bytes are written.
 * @param origStringBlock    Block of consecutive NUL-terminated strings.
 * @param stringBlockLength  Number of bytes in origStringBlock.
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    int32_t currStrLen;

    /* Begin with a verbatim copy; empty strings are left exactly as copied. */
    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    /*
     * Check the remaining length before measuring the next string: once the
     * block is used up, origStringBlock points one past its end and must not
     * be read. A trailing string whose terminator is not inside the block
     * also ends the loop and stays as copied.
     */
    while (stringBlockLength > 0
        && (currStrLen = (int32_t)uprv_strlen(origStringBlock)) < stringBlockLength)
    {
        /* Size of this string's slot, including its terminating NUL. */
        int32_t currStrSize = currStrLen + 1;

        if (currStrLen > 0) {
            int32_t normStrLen;

            /* NOTE(review): presumably the stripped form is never longer than the original -- confirm. */
            ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
            normStrLen = (int32_t)uprv_strlen(normalizedStrings);
            if (normStrLen > 0) {
                /* Clear what is left of the original text behind the shorter normalized string. */
                uprv_memset(normalizedStrings + normStrLen, 0, currStrSize - normStrLen);
            }
        }

        /* Advance both blocks by the same amount so offsets stay in step. */
        stringBlockLength -= currStrSize;
        normalizedStrings += currStrSize;
        origStringBlock += currStrSize;
    }
}
|
||||
|
||||
static void
|
||||
writeAliasTable(UNewDataMemory *out) {
|
||||
uint32_t i, j;
|
||||
|
@ -928,7 +968,12 @@ writeAliasTable(UNewDataMemory *out) {
|
|||
}
|
||||
|
||||
/* Write the size of the TOC */
|
||||
udata_write32(out, 8);
|
||||
if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
|
||||
udata_write32(out, 8);
|
||||
}
|
||||
else {
|
||||
udata_write32(out, 9);
|
||||
}
|
||||
|
||||
/* Write the sizes of each section */
|
||||
/* All sizes are the number of uint16_t units, not bytes */
|
||||
|
@ -938,8 +983,16 @@ writeAliasTable(UNewDataMemory *out) {
|
|||
udata_write32(out, uniqueAliasesSize); /* The preresolved form of mapping an untagged the alias to a converter */
|
||||
udata_write32(out, tagCount * converterCount);
|
||||
udata_write32(out, aliasListsSize + 1);
|
||||
udata_write32(out, 0); /* Reserved space. */
|
||||
if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
|
||||
udata_write32(out, 0);
|
||||
}
|
||||
else {
|
||||
udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
|
||||
}
|
||||
udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
|
||||
if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
|
||||
udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
|
||||
}
|
||||
|
||||
/* write the table of converters */
|
||||
/* Think of this as the column headers */
|
||||
|
@ -973,12 +1026,28 @@ writeAliasTable(UNewDataMemory *out) {
|
|||
/* Write the lists */
|
||||
udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
|
||||
|
||||
/* Write any options for the alias table. */
|
||||
if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
|
||||
udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
|
||||
}
|
||||
|
||||
/* write the tags strings */
|
||||
udata_writeString(out, tagBlock.store, tagBlock.top);
|
||||
|
||||
/* write the aliases strings */
|
||||
udata_writeString(out, stringBlock.store, stringBlock.top);
|
||||
|
||||
/* write the normalized aliases strings */
|
||||
if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
|
||||
char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
|
||||
createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
|
||||
createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
|
||||
|
||||
/* Write out the complete normalized array. */
|
||||
udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
|
||||
uprv_free(normalizedStrings);
|
||||
}
|
||||
|
||||
uprv_free(aliasArrLists);
|
||||
uprv_free(uniqueAliases);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue