ICU-3100 changes according to the code review

X-SVN-Rev: 15870
This commit is contained in:
Vladimir Weinstein 2004-06-14 21:00:06 +00:00
parent 1587dc9caa
commit 0a61ee4332
3 changed files with 106 additions and 53 deletions

View file

@ -105,6 +105,7 @@ struct CollatorSpec {
UColAttributeValue options[UCOL_ATTRIBUTE_COUNT];
uint32_t variableTopValue;
UChar variableTopString[locElementCapacity];
int32_t variableTopStringLen;
UBool variableTopSet;
struct {
const char *start;
@ -198,7 +199,7 @@ _processRFC3066Locale(CollatorSpec *spec, uint32_t, const char* string,
char terminator = *string;
string++;
const char *end = uprv_strchr(string+1, terminator);
if(end - string > loc3066Capacity) {
if(end == NULL || end - string >= loc3066Capacity) {
*status = U_BUFFER_OVERFLOW_ERROR;
return string;
} else {
@ -245,6 +246,10 @@ readHexCodeUnit(const char **string, UErrorCode *status)
noDigits++;
(*string)++;
}
// if the string was terminated before we read 4 digits, set an error
if(noDigits < 4) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return result;
}
@ -255,9 +260,13 @@ _processVariableTop(CollatorSpec *spec, uint32_t value1, const char* string, UEr
// get four digits
int32_t i = 0;
if(!value1) {
while(U_SUCCESS(*status) && *string != 0 && *string != '_') {
while(U_SUCCESS(*status) && i < locElementCapacity && *string != 0 && *string != '_') {
spec->variableTopString[i++] = readHexCodeUnit(&string, status);
}
spec->variableTopStringLen = i;
if(i == locElementCapacity && (*string != 0 || *string != '_')) {
*status = U_BUFFER_OVERFLOW_ERROR;
}
} else {
spec->variableTopValue = readHexCodeUnit(&string, status);
}
@ -346,7 +355,7 @@ ucol_sit_readSpecs(CollatorSpec *s, const char *string,
}
static
int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, UErrorCode *status)
int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity, UErrorCode *status)
{
int32_t i = 0, j = 0;
int32_t len = 0;
@ -355,17 +364,24 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, UErrorCode *statu
for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
if(s->entries[i].start) {
if(len) {
uprv_strcat(destination, "_");
if(len < capacity) {
uprv_strcat(destination, "_");
}
len++;
}
optName = *(s->entries[i].start);
if(optName == languageArg || optName == regionArg || optName == variantArg || optName == keywordArg) {
for(j = 0; j < s->entries[i].len; j++) {
destination[len++] = uprv_toupper(*(s->entries[i].start+j));
if(len + j < capacity) {
destination[len+j] = uprv_toupper(*(s->entries[i].start+j));
}
}
} else {
uprv_strncat(destination,s->entries[i].start, s->entries[i].len);
len += s->entries[i].len;
} else {
len += s->entries[i].len;
if(len < capacity) {
uprv_strncat(destination,s->entries[i].start, s->entries[i].len);
}
}
}
}
@ -450,7 +466,7 @@ ucol_openFromShortString( const char *definition,
for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
if(s.options[i] != UCOL_DEFAULT) {
if(ucol_getAttribute(result, (UColAttribute)i, status) != s.options[i] || forceDefaults) {
if(forceDefaults || ucol_getAttribute(result, (UColAttribute)i, status) != s.options[i]) {
ucol_setAttribute(result, (UColAttribute)i, s.options[i], status);
}
@ -464,7 +480,7 @@ ucol_openFromShortString( const char *definition,
}
if(s.variableTopSet) {
if(s.variableTopString[0]) {
ucol_setVariableTop(result, s.variableTopString, u_strlen(s.variableTopString), status);
ucol_setVariableTop(result, s.variableTopString, s.variableTopStringLen, status);
} else { // we set by value, using 'B'
ucol_restoreVariableTop(result, s.variableTopValue, status);
}
@ -481,16 +497,20 @@ ucol_openFromShortString( const char *definition,
}
static void appendShortStringElement(const char *src, int32_t len, char *result, int32_t *resultSize, char arg)
static void appendShortStringElement(const char *src, int32_t len, char *result, int32_t *resultSize, int32_t capacity, char arg)
{
if(len) {
if(*resultSize) {
uprv_strcat(result, "_");
if(*resultSize < capacity) {
uprv_strcat(result, "_");
}
(*resultSize)++;
}
*resultSize += len + 1;
uprv_strncat(result, &arg, 1);
uprv_strncat(result, src, len);
if(*resultSize < capacity) {
uprv_strncat(result, &arg, 1);
uprv_strncat(result, src, len);
}
}
}
@ -521,15 +541,15 @@ ucol_getShortDefinitionString(const UCollator *coll,
if(elementSize) {
// we should probably canonicalize here...
elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, status);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, languageArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, languageArg);
elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, status);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, regionArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, regionArg);
elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, status);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, scriptArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, scriptArg);
elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, status);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, variantArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, variantArg);
elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, internalBufferSize, status);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, keywordArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, keywordArg);
}
int32_t i = 0;
@ -540,14 +560,14 @@ ucol_getShortDefinitionString(const UCollator *coll,
if(attribute != UCOL_DEFAULT) {
char letter = ucol_sit_attributeValueToLetter(attribute, status);
appendShortStringElement(&letter, 1,
buffer, &resultSize, options[i].optionStart);
buffer, &resultSize, capacity, options[i].optionStart);
}
}
}
if(coll->variableTopValueisDefault == FALSE) {
//s.variableTopValue = ucol_getVariableTop(coll, status);
elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue, 16);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, variableTopValArg);
appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, variableTopValArg);
}
UParseError parseError;
@ -565,16 +585,16 @@ ucol_normalizeShortDefinitionString(const char *definition,
if(U_FAILURE(*status)) {
return 0;
}
if(capacity == 0 || destination == NULL) {
return uprv_strlen(definition);
if(destination) {
uprv_memset(destination, 0, capacity*sizeof(char));
}
uprv_memset(destination, 0, capacity*sizeof(char));
// validate
CollatorSpec s;
ucol_sit_initCollatorSpecs(&s);
ucol_sit_readSpecs(&s, definition, parseError, status);
return ucol_sit_dumpSpecs(&s, destination, status);
return ucol_sit_dumpSpecs(&s, destination, capacity, status);
}
// structure for packing the bits of the attributes in the
@ -604,8 +624,6 @@ static const uint32_t keywordWidth = 5;
static const uint32_t localeShift = 0;
static const uint32_t localeWidth = 7;
static const uint32_t needExpansion = 0xC0000000;
static uint32_t ucol_sit_putLocaleInIdentifier(uint32_t result, const char* locale, UErrorCode* status) {
char buffer[internalBufferSize], keywordBuffer[internalBufferSize],
@ -634,7 +652,7 @@ static uint32_t ucol_sit_putLocaleInIdentifier(uint32_t result, const char* loca
mid = (high+low) >> 1; /*Finds median*/
if (mid == oldmid)
return needExpansion; // we didn't find it
return UCOL_SIT_COLLATOR_NOT_ENCODABLE; // we didn't find it
compVal = uprv_strcmp(baseName, locales[mid]);
if (compVal < 0){
@ -674,7 +692,7 @@ ucol_collatorToIdentifier(const UCollator *coll,
// if variable top is not default, we need to use strings
if(coll->variableTopValueisDefault != TRUE) {
return needExpansion;
return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
}
if(locale == NULL) {
@ -726,8 +744,8 @@ ucol_openFromIdentifier(uint32_t identifier,
// the collator is all default, so we will set only the values that will differ from
// the default values.
if(attrValue != UCOL_DEFAULT) {
if(ucol_getAttribute(result, attributesToBits[i].attribute, status) != attrValue
|| forceDefaults) {
if(forceDefaults ||
ucol_getAttribute(result, attributesToBits[i].attribute, status) != attrValue) {
ucol_setAttribute(result, attributesToBits[i].attribute, attrValue, status);
}
}
@ -778,7 +796,7 @@ ucol_identifierToShortString(uint32_t identifier,
uprv_strncat(buffer, &letter, 1);
}
}
return ucol_sit_dumpSpecs(&s, buffer, status);
return ucol_sit_dumpSpecs(&s, buffer, capacity, status);
}
#endif
}
@ -877,7 +895,7 @@ addContraction(const UCollator *coll, USet *contractions, UChar *buffer, int32_t
uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
// we might have a contraction that ends from previous level
if(newCE != UCOL_NOT_FOUND && rightIndex > 1) {
uset_addString(contractions, buffer, rightIndex + 1);
uset_addString(contractions, buffer, rightIndex);
}
UCharOffset++;
@ -948,6 +966,14 @@ ucol_getContractions( const UCollator *coll,
USet *contractions,
UErrorCode *status)
{
if(U_FAILURE(*status)) {
return 0;
}
if(coll == NULL || contractions == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
uset_clear(contractions);
int32_t rulesLen = 0;
const UChar* rules = ucol_getRules(coll, &rulesLen);
@ -1007,14 +1033,19 @@ ucol_getUnsafeSet( const UCollator *coll,
int32_t i = 0, j = 0;
int32_t contsSize = ucol_getContractions(coll, contractions, status);
UChar32 c = 0;
// Contraction set consists only of strings
// to get unsafe code points, we need to
// break the strings apart and add them to the unsafe set
for(i = 0; i < contsSize; i++) {
len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSize, status);
if(len > 0) {
for(j = 1; j < len; j++) {
uset_add(unsafe, buffer[j]);
j = 0;
while(j < len) {
U16_NEXT(buffer, j, len, c);
if(j < len) {
uset_add(unsafe, c);
}
}
}
}

View file

@ -315,8 +315,11 @@ ucol_openRules( const UChar *rules,
* state for a locale.
* @param parseError if not NULL, structure that will get filled with error's pre
* and post context in case of error.
* @param forceDefaults controls whether the settings that are the same as the collator
* default settings are set (TRUE) or not (FALSE). If the definition
* @param forceDefaults if FALSE, the settings that are the same as the collator
* default settings will not be applied (for example, setting
* French secondary on a French collator would not be executed).
* If TRUE, all the settings will be applied regardless of the
* collator default value. If the definition
* strings are to be cached, this parameter should be set to FALSE.
* @param status Error code. Apart from regular error conditions connected to
* instantiating collators (like out of memory or similar), this
@ -340,9 +343,12 @@ ucol_openFromShortString( const char *definition,
/**
* Get a set containing the contractions defined by the collator. The set includes
* both the UCA contractions and the contractions defined by the collator
* both the UCA contractions and the contractions defined by the collator. This set
* will contain only strings. If a tailoring explicitly suppresses contractions from
* the UCA (like Russian), removed contractions will not be in the resulting set.
* @param coll collator
* @param conts the set to hold the result
* @param conts the set to hold the result. It gets emptied before
* contractions are added.
* @param status to hold the error code
* @return the size of the contraction set
*
@ -624,7 +630,9 @@ ucol_getRules( const UCollator *coll,
* http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
* This API supports preflighting.
* @param coll a collator
* @param locale locale for the collator.
* @param locale a locale that will appear as the collator's locale in the resulting
* short string definition. If NULL, the locale will be harvested
* from the collator.
* @param buffer space to hold the resulting string
* @param capacity capacity of the buffer
* @param status for returning errors. All the preflighting errors are featured
@ -652,6 +660,7 @@ ucol_getShortDefinitionString(const UCollator *coll,
* @param status Error code. This API will return an error if an invalid attribute
* or attribute/value combination is specified. All the preflighting
* errors are also featured
* @return length of the resulting normalized string.
*
* @see ucol_openFromShortString
* @see ucol_getShortDefinitionString
@ -999,13 +1008,22 @@ ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode
U_STABLE USet * U_EXPORT2
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
/**
* Returned by ucol_collatorToIdentifier to signify that the collator is
* not encodable as an identifier.
* @internal ICU 3.0
*/
#define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000
/**
* Get a 31-bit identifier given a collator.
* @param coll UCollator
* @param locale locale for the collator.
* @param status set U_BUFFER_OVERFLOW_ERROR if collator cannot be encoded
* @return 31-bit identifier. MSB is not used, hence the 31 bits
* @param locale a locale that will appear as the collator's locale in the resulting
* short string definition. If NULL, the locale will be harvested
* from the collator.
* @param status holds error messages
* @return 31-bit identifier. MSB is used if the collator cannot be encoded. In that
* case UCOL_SIT_COLLATOR_NOT_ENCODABLE is returned
* @see ucol_openFromIdentifier
* @see ucol_identifierToShortString
* @internal ICU 3.0
@ -1018,9 +1036,13 @@ ucol_collatorToIdentifier(const UCollator *coll,
/**
* Open a collator given a 31-bit identifier
* @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
* @param forceDefaults controls whether the settings that are the same as the collator
* default settings are set (TRUE) or not (FALSE). If the definition
* strings are to be cached, should be set to FALSE.
* @param forceDefaults if FALSE, the settings that are the same as the collator
* default settings will not be applied (for example, setting
* French secondary on a French collator would not be executed).
* If TRUE, all the settings will be applied regardless of the
* collator default value. If the definition
* strings that can be produced from a collator instantiated by
* calling this API are to be cached, this parameter should be set to FALSE.
* @param status for returning errors
* @return UCollator object
* @see ucol_collatorToIdentifier

View file

@ -1788,7 +1788,7 @@ static void TestShortString(void)
/* test identifiers */
identifier = ucol_collatorToIdentifier(coll, locale, &status);
if(identifier < 0xC0000000) {
if(identifier < UCOL_SIT_COLLATOR_NOT_ENCODABLE) {
ucol_identifierToShortString(identifier, fromIDBuffer, 256, FALSE, &status);
fromID = ucol_openFromIdentifier(identifier, FALSE, &status);
if(!ucol_equals(coll, fromID)) {
@ -1872,27 +1872,27 @@ TestGetContractionsAndUnsafes(void)
{ "ru",
"[{\\u0474\\u030F}{\\u0475\\u030F}{\\u04D8\\u0308}{\\u04D9\\u0308}{\\u04E8\\u0308}{\\u04E9\\u0308}]",
"[{\\u0430\\u0306}{\\u0410\\u0306}{\\u0430\\u0308}{\\u0410\\u0306}{\\u0433\\u0301}{\\u0413\\u0301}]",
"[\\u0306\\u0308]",
"[\\u0474\\u0430\\u0410\\u0433\\u0413aAbB]"
"[\\u0474\\u0475\\u04d8\\u04d9\\u04e8\\u04e9]",
"[aAbB\\u0430\\u0410\\u0433\\u0413]"
},
{ "uk",
"[{\\u0474\\u030F}{\\u0475\\u030F}{\\u04D8\\u0308}{\\u04D9\\u0308}{\\u04E8\\u0308}{\\u04E9\\u0308}"
"{\\u0430\\u0306}{\\u0410\\u0306}{\\u0430\\u0308}{\\u0410\\u0306}{\\u0433\\u0301}{\\u0413\\u0301}]",
"[]",
"[\\u0306\\u030f\\u0308]",
"[\\u0474\\u0475\\u04D8\\u04D9\\u04E8\\u04E9\\u0430\\u0410\\u0433\\u0413aAbBxv]"
"[\\u0474\\u0475\\u04D8\\u04D9\\u04E8\\u04E9\\u0430\\u0410\\u0433\\u0413]",
"[aAbBxv]",
},
{ "ja",
"[{\\u309d\\u3099}{\\u30fd\\u3099}]",
"[{lj}{nj}]",
"[\\u3099]",
"[\\u3099\\u309d\\u30fd]",
"[\\u30a6\\u3044\\uff73]"
},
{ "sh",
"[{C\\u0301}{C\\u030C}{C\\u0341}{DZ\\u030C}{Dz\\u030C}{D\\u017D}{D\\u017E}{lj}{nj}]",
"[{\\u309d\\u3099}{\\u30fd\\u3099}]",
"[j]",
"[n]"
"[nlcdzNLCDZ]",
"[jabv]"
}
};