From 416839892a58b016903e69b3a4f051aac27e3dc4 Mon Sep 17 00:00:00 2001 From: Vladimir Weinstein Date: Thu, 12 Feb 2004 08:32:07 +0000 Subject: [PATCH] ICU-3567 CODAN bug fixes X-SVN-Rev: 14495 --- icu4c/source/i18n/ucol.cpp | 65 ++++++++++++++++++++++------------ icu4c/source/i18n/ucol_elm.cpp | 5 +++ 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 34025efce84..1cc4f8587df 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -3971,6 +3971,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, uint32_t digIndx = 0; uint32_t endIndex = 0; uint32_t leadingZeroIndex = 0; + uint32_t trailingZeroCount = 0; uint32_t primWeight = 0; @@ -4019,11 +4020,10 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, uprv_realloc(numTempBuf, numTempBufSize); } - // Skipping over "trailing" zeroes but we still add to digIndx. - if (digVal != 0 || nonZeroValReached){ - if (digVal != 0 && !nonZeroValReached) + // Skip over trailing zeroes, and keep a count of them. + if (digVal != 0) nonZeroValReached = TRUE; - + if (nonZeroValReached){ /* We parse the digit string into base 100 numbers (this fits into a byte). We only add to the buffer in twos, thus if we are parsing an odd character, @@ -4037,23 +4037,41 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, ones place and the second digit encountered into the tens place. */ - if (digIndx % 2 == 1){ + if ((digIndx + trailingZeroCount) % 2 == 1){ + // High-order digit case (tens place) collateVal += (uint8_t)(digVal * 10); - // This removes leading zeroes. - if (collateVal == 0 && !leadingZeroIndex) - leadingZeroIndex = ((digIndx-1)/2) + 2; - else if (leadingZeroIndex) + // We cannot set leadingZeroIndex unless it has been set for the + // low-order digit. Therefore, all we can do for the high-order + // digit is turn it off, never on. + // The only time we will have a high digit without a low is for + // the very first non-zero digit, so no zero check is necessary. + if (collateVal != 0) leadingZeroIndex = 0; - numTempBuf[((digIndx-1)/2) + 2] = collateVal*2 + 6; + numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; collateVal = 0; } else{ + // Low-order digit case (ones place) collateVal = (uint8_t)digVal; + + // Check for leading zeroes. + if (collateVal == 0) + { + if (!leadingZeroIndex) + leadingZeroIndex = (digIndx/2) + 2; + } + else + leadingZeroIndex = 0; + + // No need to write to buffer; the case of a last odd digit + // is handled below. } + ++digIndx; } - digIndx++; + else + ++trailingZeroCount; if (!collIter_bos(source)){ ch = getPrevNormalizedChar(source); @@ -4092,24 +4110,22 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, if (nonZeroValReached == FALSE){ digIndx = 2; + trailingZeroCount = 0; numTempBuf[2] = 6; } - if (digIndx % 2 != 0){ - if (collateVal == 0 && leadingZeroIndex == 0) { - // This removes the leading 0 in a odd number sequence of - // numbers e.g. avery001 - leadingZeroIndex = ((digIndx - 1) >> 1) + 2; - } - else { - // this is not a leading 0, we add it in + if ((digIndx + trailingZeroCount) % 2 != 0){ numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6; - digIndx += 1; + digIndx += 1; // The implicit leading zero } + if (trailingZeroCount % 2 != 0){ + // We had to consume one trailing zero for the low digit + // of the least significant byte + digIndx += 1; // The trailing zero not in the exponent + trailingZeroCount -= 1; } endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ; - digIndx = ((endIndex - 2) << 1) + 1; // removing initial zeros // Subtract one off of the last byte. Really the first byte here, but it's reversed... numTempBuf[2] -= 1; @@ -4118,9 +4134,14 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, We want to skip over the first two slots in the buffer. The first slot is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the sign/exponent byte: 0x80 + (decimalPos/2) & 7f. + The exponent must be adjusted by the number of leading zeroes, and the number of + trailing zeroes. */ numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; - numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F)); + uint32_t exponent = (digIndx+trailingZeroCount)/2; + if (leadingZeroIndex) + exponent -= ((digIndx/2) + 2 - leadingZeroIndex); + numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F)); // Now transfer the collation key to our collIterate struct. // The total size for our collation key is endIndx bumped up to the next largest even value divided by two. diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp index 26961fa357d..7d2f9359e54 100644 --- a/icu4c/source/i18n/ucol_elm.cpp +++ b/icu4c/source/i18n/ucol_elm.cpp @@ -1103,6 +1103,11 @@ uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4); } element->mapCE = expansion; + + // Need to go back to the beginning of the digit string if in the middle! + if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars + unsafeCPSet(t->unsafeCP, (UChar)uniChar); + } } // here we want to add the prefix structure.