From 796016432700c4eca0062468db178730a4e36d53 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 20 Sep 2016 01:10:36 +0000 Subject: [PATCH] ICU-12663 UnicodeString support nearly 2G UChars & avoid integer overflows; string-to-string case mappings & detect integer overflows; make some calls to uprv_memcpy() and similar a little more robust by casting int32_t to size_t before multiplying X-SVN-Rev: 39288 --- icu4c/source/common/bytestriebuilder.cpp | 2 +- icu4c/source/common/cmemory.h | 10 +- icu4c/source/common/cpputils.h | 18 +-- icu4c/source/common/locdispnames.cpp | 2 +- icu4c/source/common/messagepattern.cpp | 2 +- icu4c/source/common/propsvec.c | 8 +- icu4c/source/common/uarrsort.c | 10 +- icu4c/source/common/ubidi.c | 4 +- icu4c/source/common/ucasemap.cpp | 83 ++++++++++--- icu4c/source/common/ucharstriebuilder.cpp | 2 +- icu4c/source/common/ucnv.c | 4 +- icu4c/source/common/ucnv_ext.cpp | 2 +- icu4c/source/common/ucnv_io.cpp | 4 +- icu4c/source/common/ucurr.cpp | 4 +- icu4c/source/common/uidna.cpp | 12 +- icu4c/source/common/unicode/unistr.h | 1 - icu4c/source/common/uniset.cpp | 8 +- icu4c/source/common/unistr.cpp | 137 ++++++++++++++-------- icu4c/source/common/ushape.cpp | 20 ++-- icu4c/source/common/ustr_wcs.cpp | 10 +- icu4c/source/common/ustrcase.cpp | 75 ++++++++---- icu4c/source/common/ustring.cpp | 4 +- icu4c/source/common/utrie.cpp | 4 +- icu4c/source/common/utrie2_builder.cpp | 10 +- icu4c/source/i18n/rbt_rule.cpp | 2 +- icu4c/source/i18n/reldtfmt.cpp | 4 +- icu4c/source/i18n/ucoleitr.cpp | 4 +- icu4c/source/i18n/unesctrn.cpp | 2 +- icu4c/source/i18n/usearch.cpp | 6 +- icu4c/source/io/ustdio.c | 2 +- icu4c/source/test/intltest/colldata.cpp | 4 +- icu4c/source/test/intltest/idnaref.cpp | 26 ++-- icu4c/source/test/intltest/nptrans.cpp | 4 +- icu4c/source/test/intltest/strcase.cpp | 20 ++++ icu4c/source/test/intltest/ustrtest.h | 1 + icu4c/source/tools/genrb/ustr.c | 2 +- icu4c/source/tools/gensprep/store.c | 2 +- icu4c/source/tools/toolutil/package.cpp | 2 +- icu4c/source/tools/toolutil/swapimpl.cpp | 2 +- icu4c/source/tools/toolutil/toolutil.cpp | 2 +- icu4c/source/tools/toolutil/ucm.c | 2 +- 41 files changed, 327 insertions(+), 196 deletions(-) diff --git a/icu4c/source/common/bytestriebuilder.cpp b/icu4c/source/common/bytestriebuilder.cpp index f1e2001a69e..913d85a2121 100644 --- a/icu4c/source/common/bytestriebuilder.cpp +++ b/icu4c/source/common/bytestriebuilder.cpp @@ -167,7 +167,7 @@ BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) { return *this; // error instead of dereferencing null } if(elementsLength>0) { - uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); + uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement)); } delete[] elements; elements=newElements; diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 17cf900b8ea..a1211bdc3c4 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -283,7 +283,7 @@ inline T *LocalMemory::allocateInsteadAndCopy(int32_t newCapacity, int32_t le if(length>newCapacity) { length=newCapacity; } - uprv_memcpy(p, LocalPointerBase::ptr, length*sizeof(T)); + uprv_memcpy(p, LocalPointerBase::ptr, (size_t)length*sizeof(T)); } uprv_free(LocalPointerBase::ptr); LocalPointerBase::ptr=p; @@ -430,7 +430,7 @@ inline T *MaybeStackArray::resize(int32_t newCapacity, int32_t if(length>newCapacity) { length=newCapacity; } - uprv_memcpy(p, ptr, length*sizeof(T)); + uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); } releaseArray(); ptr=p; @@ -461,7 +461,7 @@ inline T *MaybeStackArray::orphanOrClone(int32_t length, int32 if(p==NULL) { return NULL; } - uprv_memcpy(p, ptr, length*sizeof(T)); + uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); } resultCapacity=length; ptr=stackArray; @@ -609,7 +609,7 @@ inline H *MaybeStackHeaderAndArray::resize(int32_t newCapac length=newCapacity; } } - uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T)); + uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); releaseMemory(); ptr=p; capacity=newCapacity; @@ -640,7 +640,7 @@ inline H *MaybeStackHeaderAndArray::orphanOrClone(int32_t l if(p==NULL) { return NULL; } - uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T)); + uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); } resultCapacity=length; ptr=&stackHeader; diff --git a/icu4c/source/common/cpputils.h b/icu4c/source/common/cpputils.h index a49649856ab..57af69a7129 100644 --- a/icu4c/source/common/cpputils.h +++ b/icu4c/source/common/cpputils.h @@ -26,45 +26,45 @@ static inline void uprv_arrayCopy(const double* src, double* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const double* src, int32_t srcStart, double* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count) - { uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); } + { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart, int8_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart, int16_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart, int32_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } static inline void uprv_arrayCopy(const UChar *src, int32_t srcStart, UChar *dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); } +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } /** * Copy an array of UnicodeString OBJECTS (not pointers). diff --git a/icu4c/source/common/locdispnames.cpp b/icu4c/source/common/locdispnames.cpp index 76b6ae0c0f0..d60920c6124 100644 --- a/icu4c/source/common/locdispnames.cpp +++ b/icu4c/source/common/locdispnames.cpp @@ -854,7 +854,7 @@ uloc_getDisplayKeywordValue( const char* locale, /* now copy the dispName over if not NULL */ if(dispName != NULL){ if(dispNameLen <= destCapacity){ - uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR); + u_memcpy(dest, dispName, dispNameLen); return u_terminateUChars(dest, destCapacity, dispNameLen, status); }else{ *status = U_BUFFER_OVERFLOW_ERROR; diff --git a/icu4c/source/common/messagepattern.cpp b/icu4c/source/common/messagepattern.cpp index 23398fdff66..31d5bb7b9a6 100644 --- a/icu4c/source/common/messagepattern.cpp +++ b/icu4c/source/common/messagepattern.cpp @@ -116,7 +116,7 @@ MessagePatternList::copyFrom( errorCode=U_MEMORY_ALLOCATION_ERROR; return; } - uprv_memcpy(a.getAlias(), other.a.getAlias(), length*sizeof(T)); + uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); } } diff --git a/icu4c/source/common/propsvec.c b/icu4c/source/common/propsvec.c index ec7ca694c13..01fb5c93a5f 100644 --- a/icu4c/source/common/propsvec.c +++ b/icu4c/source/common/propsvec.c @@ -220,7 +220,7 @@ upvec_setValue(UPropsVectors *pv, *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return; } - uprv_memcpy(newVectors, pv->v, rows*columns*4); + uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4); firstRow=newVectors+(firstRow-pv->v); lastRow=newVectors+(lastRow-pv->v); uprv_free(pv->v); @@ -242,7 +242,7 @@ upvec_setValue(UPropsVectors *pv, if(splitFirstRow) { /* copy all affected rows up one and move the lastRow pointer */ count = (int32_t)((lastRow-firstRow)+columns); - uprv_memmove(firstRow+columns, firstRow, count*4); + uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); lastRow+=columns; /* split the range and move the firstRow pointer */ @@ -253,7 +253,7 @@ upvec_setValue(UPropsVectors *pv, /* split the last row */ if(splitLastRow) { /* copy the last row data */ - uprv_memcpy(lastRow+columns, lastRow, columns*4); + uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); /* split the range and move the firstRow pointer */ lastRow[1]=lastRow[columns]=(uint32_t)limit; @@ -417,7 +417,7 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE /* add a new values vector if it is different from the current one */ if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { count+=valueColumns; - uprv_memmove(pv->v+count, row+2, valueColumns*4); + uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); } if(startlength*sizeof(UBiDiLevel)); + uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel)); saveTrailingWSStart=pBiDi->trailingWSStart; saveLength=pBiDi->length; saveDirection=pBiDi->direction; @@ -2514,7 +2514,7 @@ setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, if(saveLength>pBiDi->levelsSize) { saveLength=pBiDi->levelsSize; } - uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); + uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel)); pBiDi->trailingWSStart=saveTrailingWSStart; if(pBiDi->runCount>1) { pBiDi->direction=UBIDI_MIXED; diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index 55195663df8..e8807dd9a56 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -124,52 +124,69 @@ static inline int32_t appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, int32_t result, const UChar *s) { UChar32 c; - int32_t length, destLength; + int32_t length; UErrorCode errorCode; /* decode the result */ if(result<0) { /* (not) original code point */ c=~result; - length=-1; + length=U8_LENGTH(c); } else if(result<=UCASE_MAX_STRING_LENGTH) { c=U_SENTINEL; length=result; } else { c=result; - length=-1; + length=U8_LENGTH(c); + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow } if(destIndex=0) { /* code point */ UBool isError=FALSE; U8_APPEND(dest, destIndex, destCapacity, c, isError); if(isError) { /* overflow, nothing written */ - destIndex+=U8_LENGTH(c); + destIndex+=length; } } else { /* string */ + int32_t destLength; errorCode=U_ZERO_ERROR; u_strToUTF8( (char *)(dest+destIndex), destCapacity-destIndex, &destLength, s, length, &errorCode); + if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { + return -1; + } + if(destLength>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } destIndex+=destLength; /* we might have an overflow, but we know the actual length */ } } else { /* preflight */ - if(length<0) { - destIndex+=U8_LENGTH(c); + if(c>=0) { + destIndex+=length; } else { + int32_t destLength; errorCode=U_ZERO_ERROR; u_strToUTF8( NULL, 0, &destLength, s, length, &errorCode); + if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { + return -1; + } + if(destLength>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } destIndex+=destLength; } } @@ -178,7 +195,11 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, static inline int32_t appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) { - int32_t limit=destIndex+U8_LENGTH(c); + int32_t length=U8_LENGTH(c); + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + int32_t limit=destIndex+length; if(limitcsp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } /* Special case Dutch IJ titlecasing */ - if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && - ( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) && - ( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) { - c=0x004A; - destIndex=appendResult(dest, destIndex, destCapacity, c, s); - titleLimit++; + if (titleStart+1 < idx && + ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && + (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); + titleLimit++; } /* lowercase [titleLimit..index[ */ if(titleLimit(INT32_MAX-destIndex)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } if((destIndex+length)<=destCapacity) { uprv_memcpy(dest+destIndex, src+titleLimit, length); } @@ -498,16 +533,20 @@ int32_t toUpper(const UCaseMap *csm, } } destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); - if ((data & HAS_EITHER_DIALYTIKA) != 0) { + if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika } - if (addTonos) { + if (destIndex >= 0 && addTonos) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); } - while (numYpogegrammeni > 0) { + while (destIndex >= 0 && numYpogegrammeni > 0) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); --numYpogegrammeni; } + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } else { const UChar *s; UChar32 c2 = 0; @@ -517,6 +556,10 @@ int32_t toUpper(const UCaseMap *csm, dest[destIndex++]=(uint8_t)c2; } else { destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } } i = nextIndex; @@ -595,6 +638,10 @@ utf8_foldCase(const UCaseProps *csp, dest[destIndex++]=(uint8_t)c2; } else { destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } } diff --git a/icu4c/source/common/ucharstriebuilder.cpp b/icu4c/source/common/ucharstriebuilder.cpp index 339c82b5d59..412a58a45d6 100644 --- a/icu4c/source/common/ucharstriebuilder.cpp +++ b/icu4c/source/common/ucharstriebuilder.cpp @@ -117,7 +117,7 @@ UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorC return *this; } if(elementsLength>0) { - uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharsTrieElement)); + uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement)); } delete[] elements; elements=newElements; diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index 7a985c2ee98..527f86f140b 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -1063,7 +1063,7 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { length=(int32_t)(pArgs->sourceLimit-pArgs->source); if(length>0) { - uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); + u_memcpy(cnv->preFromU, pArgs->source, length); cnv->preFromULength=(int8_t)-length; } @@ -2745,7 +2745,7 @@ ucnv_getInvalidUChars (const UConverter * converter, } if ((*len = converter->invalidUCharLength) > 0) { - uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); + u_memcpy (errChars, converter->invalidUCharBuffer, *len); } } diff --git a/icu4c/source/common/ucnv_ext.cpp b/icu4c/source/common/ucnv_ext.cpp index 2103515b8df..f8605187240 100644 --- a/icu4c/source/common/ucnv_ext.cpp +++ b/icu4c/source/common/ucnv_ext.cpp @@ -885,7 +885,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, } else { /* the match did not use all of preFromU[] - keep the rest for replay */ int32_t length=cnv->preFromULength-match; - uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); + u_memmove(cnv->preFromU, cnv->preFromU+match, length); cnv->preFromULength=(int8_t)-length; } diff --git a/icu4c/source/common/ucnv_io.cpp b/icu4c/source/common/ucnv_io.cpp index 7aeeb9e42e2..9c3a5a87f20 100644 --- a/icu4c/source/common/ucnv_io.cpp +++ b/icu4c/source/common/ucnv_io.cpp @@ -1299,13 +1299,13 @@ ucnv_swapAliases(const UDataSwapper *ds, oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); } - uprv_memcpy(q, r, 2*count); + uprv_memcpy(q, r, 2*(size_t)count); for(i=0; iswapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); } - uprv_memcpy(q2, r, 2*count); + uprv_memcpy(q2, r, 2*(size_t)count); } } diff --git a/icu4c/source/common/ucurr.cpp b/icu4c/source/common/ucurr.cpp index 83f153bcd8f..c17a26106f4 100644 --- a/icu4c/source/common/ucurr.cpp +++ b/icu4c/source/common/ucurr.cpp @@ -416,7 +416,7 @@ struct CReg : public icu::UMemory { } uprv_strncpy(id, _id, len); id[len] = 0; - uprv_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH * sizeof(const UChar)); + u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); iso[ISO_CURRENCY_CODE_LENGTH] = 0; } @@ -940,7 +940,7 @@ toUpperCase(const UChar* source, int32_t len, const char* locale) { dest = (UChar*)uprv_malloc(sizeof(UChar) * MAX(destLen, len)); u_strToUpper(dest, destLen, source, len, locale, &ec); if (U_FAILURE(ec)) { - uprv_memcpy(dest, source, sizeof(UChar) * len); + u_memcpy(dest, source, len); } return dest; } diff --git a/icu4c/source/common/uidna.cpp b/icu4c/source/common/uidna.cpp index a9ac91b72f1..e01c9aea603 100644 --- a/icu4c/source/common/uidna.cpp +++ b/icu4c/source/common/uidna.cpp @@ -318,7 +318,7 @@ _internal_toASCII(const UChar* src, int32_t srcLength, // Step 4: if the source is ASCII then proceed to step 8 if(srcIsASCII){ if(b1Len <= destCapacity){ - uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest, b1, b1Len); reqLength = b1Len; }else{ reqLength = b1Len; @@ -364,9 +364,9 @@ _internal_toASCII(const UChar* src, int32_t srcLength, goto CLEANUP; } //Step 7: prepend the ACE prefix - uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); + u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); //Step 6: copy the contents in b2 into dest - uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); + u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); }else{ *status = U_IDNA_ACE_PREFIX_ERROR; @@ -543,7 +543,7 @@ _internal_toUnicode(const UChar* src, int32_t srcLength, //step 8: return output of step 5 reqLength = b2Len; if(b2Len <= destCapacity) { - uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); + u_memmove(dest, b2, b2Len); } } else{ @@ -572,7 +572,7 @@ _internal_toUnicode(const UChar* src, int32_t srcLength, // just return the source //copy the source to destination if(srcLength <= destCapacity){ - uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); + u_memmove(dest, src, srcLength); } reqLength = srcLength; } @@ -599,7 +599,7 @@ CLEANUP: if(dest && srcLength <= destCapacity){ // srcLength should have already been set earlier. U_ASSERT(srcLength >= 0); - uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); + u_memmove(dest, src, srcLength); } reqLength = srcLength; *status = U_ZERO_ERROR; diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 96575936670..06ab5c08228 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3619,7 +3619,6 @@ private: */ US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) - kGrowSize=128, // grow size for this buffer kInvalidHashCode=0, // invalid hash code kEmptyHashCode=1, // hash code for empty string diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp index c158e4434da..7866719a171 100644 --- a/icu4c/source/common/uniset.cpp +++ b/icu4c/source/common/uniset.cpp @@ -236,7 +236,7 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : if(list!=NULL){ // *this = o except for bmpSet and stringSpan len = o.len; - uprv_memcpy(list, o.list, len*sizeof(UChar32)); + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); if (strings != NULL && o.strings != NULL) { strings->assign(*o.strings, cloneUnicodeString, status); } else { // Invalid strings. @@ -288,7 +288,7 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { return *this; // There is no way to report this error :-( } len = o.len; - uprv_memcpy(list, o.list, len*sizeof(UChar32)); + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); if (o.bmpSet == NULL) { bmpSet = NULL; } else { @@ -1255,14 +1255,14 @@ UnicodeSet& UnicodeSet::complement(void) { if (U_FAILURE(status)) { return *this; } - uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32)); + uprv_memcpy(buffer, list + 1, (size_t)(len-1)*sizeof(UChar32)); --len; } else { ensureBufferCapacity(len+1, status); if (U_FAILURE(status)) { return *this; } - uprv_memcpy(buffer + 1, list, len*sizeof(UChar32)); + uprv_memcpy(buffer + 1, list, (size_t)len*sizeof(UChar32)); buffer[0] = UNICODESET_LOW; ++len; } diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index b88ea77c4e8..bdd58ecd267 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -84,7 +84,7 @@ us_arrayCopy(const UChar *src, int32_t srcStart, UChar *dst, int32_t dstStart, int32_t count) { if(count>0) { - uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); + uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src)); } } @@ -153,41 +153,39 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { if(count <= 0 || (uint32_t)c > 0x10ffff) { // just allocate and do not do anything else allocate(capacity); - } else { - // count > 0, allocate and fill the new string with count c's - int32_t unitCount = U16_LENGTH(c), length = count * unitCount; + } else if(c <= 0xffff) { + int32_t length = count; if(capacity < length) { capacity = length; } if(allocate(capacity)) { UChar *array = getArrayStart(); - int32_t i = 0; - - // fill the new string with c - if(unitCount == 1) { - // fill with length UChars - while(i < length) { - array[i++] = (UChar)c; - } - } else { - // get the code units for c - UChar units[U16_MAX_LENGTH]; - U16_APPEND_UNSAFE(units, i, c); - - // now it must be i==unitCount - i = 0; - - // for Unicode, unitCount can only be 1, 2, 3, or 4 - // 1 is handled above - while(i < length) { - int32_t unitIdx = 0; - while(unitIdx < unitCount) { - array[i++]=units[unitIdx++]; - } - } + UChar unit = (UChar)c; + for(int32_t i = 0; i < length; ++i) { + array[i] = unit; } + setLength(length); + } + } else { // supplementary code point, write surrogate pairs + if(count > (INT32_MAX / 2)) { + // We would get more than 2G UChars. + allocate(capacity); + return; + } + int32_t length = count * 2; + if(capacity < length) { + capacity = length; + } + if(allocate(capacity)) { + UChar *array = getArrayStart(); + UChar lead = U16_LEAD(c); + UChar trail = U16_TRAIL(c); + for(int32_t i = 0; i < length; i += 2) { + array[i] = lead; + array[i + 1] = trail; + } + setLength(length); } - setLength(length); } } @@ -344,33 +342,60 @@ UnicodeString::clone() const { // array allocation //======================================== +namespace { + +const int32_t kGrowSize = 128; + +// The number of bytes for one int32_t reference counter and capacity UChars +// must fit into a 32-bit size_t (at least when on a 32-bit platform). +// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(), +// and round up to a multiple of 16 bytes. +// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5. +// (With more complicated checks we could go up to 0x7ffffffd without rounding up, +// but that does not seem worth it.) +const int32_t kMaxCapacity = 0x7ffffff5; + +int32_t getGrowCapacity(int32_t newLength) { + int32_t growSize = (newLength >> 2) + kGrowSize; + if(growSize <= (kMaxCapacity - newLength)) { + return newLength + growSize; + } else { + return kMaxCapacity; + } +} + +} // namespace + UBool UnicodeString::allocate(int32_t capacity) { if(capacity <= US_STACKBUF_SIZE) { fUnion.fFields.fLengthAndFlags = kShortString; - } else { - // count bytes for the refCounter and the string capacity, and - // round up to a multiple of 16; then divide by 4 and allocate int32_t's - // to be safely aligned for the refCount - // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() - int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); - int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); - if(array != 0) { + return TRUE; + } + if(capacity <= kMaxCapacity) { + ++capacity; // for the NUL + // Switch to size_t which is unsigned so that we can allocate up to 4GB. + // Reference counter + UChars. + size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR; + // Round up to a multiple of 16. + numBytes = (numBytes + 15) & ~15; + int32_t *array = (int32_t *) uprv_malloc(numBytes); + if(array != NULL) { // set initial refCount and point behind the refCount *array++ = 1; + numBytes -= sizeof(int32_t); // have fArray point to the first UChar fUnion.fFields.fArray = (UChar *)array; - fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); + fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); fUnion.fFields.fLengthAndFlags = kLongString; - } else { - fUnion.fFields.fLengthAndFlags = kIsBogus; - fUnion.fFields.fArray = 0; - fUnion.fFields.fCapacity = 0; - return FALSE; + return TRUE; } } - return TRUE; + fUnion.fFields.fLengthAndFlags = kIsBogus; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; + return FALSE; } //======================================== @@ -528,7 +553,7 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { // src is a writable alias; we make a copy of that instead int32_t srcLength = src.length(); if(allocate(srcLength)) { - uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); + u_memcpy(getArrayStart(), src.getArrayStart(), srcLength); setLength(srcLength); break; } @@ -857,7 +882,7 @@ UnicodeString::extract(UChar *dest, int32_t destCapacity, } else { const UChar *array = getArrayStart(); if(len>0 && len<=destCapacity && array!=dest) { - uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); + u_memcpy(dest, array, len); } return u_terminateUChars(dest, destCapacity, len, &errorCode); } @@ -1219,7 +1244,7 @@ UnicodeString::getTerminatedBuffer() { return array; } } - if(cloneArrayIfNeeded(len+1)) { + if(len (INT32_MAX - newLength)) { + setToBogus(); + return *this; + } + newLength += srcLength; // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; // therefore we need to keep the current fArray @@ -1448,7 +1479,7 @@ UnicodeString::doReplace(int32_t start, // clone our array and allocate a bigger array if needed int32_t *bufferToDelete = 0; - if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, + if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength), FALSE, &bufferToDelete) ) { return *this; @@ -1515,7 +1546,7 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng int32_t newLength = oldLength + srcLength; // optimize append() onto a large-enough, owned string if((newLength <= getCapacity() && isBufferWritable()) || - cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize)) { + cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { UChar *newArray = getArrayStart(); // Do not copy characters when // UChar *buffer=str.getAppendBuffer(...); @@ -1863,7 +1894,9 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, return NULL; } int32_t oldLength = str.length(); - if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { + if(minCapacity <= (kMaxCapacity - oldLength) && + desiredCapacityHint <= (kMaxCapacity - oldLength) && + str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { *resultCapacity = str.getCapacity() - oldLength; return str.getArrayStart() + oldLength; } diff --git a/icu4c/source/common/ushape.cpp b/icu4c/source/common/ushape.cpp index 1bd1ab2a4ff..8420cc95c60 100644 --- a/icu4c/source/common/ushape.cpp +++ b/icu4c/source/common/ushape.cpp @@ -779,7 +779,7 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength, count--; } - uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, sourceLength); destSize = u_strlen(dest); } @@ -837,7 +837,7 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength, tempbuffer[i] = SPACE_CHAR; } - uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, sourceLength); destSize = sourceLength; } @@ -879,7 +879,7 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength, count--; } - uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, sourceLength); destSize = sourceLength; } @@ -943,7 +943,7 @@ expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UE i--; j--; } - uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, sourceLength); uprv_free(tempbuffer); @@ -1004,14 +1004,14 @@ expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErr } if(countr > 0) { - uprv_memmove(tempbuffer, tempbuffer+countr, sourceLength*U_SIZEOF_UCHAR); + u_memmove(tempbuffer, tempbuffer+countr, sourceLength); if(u_strlen(tempbuffer) < sourceLength) { for(i=sourceLength-1;i>=sourceLength-countr;i--) { tempbuffer[i] = SPACE_CHAR; } } } - uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, sourceLength); uprv_free(tempbuffer); @@ -1179,7 +1179,7 @@ expandCompositChar(UChar *dest, int32_t sourceLength, j++; } - uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, destSize); } } @@ -1573,7 +1573,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength, return 0; } } - uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(tempbuffer, source, sourceLength); if (tempsource != NULL){ uprv_free(tempsource); } @@ -1639,7 +1639,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength, countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); } - uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR); + u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)); if(tempbuffer!=buffer) { uprv_free(tempbuffer); @@ -1661,7 +1661,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength, *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return sourceLength; } - uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); + u_memcpy(dest, source, sourceLength); destLength=sourceLength; } diff --git a/icu4c/source/common/ustr_wcs.cpp b/icu4c/source/common/ustr_wcs.cpp index bf8607d4416..59a58692ec3 100644 --- a/icu4c/source/common/ustr_wcs.cpp +++ b/icu4c/source/common/ustr_wcs.cpp @@ -44,7 +44,7 @@ u_growAnyBufferFromStatic(void *context, char *newBuffer=(char *)uprv_malloc(reqCapacity*size); if(newBuffer!=NULL) { if(length>0) { - uprv_memcpy(newBuffer, *pBuffer, length*size); + uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); } *pCapacity=reqCapacity; } else { @@ -202,7 +202,7 @@ _strToWCS(wchar_t *dest, count = (int32_t)(pIntTarget-intTarget); if(0 < count && count <= destCapacity){ - uprv_memcpy(dest,intTarget,count*sizeof(wchar_t)); + uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); } if(pDestLength){ @@ -254,7 +254,7 @@ u_strToWCS(wchar_t *dest, srcLength = u_strlen(src); } if(0 < srcLength && srcLength <= destCapacity){ - uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); + u_memcpy(dest, src, srcLength); } if(pDestLength){ *pDestLength = srcLength; @@ -397,7 +397,7 @@ _strFromWCS( UChar *dest, } if(nulLen>0){ /* copy the contents to tempStack */ - uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t)); + uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); } /* null terminate the tempBuffer */ @@ -509,7 +509,7 @@ u_strFromWCS(UChar *dest, srcLength = u_strlen((const UChar *)src); } if(0 < srcLength && srcLength <= destCapacity){ - uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); + u_memcpy(dest, src, srcLength); } if(pDestLength){ *pDestLength = srcLength; diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index fd5d2d6b03b..0c184e48737 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -47,24 +47,27 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, if(result<0) { /* (not) original code point */ c=~result; - length=-1; + length=U16_LENGTH(c); } else if(result<=UCASE_MAX_STRING_LENGTH) { c=U_SENTINEL; length=result; } else { c=result; - length=-1; + length=U16_LENGTH(c); + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow } if(destIndex=0) { /* code point */ UBool isError=FALSE; U16_APPEND(dest, destIndex, destCapacity, c, isError); if(isError) { /* overflow, nothing written */ - destIndex+=U16_LENGTH(c); + destIndex+=length; } } else { /* string */ @@ -80,11 +83,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, } } else { /* preflight */ - if(length<0) { - destIndex+=U16_LENGTH(c); - } else { - destIndex+=length; - } + destIndex+=length; } return destIndex; } @@ -93,6 +92,8 @@ static inline int32_t appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { if(destIndex0) { if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); + u_memcpy(dest+destIndex, src+prev, length); } destIndex+=length; } @@ -258,15 +263,22 @@ ustrcase_internalToTitle(const UCaseMap *csm, csc.cpLimit=titleLimit; c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } /* Special case Dutch IJ titlecasing */ - if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && - ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && - ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { - c=(UChar32) 0x004A; - destIndex=appendResult(dest, destIndex, destCapacity, c, s); - titleLimit++; + if (titleStart+1 < idx && + ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && + (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + titleLimit++; } /* lowercase [titleLimit..index[ */ @@ -280,11 +292,18 @@ ustrcase_internalToTitle(const UCaseMap *csm, src, &csc, titleLimit, idx, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return destIndex; + } } else { /* Optionally just copy the rest of the word unchanged. */ length=idx-titleLimit; + if(length>(INT32_MAX-destIndex)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); + u_memcpy(dest+destIndex, src+titleLimit, length); } destIndex+=length; } @@ -860,16 +879,20 @@ int32_t toUpper(const UCaseMap *csm, } } destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); - if ((data & HAS_EITHER_DIALYTIKA) != 0) { + if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika } - if (addTonos) { + if (destIndex >= 0 && addTonos) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); } - while (numYpogegrammeni > 0) { + while (destIndex >= 0 && numYpogegrammeni > 0) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); --numYpogegrammeni; } + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } else { const UChar *s; UChar32 c2 = 0; @@ -879,6 +902,10 @@ int32_t toUpper(const UCaseMap *csm, dest[destIndex++]=(UChar)c2; } else { destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } } i = nextIndex; @@ -951,6 +978,10 @@ ustr_foldCase(const UCaseProps *csp, dest[destIndex++]=(UChar)c2; } else { destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } } @@ -1024,7 +1055,7 @@ ustrcase_map(const UCaseMap *csm, if(destLength>0) { int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; if(copyLength>0) { - uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); + u_memmove(dest, temp, copyLength); } } if(temp!=buffer) { diff --git a/icu4c/source/common/ustring.cpp b/icu4c/source/common/ustring.cpp index 8813c569ec6..aae7f5c36d3 100644 --- a/icu4c/source/common/ustring.cpp +++ b/icu4c/source/common/ustring.cpp @@ -1115,7 +1115,7 @@ u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { U_CAPI UChar * U_EXPORT2 u_memcpy(UChar *dest, const UChar *src, int32_t count) { if(count > 0) { - uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR); + uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); } return dest; } @@ -1123,7 +1123,7 @@ u_memcpy(UChar *dest, const UChar *src, int32_t count) { U_CAPI UChar * U_EXPORT2 u_memmove(UChar *dest, const UChar *src, int32_t count) { if(count > 0) { - uprv_memmove(dest, src, count*U_SIZEOF_UCHAR); + uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); } return dest; } diff --git a/icu4c/source/common/utrie.cpp b/icu4c/source/common/utrie.cpp index aa697d04660..478560df66e 100644 --- a/icu4c/source/common/utrie.cpp +++ b/icu4c/source/common/utrie.cpp @@ -141,7 +141,7 @@ utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_ uprv_free(aliasData); } else { uprv_memcpy(trie->index, other->index, sizeof(trie->index)); - uprv_memcpy(trie->data, other->data, other->dataLength*4); + uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); trie->dataLength=other->dataLength; trie->isDataAllocated=isDataAllocated; } @@ -841,7 +841,7 @@ utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity, } /* write 32-bit data values */ - uprv_memcpy(dest16, trie->data, 4*trie->dataLength); + uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength); } return length; diff --git a/icu4c/source/common/utrie2_builder.cpp b/icu4c/source/common/utrie2_builder.cpp index dc3d55aaf8d..664051c5f9b 100644 --- a/icu4c/source/common/utrie2_builder.cpp +++ b/icu4c/source/common/utrie2_builder.cpp @@ -250,11 +250,11 @@ cloneBuilder(const UNewTrie2 *other) { /* clone data */ uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); - uprv_memcpy(trie->index2, other->index2, other->index2Length*4); + uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4); trie->index2NullOffset=other->index2NullOffset; trie->index2Length=other->index2Length; - uprv_memcpy(trie->data, other->data, other->dataLength*4); + uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); trie->dataNullOffset=other->dataNullOffset; trie->dataLength=other->dataLength; @@ -262,7 +262,7 @@ cloneBuilder(const UNewTrie2 *other) { if(other->isCompacted) { trie->firstFreeBlock=0; } else { - uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4); + uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4); trie->firstFreeBlock=other->firstFreeBlock; } @@ -542,7 +542,7 @@ allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { if(data==NULL) { return -1; } - uprv_memcpy(data, trie->data, trie->dataLength*4); + uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4); uprv_free(trie->data); trie->data=data; trie->dataCapacity=capacity; @@ -1404,7 +1404,7 @@ utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { /* write 32-bit data values */ trie->data16=NULL; trie->data32=(uint32_t *)dest16; - uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4); + uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4); break; default: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 86822d46458..36a07d9a975 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -176,7 +176,7 @@ TransliterationRule::TransliterationRule(TransliterationRule& other) : segmentsCount = 0; if (other.segmentsCount > 0) { segments = (UnicodeFunctor **)uprv_malloc(other.segmentsCount * sizeof(UnicodeFunctor *)); - uprv_memcpy(segments, other.segments, other.segmentsCount*sizeof(segments[0])); + uprv_memcpy(segments, other.segments, (size_t)other.segmentsCount*sizeof(segments[0])); } if (other.anteContext != NULL) { diff --git a/icu4c/source/i18n/reldtfmt.cpp b/icu4c/source/i18n/reldtfmt.cpp index 5a46c19bc87..e620296a56a 100644 --- a/icu4c/source/i18n/reldtfmt.cpp +++ b/icu4c/source/i18n/reldtfmt.cpp @@ -57,8 +57,8 @@ RelativeDateFormat::RelativeDateFormat(const RelativeDateFormat& other) : fCombinedFormat = new SimpleFormatter(*other.fCombinedFormat); } if (fDatesLen > 0) { - fDates = (URelativeString*) uprv_malloc(sizeof(fDates[0])*fDatesLen); - uprv_memcpy(fDates, other.fDates, sizeof(fDates[0])*fDatesLen); + fDates = (URelativeString*) uprv_malloc(sizeof(fDates[0])*(size_t)fDatesLen); + uprv_memcpy(fDates, other.fDates, sizeof(fDates[0])*(size_t)fDatesLen); } #if !UCONFIG_NO_BREAK_ITERATION if (other.fCapitalizationBrkIter != NULL) { diff --git a/icu4c/source/i18n/ucoleitr.cpp b/icu4c/source/i18n/ucoleitr.cpp index 6fba5441968..9c73afda19b 100644 --- a/icu4c/source/i18n/ucoleitr.cpp +++ b/icu4c/source/i18n/ucoleitr.cpp @@ -36,9 +36,9 @@ U_NAMESPACE_USE #define DEFAULT_BUFFER_SIZE 16 #define BUFFER_GROW 8 -#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) +#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0]) -#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) +#define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type)) #define DELETE_ARRAY(array) uprv_free((void *) (array)) diff --git a/icu4c/source/i18n/unesctrn.cpp b/icu4c/source/i18n/unesctrn.cpp index ec34d56b024..88a78d6e325 100644 --- a/icu4c/source/i18n/unesctrn.cpp +++ b/icu4c/source/i18n/unesctrn.cpp @@ -87,7 +87,7 @@ static UChar* copySpec(const UChar* spec) { UChar *result = (UChar *)uprv_malloc(len*sizeof(UChar)); // Check for memory allocation error. if (result != NULL) { - uprv_memcpy(result, spec, len*sizeof(result[0])); + uprv_memcpy(result, spec, (size_t)len*sizeof(result[0])); } return result; } diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index 6e81290dfcf..4bb3fc40315 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -224,7 +224,7 @@ inline int32_t * addTouint32_tArray(int32_t *destination, if (U_FAILURE(*status)) { return NULL; } - uprv_memcpy(temp, destination, sizeof(int32_t) * offset); + uprv_memcpy(temp, destination, sizeof(int32_t) * (size_t)offset); *destinationlength = newlength; destination = temp; } @@ -266,7 +266,7 @@ inline int64_t * addTouint64_tArray(int64_t *destination, return NULL; } - uprv_memcpy(temp, destination, sizeof(int64_t) * offset); + uprv_memcpy(temp, destination, sizeof(int64_t) * (size_t)offset); *destinationlength = newlength; destination = temp; } @@ -1381,7 +1381,7 @@ inline UChar * addToUCharArray( UChar *destination, } } if (source1length != 0) { - uprv_memcpy(destination, source1, sizeof(UChar) * source1length); + u_memcpy(destination, source1, source1length); } if (source2length != 0) { uprv_memcpy(destination + source1length, source2, diff --git a/icu4c/source/io/ustdio.c b/icu4c/source/io/ustdio.c index d2bac6fc4d1..1c2225a9423 100644 --- a/icu4c/source/io/ustdio.c +++ b/icu4c/source/io/ustdio.c @@ -430,7 +430,7 @@ ufile_fill_uchar_buffer(UFILE *f) /* shift the buffer if it isn't empty */ if(dataSize != 0) { - uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */ + u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ } diff --git a/icu4c/source/test/intltest/colldata.cpp b/icu4c/source/test/intltest/colldata.cpp index 0603a6bbb11..89e7a6591cc 100644 --- a/icu4c/source/test/intltest/colldata.cpp +++ b/icu4c/source/test/intltest/colldata.cpp @@ -34,9 +34,9 @@ #include "colldata.h" -#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) +#define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type)) #define DELETE_ARRAY(array) uprv_free((void *) (array)) -#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) +#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0]) CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) diff --git a/icu4c/source/test/intltest/idnaref.cpp b/icu4c/source/test/intltest/idnaref.cpp index 1e6d65370c8..454102c8186 100644 --- a/icu4c/source/test/intltest/idnaref.cpp +++ b/icu4c/source/test/intltest/idnaref.cpp @@ -341,7 +341,7 @@ idnaref_toASCII(const UChar* src, int32_t srcLength, } if(srcIsASCII){ if(b1Len <= destCapacity){ - uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest, b1, b1Len); reqLength = b1Len; }else{ reqLength = b1Len; @@ -382,9 +382,9 @@ idnaref_toASCII(const UChar* src, int32_t srcLength, goto CLEANUP; } //Step 7: prepend the ACE prefix - uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); + u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); //Step 6: copy the contents in b2 into dest - uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); + u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); }else{ *status = U_IDNA_ACE_PREFIX_ERROR; @@ -507,7 +507,7 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, // copy everything to b1 if(srcLength < b1Capacity){ - uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); + u_memmove(b1, src, srcLength); }else{ /* we do not have enough room so grow the buffer*/ b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); @@ -515,7 +515,7 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, *status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } - uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); + u_memmove(b1, src, srcLength); } b1Len = srcLength; } @@ -577,7 +577,7 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, //step 8: return output of step 5 reqLength = b2Len; if(b2Len <= destCapacity) { - uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); + u_memmove(dest, b2, b2Len); } }else{ // verify that STD3 ASCII rules are satisfied @@ -603,7 +603,7 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, } //copy the source to destination if(srcLength <= destCapacity){ - uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); + u_memmove(dest, src, srcLength); } reqLength = srcLength; } @@ -628,9 +628,9 @@ CLEANUP: //copy the source to destination if(dest && srcLength <= destCapacity){ if(srcLength == -1) { - uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); + u_memmove(dest, src, u_strlen(src)); } else { - uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); + u_memmove(dest, src, srcLength); } } reqLength = srcLength; @@ -752,7 +752,7 @@ idnaref_IDNToASCII( const UChar* src, int32_t srcLength, int32_t tempLen = (reqLength + b1Len ); // copy to dest if( tempLen< destCapacity){ - uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest+reqLength, b1, b1Len); } reqLength = tempLen; @@ -801,7 +801,7 @@ idnaref_IDNToASCII( const UChar* src, int32_t srcLength, int32_t tempLen = (reqLength + b1Len ); // copy to dest if( tempLen< destCapacity){ - uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest+reqLength, b1, b1Len); } reqLength = tempLen; @@ -906,7 +906,7 @@ idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, int32_t tempLen = (reqLength + b1Len ); // copy to dest if( tempLen< destCapacity){ - uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest+reqLength, b1, b1Len); } reqLength = tempLen; @@ -958,7 +958,7 @@ idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, int32_t tempLen = (reqLength + b1Len ); // copy to dest if( tempLen< destCapacity){ - uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); + u_memmove(dest+reqLength, b1, b1Len); } reqLength = tempLen; diff --git a/icu4c/source/test/intltest/nptrans.cpp b/icu4c/source/test/intltest/nptrans.cpp index 8c2bd81f4cf..db530f2b6e1 100644 --- a/icu4c/source/test/intltest/nptrans.cpp +++ b/icu4c/source/test/intltest/nptrans.cpp @@ -174,7 +174,7 @@ int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, } // check if there is enough room in the output if(bufLen < destCapacity){ - uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR); + u_memcpy(dest, buffer, bufLen); } return u_terminateUChars(dest, destCapacity, bufLen, &status); @@ -266,7 +266,7 @@ int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, } if(b1Len <= destCapacity){ - uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR); + u_memmove(dest, b1, b1Len); } CLEANUP: diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp index b0e617b1dd7..af79f68e49a 100644 --- a/icu4c/source/test/intltest/strcase.cpp +++ b/icu4c/source/test/intltest/strcase.cpp @@ -47,6 +47,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha #endif TESTCASE_AUTO(TestFullCaseFoldingIterator); TESTCASE_AUTO(TestGreekUpper); + TESTCASE_AUTO(TestLongUpper); TESTCASE_AUTO_END; } @@ -687,3 +688,22 @@ StringCaseTest::TestGreekUpper() { assertGreekUpper("\\u03C1\\u03C9\\u03BC\\u03AD\\u03B9\\u03BA\\u03B1", "\\u03A1\\u03A9\\u039C\\u0395\\u03AA\\u039A\\u0391"); } + +void +StringCaseTest::TestLongUpper() { + // Ticket #12663, crash with an extremely long string where + // U+0390 maps to 0399 0308 0301 so that the result is three times as long + // and overflows an int32_t. + int32_t length = 0x40000004; // more than 1G UChars + UnicodeString s(length, (UChar32)0x390, length); + UnicodeString result; + IcuTestErrorCode errorCode(*this, "TestLongUpper"); + UChar *dest = result.getBuffer(length + 1); + int32_t destLength = u_strToUpper(dest, result.getCapacity(), + s.getBuffer(), s.length(), "", errorCode); + result.releaseBuffer(destLength); + if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { + errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", + errorCode.errorName(), (long)destLength); + } +} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 6598f03aa06..8dfa750ac22 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -110,6 +110,7 @@ public: void TestCasing(); void TestFullCaseFoldingIterator(); void TestGreekUpper(); + void TestLongUpper(); private: void assertGreekUpper(const char *s, const char *expected); diff --git a/icu4c/source/tools/genrb/ustr.c b/icu4c/source/tools/genrb/ustr.c index ee052aed9af..07e9b153814 100644 --- a/icu4c/source/tools/genrb/ustr.c +++ b/icu4c/source/tools/genrb/ustr.c @@ -94,7 +94,7 @@ ustr_cpy(struct UString *dst, if(src->fChars == NULL || dst->fChars == NULL){ return; } - uprv_memcpy(dst->fChars, src->fChars, sizeof(UChar) * src->fLength); + u_memcpy(dst->fChars, src->fChars, src->fLength); dst->fLength = src->fLength; dst->fChars[dst->fLength] = 0x0000; } diff --git a/icu4c/source/tools/gensprep/store.c b/icu4c/source/tools/gensprep/store.c index eb1d86b96ba..ea7ee70f86b 100644 --- a/icu4c/source/tools/gensprep/store.c +++ b/icu4c/source/tools/gensprep/store.c @@ -328,7 +328,7 @@ storeMappingData(){ mappingData[currentIndex++] = (uint16_t) mappingLength; } /* copy the contents to mappindData array */ - uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); + u_memmove(mappingData+currentIndex, value->mapping, value->length); currentIndex += value->length; if (currentIndex > mappingDataCapacity) { /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */ diff --git a/icu4c/source/tools/toolutil/package.cpp b/icu4c/source/tools/toolutil/package.cpp index e1526686e2b..a5835e9618c 100644 --- a/icu4c/source/tools/toolutil/package.cpp +++ b/icu4c/source/tools/toolutil/package.cpp @@ -1294,7 +1294,7 @@ void Package::setItemCapacity(int32_t max) exit(U_MEMORY_ALLOCATION_ERROR); } if(items && itemCount>0) { - uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); + uprv_memcpy(newItems, items, (size_t)itemCount*sizeof(items[0])); } itemMax = max; items = newItems; diff --git a/icu4c/source/tools/toolutil/swapimpl.cpp b/icu4c/source/tools/toolutil/swapimpl.cpp index 5e2d3bf97a8..6cc21623016 100644 --- a/icu4c/source/tools/toolutil/swapimpl.cpp +++ b/icu4c/source/tools/toolutil/swapimpl.cpp @@ -232,7 +232,7 @@ uprops_swap(const UDataSwapper *ds, /* copy everything for inaccessible data (padding) */ if(inData32!=outData32) { - uprv_memcpy(outData32, inData32, 4*dataTop); + uprv_memcpy(outData32, inData32, 4*(size_t)dataTop); } /* swap the indexes[16] */ diff --git a/icu4c/source/tools/toolutil/toolutil.cpp b/icu4c/source/tools/toolutil/toolutil.cpp index 8f851c36402..6afce5bb20c 100644 --- a/icu4c/source/tools/toolutil/toolutil.cpp +++ b/icu4c/source/tools/toolutil/toolutil.cpp @@ -314,7 +314,7 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) { if(mem->array==mem->staticArray) { mem->array=uprv_malloc(newCapacity*mem->size); if(mem->array!=NULL) { - uprv_memcpy(mem->array, mem->staticArray, mem->idx*mem->size); + uprv_memcpy(mem->array, mem->staticArray, (size_t)mem->idx*mem->size); } } else { mem->array=uprv_realloc(mem->array, newCapacity*mem->size); diff --git a/icu4c/source/tools/toolutil/ucm.c b/icu4c/source/tools/toolutil/ucm.c index 197e46a2720..afdcb92a954 100644 --- a/icu4c/source/tools/toolutil/ucm.c +++ b/icu4c/source/tools/toolutil/ucm.c @@ -969,7 +969,7 @@ ucm_addMapping(UCMTable *table, exit(U_MEMORY_ALLOCATION_ERROR); } - uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4); + uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4); m->u=idx; }