From c8aa800735dc33bbc35784096a80a0c72812fdb5 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 21 Aug 2020 19:19:41 +0000 Subject: [PATCH] ICU-21035 add & use CharString::extract(dest buffer) See #1253 --- icu4c/source/common/charstr.cpp | 14 +++++++ icu4c/source/common/charstr.h | 16 ++++++++ icu4c/source/common/icuplug.cpp | 4 +- icu4c/source/common/loclikely.cpp | 13 +------ icu4c/source/common/uloc.cpp | 47 ++++-------------------- icu4c/source/common/uloc_tag.cpp | 2 +- icu4c/source/i18n/rulebasedcollator.cpp | 5 +-- icu4c/source/i18n/ucol_sit.cpp | 5 +-- icu4c/source/test/cintltst/cloctst.c | 7 +++- icu4c/source/test/intltest/strtest.cpp | 21 +++++++++++ icu4c/source/tools/toolutil/pkg_genc.cpp | 4 +- 11 files changed, 72 insertions(+), 66 deletions(-) diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp index 4878c737fb5..318a185b3f1 100644 --- a/icu4c/source/common/charstr.cpp +++ b/icu4c/source/common/charstr.cpp @@ -20,6 +20,7 @@ #include "cmemory.h" #include "cstring.h" #include "uinvchar.h" +#include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -46,6 +47,19 @@ char *CharString::cloneData(UErrorCode &errorCode) const { return p; } +int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return len; } + if (capacity < 0 || (capacity > 0 && dest == nullptr)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return len; + } + const char *src = buffer.getAlias(); + if (0 < len && len <= capacity && src != dest) { + uprv_memcpy(dest, src, len); + } + return u_terminateChars(dest, capacity, len, &errorCode); +} + CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) { if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) { len=s.len; diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h index 9400fbb5461..6619faac618 100644 --- a/icu4c/source/common/charstr.h +++ b/icu4c/source/common/charstr.h @@ -87,6 +87,22 @@ public: * The caller must uprv_free() the result. */ char *cloneData(UErrorCode &errorCode) const; + /** + * Copies the contents of the string into dest. + * Checks if there is enough space in dest, extracts the entire string if possible, + * and NUL-terminates dest if possible. + * + * If the string fits into dest but cannot be NUL-terminated (length()==capacity), + * then the error code is set to U_STRING_NOT_TERMINATED_WARNING. + * If the string itself does not fit into dest (length()>capacity), + * then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * @param dest Destination string buffer. + * @param capacity Size of the dest buffer (number of chars). + * @param errorCode ICU error code. + * @return length() + */ + int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const; bool operator==(StringPiece other) const { return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0); diff --git a/icu4c/source/common/icuplug.cpp b/icu4c/source/common/icuplug.cpp index c6439cc819a..303bd4406e2 100644 --- a/icu4c/source/common/icuplug.cpp +++ b/icu4c/source/common/icuplug.cpp @@ -782,8 +782,8 @@ uplug_init(UErrorCode *status) { /* plugin_file is not used for processing - it is only used so that uplug_getPluginFile() works (i.e. icuinfo) */ - uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file)); - + pluginFile.extract(plugin_file, sizeof(plugin_file), *status); + #if UPLUG_TRACE DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file))); #endif diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index ea0d98cd9e3..e87b06edcb4 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -464,18 +464,7 @@ parseTagString( goto error; } - { - icu::CharString result = ulocimp_getLanguage(position, &position, *err); - if (U_FAILURE(*err)) { - goto error; - } - - subtagLength = result.length(); - if (subtagLength <= *langLength) { - uprv_memcpy(lang, result.data(), subtagLength); - } - u_terminateChars(lang, *langLength, subtagLength, err); - } + subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err); /* * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 94cbaab220e..45fab01917b 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -50,9 +50,6 @@ #include "uassert.h" #include "charstr.h" -#include -#include /* for sprintf */ - U_NAMESPACE_USE /* ### Declarations **************************************************/ @@ -892,7 +889,6 @@ uloc_setKeywordValue(const char* keywordName, char* startSearchHere = NULL; char* keywordStart = NULL; CharString updatedKeysAndValues; - int32_t updatedKeysAndValuesLen; UBool handledInputKeyAndValue = FALSE; char keyValuePrefix = '@'; @@ -1072,18 +1068,10 @@ uloc_setKeywordValue(const char* keywordName, return bufLen; } - updatedKeysAndValuesLen = updatedKeysAndValues.length(); - /* needLen = length of the part before '@' + length of updated key-value part including '@' */ - needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; - if(needLen >= bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return needLen; /* no change */ - } - if (updatedKeysAndValuesLen > 0) { - uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); - } - buffer[needLen]=0; - return needLen; + // needLen = length of the part before '@' + needLen = (int32_t)(startSearchHere - buffer); + return needLen + updatedKeysAndValues.extract( + startSearchHere, bufferCapacity - needLen, *status); } /* ### ID parsing implementation **************************************************/ @@ -1232,13 +1220,7 @@ ulocimp_getScript(const char *localeID, char *script, int32_t scriptCapacity, const char **pEnd) { ErrorCode status; - CharString result = ulocimp_getScript(localeID, pEnd, status); - if (status.isFailure()) { - return 0; - } - int32_t reslen = result.length(); - uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity)); - return reslen; + return ulocimp_getScript(localeID, pEnd, status).extract(script, scriptCapacity, status); } static CharString @@ -1281,13 +1263,7 @@ ulocimp_getCountry(const char *localeID, char *country, int32_t countryCapacity, const char **pEnd) { ErrorCode status; - CharString result = ulocimp_getCountry(localeID, pEnd, status); - if (status.isFailure()) { - return 0; - } - int32_t reslen = result.length(); - uprv_memcpy(country, result.data(), std::min(reslen, countryCapacity)); - return reslen; + return ulocimp_getCountry(localeID, pEnd, status).extract(country, countryCapacity, status); } /** @@ -1744,16 +1720,7 @@ uloc_getLanguage(const char* localeID, localeID=uloc_getDefault(); } - CharString result = ulocimp_getLanguage(localeID, NULL, *err); - if (U_FAILURE(*err)) { - return 0; - } - - int32_t reslen = result.length(); - if (reslen <= languageCapacity) { - uprv_memcpy(language, result.data(), reslen); - } - return u_terminateChars(language, languageCapacity, reslen, err); + return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err); } U_CAPI int32_t U_EXPORT2 diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp index 5eed02c6114..8faac32cb17 100644 --- a/icu4c/source/common/uloc_tag.cpp +++ b/icu4c/source/common/uloc_tag.cpp @@ -1413,7 +1413,7 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st break; } - uprv_strcpy(pExtBuf, bcpValue); + buf.extract(pExtBuf, resultCapacity, tmpStatus); T_CString_toLowerCase(pExtBuf); extBuf->append(pExtBuf, bcpValueLen, tmpStatus); diff --git a/icu4c/source/i18n/rulebasedcollator.cpp b/icu4c/source/i18n/rulebasedcollator.cpp index 60acf17815a..917482d65bb 100644 --- a/icu4c/source/i18n/rulebasedcollator.cpp +++ b/icu4c/source/i18n/rulebasedcollator.cpp @@ -1600,10 +1600,7 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale, appendSubtag(result, 'Z', subtag, length, errorCode); if(U_FAILURE(errorCode)) { return 0; } - if(result.length() <= capacity) { - uprv_memcpy(buffer, result.data(), result.length()); - } - return u_terminateChars(buffer, capacity, result.length(), &errorCode); + return result.extract(buffer, capacity, errorCode); } UBool diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp index 92f332d6d06..4dc81aebcc9 100644 --- a/icu4c/source/i18n/ucol_sit.cpp +++ b/icu4c/source/i18n/ucol_sit.cpp @@ -372,10 +372,7 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity, } len += s->entries[i].length(); } else { - len += s->entries[i].length(); - if(len < capacity) { - uprv_strncat(destination,s->entries[i].data(), s->entries[i].length()); - } + len += s->entries[i].extract(destination + len, capacity - len, *status); } } } diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index 29f4c82ec7e..f87e518a59e 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -2233,7 +2233,12 @@ static void TestKeywordSetError(void) strcpy(buffer,kwSetTestCases[i].l); status = U_ZERO_ERROR; res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status); - if(status != U_BUFFER_OVERFLOW_ERROR) { + if(res == blen) { + if(status != U_STRING_NOT_TERMINATED_WARNING) { + log_err("expected not terminated warning on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v); + return; + } + } else if(status != U_BUFFER_OVERFLOW_ERROR) { log_err("expected buffer overflow on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v); return; } diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp index 1185d31c352..54b5a193008 100644 --- a/icu4c/source/test/intltest/strtest.cpp +++ b/icu4c/source/test/intltest/strtest.cpp @@ -810,6 +810,27 @@ StringTest::TestCharString() { "Long string over 40 characters to trigger heap allocation", s3.data()); } + + { + // extract() + errorCode.reset(); + CharString s("abc", errorCode); + char buffer[10]; + + s.extract(buffer, 10, errorCode); + assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get()); + assertEquals("abc.extract(10) output", "abc", buffer); + + strcpy(buffer, "012345"); + s.extract(buffer, 3, errorCode); + assertEquals("abc.extract(3) not terminated", + U_STRING_NOT_TERMINATED_WARNING, errorCode.reset()); + assertEquals("abc.extract(3) output", "abc345", buffer); + + strcpy(buffer, "012345"); + s.extract(buffer, 2, errorCode); + assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); + } } void diff --git a/icu4c/source/tools/toolutil/pkg_genc.cpp b/icu4c/source/tools/toolutil/pkg_genc.cpp index 82511288180..17347bac5d7 100644 --- a/icu4c/source/tools/toolutil/pkg_genc.cpp +++ b/icu4c/source/tools/toolutil/pkg_genc.cpp @@ -738,8 +738,8 @@ getOutFilename( exit(U_ILLEGAL_ARGUMENT_ERROR); } - uprv_strcpy(outFilename, outFilenameBuilder.data()); - uprv_strcpy(entryName, entryNameBuilder.data()); + outFilenameBuilder.extract(outFilename, outFilenameCapacity, status); + entryNameBuilder.extract(entryName, entryNameCapacity, status); } #ifdef CAN_GENERATE_OBJECTS