ICU-22520 Refactor CheckedArrayByteSink & u_terminateChars into helper.

The repeated sequence of allocating a CheckedArrayByteSink, calling some
function that writes into it, then checking for overflow and returning
through u_terminateChars() can all be moved into a single shared helper
function.
This commit is contained in:
Fredrik Roubert 2024-03-04 12:34:58 +01:00 committed by Fredrik Roubert
parent c610d7f986
commit 02a1bfc59f
8 changed files with 204 additions and 340 deletions

View file

@ -7,11 +7,14 @@
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H
#include <type_traits>
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
@ -57,6 +60,40 @@ public:
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
/**
 * Adapter for implementing a classic ICU4C C API, which writes into a
 * fixed-size caller-supplied buffer, on top of a contemporary C++ API
 * that writes to a ByteSink.
 *
 * Wraps the buffer in a CheckedArrayByteSink, invokes the lambda with
 * that sink, and then returns through u_terminateChars().
 *
 * If status already indicates a failure on entry, the lambda is not
 * called and 0 is returned. If the lambda leaves a failure in status,
 * 0 is returned as well.
 *
 * @param buffer receiving buffer
 * @param capacity capacity of receiving buffer
 * @param lambda that gets called with the sink and status as arguments
 * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
 * @return number of bytes written, or needed (in case of overflow)
 * @internal
 */
template <typename F,
          typename = std::enable_if_t<
              std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
                                            F&& lambda,
                                            UErrorCode& status) {
    // Nothing to do when the caller already carries an error.
    if (U_FAILURE(status)) {
        return 0;
    }

    CheckedArrayByteSink checkedSink(buffer, capacity);
    lambda(checkedSink, status);
    if (U_FAILURE(status)) {
        return 0;
    }

    const int32_t length = checkedSink.NumberOfBytesAppended();
    if (!checkedSink.Overflowed()) {
        // Everything fit: hand the final length and status handling
        // over to u_terminateChars().
        return u_terminateChars(buffer, capacity, length, &status);
    }

    // The output did not fit: report the overflow together with the
    // byte count reported by the sink.
    status = U_BUFFER_OVERFLOW_ERROR;
    return length;
}
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);

View file

@ -35,7 +35,6 @@
#include "cstring.h"
#include "loclikelysubtags.h"
#include "ulocimp.h"
#include "ustr_imp.h"
namespace {
@ -274,28 +273,12 @@ uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
int32_t maximizedLocaleIDCapacity,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(
maximizedLocaleID, maximizedLocaleIDCapacity);
ulocimp_addLikelySubtags(localeID, sink, *status);
if (U_FAILURE(*status)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(
maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
}
return reslen;
return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
maximizedLocaleID, maximizedLocaleIDCapacity,
[&](icu::ByteSink& sink, UErrorCode& status) {
ulocimp_addLikelySubtags(localeID, sink, status);
},
*status);
}
U_EXPORT void
@ -316,28 +299,12 @@ uloc_minimizeSubtags(const char* localeID,
char* minimizedLocaleID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(
minimizedLocaleID, minimizedLocaleIDCapacity);
ulocimp_minimizeSubtags(localeID, sink, false, *status);
if (U_FAILURE(*status)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(
minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
}
return reslen;
return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
minimizedLocaleID, minimizedLocaleIDCapacity,
[&](icu::ByteSink& sink, UErrorCode& status) {
ulocimp_minimizeSubtags(localeID, sink, false, status);
},
*status);
}
U_EXPORT void

View file

@ -41,7 +41,6 @@
#include "uassert.h"
#include "ucase.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h"
U_NAMESPACE_USE
@ -917,21 +916,20 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
return 0;
}
CheckedArrayByteSink sink(dest, destCapacity);
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, errorCode);
sink.Flush();
if (U_SUCCESS(errorCode)) {
if (sink.Overflowed()) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
int32_t reslen = ByteSinkUtil::viaByteSinkToTerminatedChars(
dest, destCapacity,
[&](ByteSink& sink, UErrorCode& status) {
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, status);
},
errorCode);
if (U_SUCCESS(errorCode) && edits != nullptr) {
edits->copyErrorTo(errorCode);
}
return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
return reslen;
}
/* public API functions */

View file

@ -716,25 +716,12 @@ uloc_getKeywordValue(const char* localeID,
char* buffer, int32_t bufferCapacity,
UErrorCode* status)
{
if (U_FAILURE(*status)) {
return 0;
}
CheckedArrayByteSink sink(buffer, bufferCapacity);
ulocimp_getKeywordValue(localeID, keywordName, sink, *status);
if (U_FAILURE(*status)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(buffer, bufferCapacity, reslen, status);
}
return reslen;
return ByteSinkUtil::viaByteSinkToTerminatedChars(
buffer, bufferCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getKeywordValue(localeID, keywordName, sink, status);
},
*status);
}
U_EXPORT void
@ -1881,25 +1868,12 @@ uloc_getParent(const char* localeID,
int32_t parentCapacity,
UErrorCode* err)
{
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(parent, parentCapacity);
ulocimp_getParent(localeID, sink, *err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(parent, parentCapacity, reslen, err);
}
return reslen;
return ByteSinkUtil::viaByteSinkToTerminatedChars(
parent, parentCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getParent(localeID, sink, status);
},
*err);
}
U_EXPORT void
@ -1938,32 +1912,19 @@ uloc_getLanguage(const char* localeID,
UErrorCode* err)
{
/* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
if (err==nullptr || U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(language, languageCapacity);
ulocimp_getSubtags(
localeID,
&sink,
nullptr,
nullptr,
nullptr,
nullptr,
*err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t length = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
return length;
}
return u_terminateChars(language, languageCapacity, length, err);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
language, languageCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
&sink,
nullptr,
nullptr,
nullptr,
nullptr,
status);
},
*err);
}
U_CAPI int32_t U_EXPORT2
@ -1972,31 +1933,19 @@ uloc_getScript(const char* localeID,
int32_t scriptCapacity,
UErrorCode* err)
{
if(err==nullptr || U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(script, scriptCapacity);
ulocimp_getSubtags(
localeID,
nullptr,
&sink,
nullptr,
nullptr,
nullptr,
*err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t length = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
return length;
}
return u_terminateChars(script, scriptCapacity, length, err);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
script, scriptCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
&sink,
nullptr,
nullptr,
nullptr,
status);
},
*err);
}
U_CAPI int32_t U_EXPORT2
@ -2005,31 +1954,19 @@ uloc_getCountry(const char* localeID,
int32_t countryCapacity,
UErrorCode* err)
{
if(err==nullptr || U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(country, countryCapacity);
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
&sink,
nullptr,
nullptr,
*err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t length = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
return length;
}
return u_terminateChars(country, countryCapacity, length, err);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
country, countryCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
&sink,
nullptr,
nullptr,
status);
},
*err);
}
U_CAPI int32_t U_EXPORT2
@ -2038,31 +1975,19 @@ uloc_getVariant(const char* localeID,
int32_t variantCapacity,
UErrorCode* err)
{
if(err==nullptr || U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(variant, variantCapacity);
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
nullptr,
&sink,
nullptr,
*err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t length = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
return length;
}
return u_terminateChars(variant, variantCapacity, length, err);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
variant, variantCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
nullptr,
&sink,
nullptr,
status);
},
*err);
}
U_CAPI int32_t U_EXPORT2
@ -2071,25 +1996,12 @@ uloc_getName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_getName(localeID, sink, *err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
return ByteSinkUtil::viaByteSinkToTerminatedChars(
name, nameCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getName(localeID, sink, status);
},
*err);
}
U_EXPORT void
@ -2106,25 +2018,12 @@ uloc_getBaseName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_getBaseName(localeID, sink, *err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
return ByteSinkUtil::viaByteSinkToTerminatedChars(
name, nameCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getBaseName(localeID, sink, status);
},
*err);
}
U_EXPORT void
@ -2141,25 +2040,12 @@ uloc_canonicalize(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_canonicalize(localeID, sink, *err);
if (U_FAILURE(*err)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
return ByteSinkUtil::viaByteSinkToTerminatedChars(
name, nameCapacity,
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_canonicalize(localeID, sink, status);
},
*err);
}
U_EXPORT void

View file

@ -16,7 +16,6 @@
#include "unicode/putil.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
@ -2570,25 +2569,12 @@ uloc_toLanguageTag(const char* localeID,
int32_t langtagCapacity,
UBool strict,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(langtag, langtagCapacity);
ulocimp_toLanguageTag(localeID, sink, strict, *status);
if (U_FAILURE(*status)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(langtag, langtagCapacity, reslen, status);
}
return reslen;
return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
langtag, langtagCapacity,
[&](icu::ByteSink& sink, UErrorCode& status) {
ulocimp_toLanguageTag(localeID, sink, strict, status);
},
*status);
}
@ -2672,25 +2658,12 @@ uloc_forLanguageTag(const char* langtag,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, *status);
if (U_FAILURE(*status)) {
return 0;
}
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(localeID, localeIDCapacity, reslen, status);
}
return reslen;
return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
localeID, localeIDCapacity,
[&](icu::ByteSink& sink, UErrorCode& status) {
ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
},
*status);
}

View file

@ -1,6 +1,7 @@
// © 2023 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
#include "unicode/bytestream.h"
#include "unicode/errorcode.h"
#include "unicode/stringpiece.h"
#include "unicode/utypes.h"
@ -8,9 +9,9 @@
#include "unicode/ulocale.h"
#include "unicode/locid.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "ustr_imp.h"
U_NAMESPACE_USE
#define EXTERNAL(i) (reinterpret_cast<ULocale*>(i))
@ -55,20 +56,14 @@ int32_t ulocale_get ##N ( \
*err = U_ILLEGAL_ARGUMENT_ERROR; \
return 0; \
} \
CheckedArrayByteSink sink(valueBuffer, bufferCapacity); \
CONST_INTERNAL(locale)->get ## N( \
keywordLength < 0 ? StringPiece(keyword) : StringPiece(keyword, keywordLength), \
sink, *err); \
if (U_FAILURE(*err)) { \
return 0; \
} \
int32_t reslen = sink.NumberOfBytesAppended(); \
if (sink.Overflowed()) { \
*err = U_BUFFER_OVERFLOW_ERROR; \
} else { \
u_terminateChars(valueBuffer, bufferCapacity, reslen, err); \
} \
return reslen; \
return ByteSinkUtil::viaByteSinkToTerminatedChars( \
valueBuffer, bufferCapacity, \
[&](ByteSink& sink, UErrorCode& status) { \
CONST_INTERNAL(locale)->get ## N( \
keywordLength < 0 ? StringPiece(keyword) : StringPiece(keyword, keywordLength), \
sink, status); \
}, \
*err); \
}
#define IMPL_ULOCALE_GET_KEYWORDS(N) \

View file

@ -9,10 +9,10 @@
#include "unicode/stringpiece.h"
#include "unicode/umachine.h"
#include "unicode/ulocbuilder.h"
#include "bytesinkutil.h"
#include "cstring.h"
#include "ustr_imp.h"
using icu::CheckedArrayByteSink;
using icu::StringPiece;
#define EXTERNAL(i) (reinterpret_cast<ULocaleBuilder*>(i))
@ -134,17 +134,12 @@ int32_t ulocbld_buildLanguageTag(ULocaleBuilder* builder,
return 0;
}
icu::Locale l = INTERNAL(builder)->build(*err);
if (U_FAILURE(*err)) { return 0; }
CheckedArrayByteSink sink(buffer, bufferCapacity);
l.toLanguageTag(sink, *err);
if (U_FAILURE(*err)) { return 0; }
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(buffer, bufferCapacity, reslen, err);
}
return reslen;
return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
buffer, bufferCapacity,
[&](icu::ByteSink& sink, UErrorCode& status) {
l.toLanguageTag(sink, status);
},
*err);
}
UBool ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode) {

View file

@ -18,16 +18,17 @@
#if !UCONFIG_NO_IDNA
#include "unicode/bytestream.h"
#include "unicode/idna.h"
#include "unicode/normalizer2.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "cstring.h"
#include "punycode.h"
#include "ubidi_props.h"
#include "ustr_imp.h"
// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
//
@ -1425,11 +1426,14 @@ uidna_labelToASCII_UTF8(const UIDNA *idna,
return 0;
}
StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length);
CheckedArrayByteSink sink(dest, capacity);
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode);
idnaInfoToStruct(info, pInfo);
return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
dest, capacity,
[&](ByteSink& sink, UErrorCode& status) {
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, status);
idnaInfoToStruct(info, pInfo);
},
*pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1441,11 +1445,14 @@ uidna_labelToUnicodeUTF8(const UIDNA *idna,
return 0;
}
StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length);
CheckedArrayByteSink sink(dest, capacity);
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode);
idnaInfoToStruct(info, pInfo);
return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
dest, capacity,
[&](ByteSink& sink, UErrorCode& status) {
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, status);
idnaInfoToStruct(info, pInfo);
},
*pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1457,11 +1464,14 @@ uidna_nameToASCII_UTF8(const UIDNA *idna,
return 0;
}
StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length);
CheckedArrayByteSink sink(dest, capacity);
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode);
idnaInfoToStruct(info, pInfo);
return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
dest, capacity,
[&](ByteSink& sink, UErrorCode& status) {
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, status);
idnaInfoToStruct(info, pInfo);
},
*pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1473,11 +1483,14 @@ uidna_nameToUnicodeUTF8(const UIDNA *idna,
return 0;
}
StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length);
CheckedArrayByteSink sink(dest, capacity);
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);
idnaInfoToStruct(info, pInfo);
return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
return ByteSinkUtil::viaByteSinkToTerminatedChars(
dest, capacity,
[&](ByteSink& sink, UErrorCode& status) {
IDNAInfo info;
reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, status);
idnaInfoToStruct(info, pInfo);
},
*pErrorCode);
}
#endif // UCONFIG_NO_IDNA