ICU-22520 Refactor CharString & CharStringByteSink into helper.

Many call sites repeat the same sequence: allocate a CharString, wrap it in a
CharStringByteSink, and then call some function that writes into that sink.
This sequence can be moved into a single shared helper function. That helper
is then used to give every ulocimp.h function that writes to a ByteSink an
overload that returns a CharString instead, so that call sites look like
perfectly normal C++ code.
This commit is contained in:
Fredrik Roubert 2024-03-05 18:01:43 +01:00 committed by Fredrik Roubert
parent 7bc202ae87
commit 53568e8dfc
27 changed files with 318 additions and 362 deletions

View file

@ -12,6 +12,7 @@
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"
@ -19,9 +20,29 @@
U_NAMESPACE_BEGIN
class ByteSink;
class CharString;
class Edits;
/**
 * ByteSink subclass that is constructed from a pointer to a CharString and
 * keeps a reference to that string (dest_).
 *
 * NOTE(review): presumably every byte written to the sink is appended to the
 * destination string -- the member definitions are not visible here, confirm
 * against the implementation file.
 *
 * Instances cannot be default-constructed, copy-constructed or copy-assigned.
 */
class U_COMMON_API CharStringByteSink : public ByteSink {
public:
// NOTE(review): dest is stored as a reference (dest_), so it presumably has
// to be non-null and outlive the sink -- confirm in the definition.
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;
// A sink without a destination string is not supported.
CharStringByteSink() = delete;
// Not copyable.
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
// Overrides of the ByteSink virtual interface.
void Append(const char* bytes, int32_t n) override;
char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;
private:
// The string this sink was constructed for.
CharString& dest_;
};
class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
@ -94,32 +115,32 @@ public:
return u_terminateChars(buffer, capacity, reslen, &status);
}
/**
 * Calls a lambda that writes to a ByteSink with a CharStringByteSink and
 * then returns a CharString, in order to implement a contemporary C++ API
 * on top of a C/C++ compatibility ByteSink API.
 *
 * If status already indicates a failure on entry, the lambda is not called
 * and an empty string is returned. Otherwise the lambda is called exactly
 * once and the string that the sink was attached to is returned
 * unconditionally -- also when the lambda itself set status to a failure
 * code. Callers should therefore check status before relying on the result.
 *
 * @param lambda that gets called with the sink and the status as arguments;
 *        it has to be invocable as void(ByteSink&, UErrorCode&)
 * @param status to check and report
 * @return the resulting string, or an empty string if status was already a
 *         failure on entry
 * @internal
 */
template <typename F,
typename = std::enable_if_t<
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
// Pre-existing error: do not run the lambda at all.
if (U_FAILURE(status)) { return {}; }
CharString result;
// The sink is attached to result for the duration of the lambda call.
CharStringByteSink sink(&result);
lambda(sink, status);
// Returned as is; status is not inspected again here.
return result;
}
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};
/**
 * ByteSink subclass that is constructed from a pointer to a CharString and
 * keeps a reference to that string (dest_).
 *
 * NOTE(review): presumably every byte written to the sink is appended to the
 * destination string -- the member definitions are not visible here, confirm
 * against the implementation file.
 *
 * Instances cannot be default-constructed, copy-constructed or copy-assigned.
 */
class U_COMMON_API CharStringByteSink : public ByteSink {
public:
// NOTE(review): dest is stored as a reference (dest_), so it presumably has
// to be non-null and outlive the sink -- confirm in the definition.
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;
// A sink without a destination string is not supported.
CharStringByteSink() = delete;
// Not copyable.
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
// Overrides of the ByteSink virtual interface.
void Append(const char* bytes, int32_t n) override;
char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;
private:
// The string this sink was constructed for.
CharString& dest_;
};
U_NAMESPACE_END
#endif //BYTESINKUTIL_H

View file

@ -26,7 +26,6 @@
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -836,11 +835,7 @@ uloc_getDisplayKeywordValue( const char* locale,
}
/* get the keyword value */
CharString keywordValue;
{
CharStringByteSink sink(&keywordValue);
ulocimp_getKeywordValue(locale, keyword, sink, *status);
}
CharString keywordValue = ulocimp_getKeywordValue(locale, keyword, *status);
/*
* if the keyword is equal to currency .. then to get the display name

View file

@ -178,15 +178,8 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
canonicalize = true; // always canonicalize host ID
}
CharString localeNameBuf;
{
CharStringByteSink sink(&localeNameBuf);
if (canonicalize) {
ulocimp_canonicalize(id, sink, status);
} else {
ulocimp_getName(id, sink, status);
}
}
CharString localeNameBuf =
canonicalize ? ulocimp_canonicalize(id, status) : ulocimp_getName(id, status);
if (U_FAILURE(status)) {
return gDefaultLocale;
@ -2083,11 +2076,7 @@ Locale::addLikelySubtags(UErrorCode& status) {
return;
}
CharString maximizedLocaleID;
{
CharStringByteSink sink(&maximizedLocaleID);
ulocimp_addLikelySubtags(fullName, sink, status);
}
CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
if (U_FAILURE(status)) {
return;
@ -2109,11 +2098,7 @@ Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
return;
}
CharString minimizedLocaleID;
{
CharStringByteSink sink(&minimizedLocaleID);
ulocimp_minimizeSubtags(fullName, sink, favorScript, status);
}
CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
if (U_FAILURE(status)) {
return;
@ -2164,17 +2149,12 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
// and then Locale::init(), instead of just calling the normal constructor.
CharString localeID;
int32_t parsedLength;
{
CharStringByteSink sink(&localeID);
ulocimp_forLanguageTag(
tag.data(),
tag.length(),
sink,
&parsedLength,
status);
}
CharString localeID = ulocimp_forLanguageTag(
tag.data(),
tag.length(),
&parsedLength,
status);
if (U_FAILURE(status)) {
return result;
@ -2561,9 +2541,7 @@ Locale::createKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords;
CharStringByteSink sink(&keywords);
ulocimp_getKeywords(variantStart+1, '@', sink, false, status);
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
if (U_SUCCESS(status) && !keywords.isEmpty()) {
result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
if (!result) {
@ -2590,9 +2568,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords;
CharStringByteSink sink(&keywords);
ulocimp_getKeywords(variantStart+1, '@', sink, false, status);
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
if (U_SUCCESS(status) && !keywords.isEmpty()) {
result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
if (!result) {

View file

@ -281,16 +281,22 @@ uloc_addLikelySubtags(const char* localeID,
*status);
}
/**
 * CharString-returning overload of ulocimp_addLikelySubtags(): forwards to
 * the ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT icu::CharString
ulocimp_addLikelySubtags(const char* localeID,
                         UErrorCode& status) {
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeLikelySubtags = [localeID](icu::ByteSink& out, UErrorCode& ec) {
        ulocimp_addLikelySubtags(localeID, out, ec);
    };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeLikelySubtags, status);
}
U_EXPORT void
ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
icu::CharString localeBuffer;
{
icu::CharStringByteSink localeSink(&localeBuffer);
ulocimp_canonicalize(localeID, localeSink, status);
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
_uloc_addLikelySubtags(localeBuffer.data(), sink, status);
}
@ -307,17 +313,24 @@ uloc_minimizeSubtags(const char* localeID,
*status);
}
/**
 * CharString-returning overload of ulocimp_minimizeSubtags(): forwards to
 * the ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT icu::CharString
ulocimp_minimizeSubtags(const char* localeID,
                        bool favorScript,
                        UErrorCode& status) {
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeMinimized = [localeID, favorScript](icu::ByteSink& out, UErrorCode& ec) {
        ulocimp_minimizeSubtags(localeID, out, favorScript, ec);
    };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeMinimized, status);
}
U_EXPORT void
ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
bool favorScript,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
icu::CharString localeBuffer;
{
icu::CharStringByteSink localeSink(&localeBuffer);
ulocimp_canonicalize(localeID, localeSink, status);
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
_uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
}
@ -348,11 +361,7 @@ uloc_isRightToLeft(const char *locale) {
}
// Otherwise, find the likely script.
errorCode = U_ZERO_ERROR;
icu::CharString likely;
{
icu::CharStringByteSink sink(&likely);
ulocimp_addLikelySubtags(locale, sink, errorCode);
}
icu::CharString likely = ulocimp_addLikelySubtags(locale, errorCode);
if (U_FAILURE(errorCode)) {
return false;
}
@ -380,11 +389,7 @@ GetRegionFromKey(const char* localeID, const char* key, UErrorCode& status) {
icu::CharString result;
// First check for keyword value
icu::CharString kw;
{
icu::CharStringByteSink sink(&kw);
ulocimp_getKeywordValue(localeID, key, sink, status);
}
icu::CharString kw = ulocimp_getKeywordValue(localeID, key, status);
int32_t len = kw.length();
if (U_SUCCESS(status) && len >= 3 && len <= 7) {
// chop off the subdivision code (which will generally be "zzzz" anyway)
@ -417,11 +422,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
if (U_SUCCESS(status) && rgBuf.isEmpty()) {
// no unicode_region_subtag but inferRegion true, try likely subtags
UErrorCode rgStatus = U_ZERO_ERROR;
icu::CharString locBuf;
{
icu::CharStringByteSink sink(&locBuf);
ulocimp_addLikelySubtags(localeID, sink, rgStatus);
}
icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus);
if (U_SUCCESS(rgStatus)) {
rgBuf = ulocimp_getRegion(locBuf.data(), status);
}

View file

@ -28,7 +28,6 @@
*/
#include "locmap.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "cmemory.h"
@ -1181,11 +1180,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
// Check any for keywords.
if (uprv_strchr(localeID, '@'))
{
icu::CharString collVal;
{
icu::CharStringByteSink sink(&collVal);
ulocimp_getKeywordValue(localeID, "collation", sink, *status);
}
icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
if (U_SUCCESS(*status) && !collVal.isEmpty())
{
// If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
@ -1194,10 +1189,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
else
{
// If the locale ID contains keywords other than collation, just use the base name.
{
icu::CharStringByteSink sink(&baseName);
ulocimp_getBaseName(localeID, sink, *status);
}
baseName = ulocimp_getBaseName(localeID, *status);
if (U_SUCCESS(*status) && !baseName.isEmpty())
{
mylocaleID = baseName.data();
@ -1206,11 +1198,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
}
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
icu::CharString asciiBCP47Tag;
{
icu::CharStringByteSink sink(&asciiBCP47Tag);
ulocimp_toLanguageTag(mylocaleID, sink, false, *status);
}
icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);
if (U_SUCCESS(*status))
{

View file

@ -24,7 +24,6 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "ulocimp.h"
@ -162,11 +161,7 @@ _uloc_getOrientationHelper(const char* localeId,
if (U_FAILURE(status)) { return result; }
icu::CharString localeBuffer;
{
icu::CharStringByteSink sink(&localeBuffer);
ulocimp_canonicalize(localeId, sink, status);
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);
if (U_FAILURE(status)) { return result; }

View file

@ -22,7 +22,6 @@
#include "unicode/usetiter.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -523,11 +522,7 @@ ucurr_forLocale(const char* locale,
}
UErrorCode localStatus = U_ZERO_ERROR;
CharString currency;
{
CharStringByteSink sink(&currency);
ulocimp_getKeywordValue(locale, "currency", sink, localStatus);
}
CharString currency = ulocimp_getKeywordValue(locale, "currency", localStatus);
int32_t resLen = currency.length();
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency.data(), resLen)) {
@ -602,11 +597,7 @@ ucurr_forLocale(const char* locale,
if ((U_FAILURE(localStatus)) && strchr(id.data(), '_') != 0) {
// We don't know about it. Check to see if we support the variant.
CharString parent;
{
CharStringByteSink sink(&parent);
ulocimp_getParent(locale, sink, *ec);
}
CharString parent = ulocimp_getParent(locale, *ec);
*ec = U_USING_FALLBACK_WARNING;
// TODO: Loop over the parent rather than recursing and
// looking again for a currency keyword.
@ -645,10 +636,7 @@ static UBool fallback(CharString& loc) {
loc.truncate(3);
loc.append("001", status);
} else {
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(loc.data(), sink, status);
loc = std::move(tmp);
loc = ulocimp_getParent(loc.data(), status);
}
/*
char *i = uprv_strrchr(loc, '_');
@ -703,11 +691,7 @@ ucurr_getName(const char16_t* currency,
// this function.
UErrorCode ec2 = U_ZERO_ERROR;
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_getName(locale, sink, ec2);
}
CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -805,11 +789,7 @@ ucurr_getPluralName(const char16_t* currency,
// this function.
UErrorCode ec2 = U_ZERO_ERROR;
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_getName(locale, sink, ec2);
}
CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -1000,11 +980,7 @@ collectCurrencyNames(const char* locale,
// Look up the Currencies resource for the given locale.
UErrorCode ec2 = U_ZERO_ERROR;
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_getName(locale, sink, ec2);
}
CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) {
ec = U_ILLEGAL_ARGUMENT_ERROR;
}

View file

@ -587,6 +587,23 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri
} // namespace
/**
 * CharString-returning overload of ulocimp_getKeywords(): forwards all
 * arguments to the ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT CharString
ulocimp_getKeywords(const char* localeID,
                    char prev,
                    bool valuesToo,
                    UErrorCode& status)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeKeywords = [localeID, prev, valuesToo](ByteSink& out, UErrorCode& ec) {
        ulocimp_getKeywords(localeID, prev, out, valuesToo, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeKeywords, status);
}
U_EXPORT void
ulocimp_getKeywords(const char* localeID,
char prev,
@ -724,6 +741,18 @@ uloc_getKeywordValue(const char* localeID,
*status);
}
/**
 * CharString-returning overload of ulocimp_getKeywordValue(): forwards to
 * the ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT CharString
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        UErrorCode& status)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeValue = [localeID, keywordName](ByteSink& out, UErrorCode& ec) {
        ulocimp_getKeywordValue(localeID, keywordName, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeValue, status);
}
U_EXPORT void
ulocimp_getKeywordValue(const char* localeID,
const char* keywordName,
@ -749,9 +778,8 @@ ulocimp_getKeywordValue(const char* localeID,
}
if (_hasBCP47Extension(localeID)) {
CharStringByteSink sink(&tempBuffer);
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
tmpLocaleID=localeID;
}
@ -1367,66 +1395,66 @@ _getVariant(const char* localeID,
U_EXPORT CharString
ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString language;
CharStringByteSink sink(&language);
ulocimp_getSubtags(
localeID,
&sink,
nullptr,
nullptr,
nullptr,
nullptr,
status);
return language;
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
&sink,
nullptr,
nullptr,
nullptr,
nullptr,
status);
},
status);
}
U_EXPORT CharString
ulocimp_getScript(const char* localeID, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString script;
CharStringByteSink sink(&script);
ulocimp_getSubtags(
localeID,
nullptr,
&sink,
nullptr,
nullptr,
nullptr,
status);
return script;
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
&sink,
nullptr,
nullptr,
nullptr,
status);
},
status);
}
U_EXPORT CharString
ulocimp_getRegion(const char* localeID, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString region;
CharStringByteSink sink(&region);
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
&sink,
nullptr,
nullptr,
status);
return region;
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
&sink,
nullptr,
nullptr,
status);
},
status);
}
U_EXPORT CharString
ulocimp_getVariant(const char* localeID, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString variant;
CharStringByteSink sink(&variant);
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
nullptr,
&sink,
nullptr,
status);
return variant;
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
localeID,
nullptr,
nullptr,
nullptr,
&sink,
nullptr,
status);
},
status);
}
U_EXPORT void
@ -1641,8 +1669,7 @@ uloc_openKeywords(const char* localeID,
const char* tmpLocaleID;
if (_hasBCP47Extension(localeID)) {
CharStringByteSink sink(&tempBuffer);
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, *status);
tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
if (localeID==nullptr) {
@ -1665,9 +1692,7 @@ uloc_openKeywords(const char* localeID,
/* keywords are located after '@' */
if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) {
CharString keywords;
CharStringByteSink sink(&keywords);
ulocimp_getKeywords(tmpLocaleID+1, '@', sink, false, *status);
CharString keywords = ulocimp_getKeywords(tmpLocaleID + 1, '@', false, *status);
if (U_FAILURE(*status)) {
return nullptr;
}
@ -1728,8 +1753,7 @@ _canonicalize(const char* localeID,
}
}
CharStringByteSink tempSink(&tempBuffer);
ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err);
tempBuffer = ulocimp_forLanguageTag(localeIDPtr, -1, nullptr, err);
tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
} else {
if (localeID==nullptr) {
@ -1876,6 +1900,17 @@ uloc_getParent(const char* localeID,
*err);
}
/**
 * CharString-returning overload of ulocimp_getParent(): forwards to the
 * ByteSink-writing overload through ByteSinkUtil::viaByteSinkToCharString()
 * and returns the collected output.
 */
U_EXPORT CharString
ulocimp_getParent(const char* localeID,
                  UErrorCode& err)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeParent = [localeID](ByteSink& out, UErrorCode& ec) {
        ulocimp_getParent(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeParent, err);
}
U_EXPORT void
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
@ -2004,6 +2039,17 @@ uloc_getName(const char* localeID,
*err);
}
/**
 * CharString-returning overload of ulocimp_getName(): forwards to the
 * ByteSink-writing overload through ByteSinkUtil::viaByteSinkToCharString()
 * and returns the collected output.
 */
U_EXPORT CharString
ulocimp_getName(const char* localeID,
                UErrorCode& err)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeName = [localeID](ByteSink& out, UErrorCode& ec) {
        ulocimp_getName(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeName, err);
}
U_EXPORT void
ulocimp_getName(const char* localeID,
ByteSink& sink,
@ -2026,6 +2072,17 @@ uloc_getBaseName(const char* localeID,
*err);
}
/**
 * CharString-returning overload of ulocimp_getBaseName(): forwards to the
 * ByteSink-writing overload through ByteSinkUtil::viaByteSinkToCharString()
 * and returns the collected output.
 */
U_EXPORT CharString
ulocimp_getBaseName(const char* localeID,
                    UErrorCode& err)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeBaseName = [localeID](ByteSink& out, UErrorCode& ec) {
        ulocimp_getBaseName(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeBaseName, err);
}
U_EXPORT void
ulocimp_getBaseName(const char* localeID,
ByteSink& sink,
@ -2048,6 +2105,17 @@ uloc_canonicalize(const char* localeID,
*err);
}
/**
 * CharString-returning overload of ulocimp_canonicalize(): forwards to the
 * ByteSink-writing overload through ByteSinkUtil::viaByteSinkToCharString()
 * and returns the collected output.
 */
U_EXPORT CharString
ulocimp_canonicalize(const char* localeID,
                     UErrorCode& err)
{
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeCanonical = [localeID](ByteSink& out, UErrorCode& ec) {
        ulocimp_canonicalize(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeCanonical, err);
}
U_EXPORT void
ulocimp_canonicalize(const char* localeID,
ByteSink& sink,
@ -2118,17 +2186,9 @@ uloc_getLCID(const char* localeID)
if (uprv_strchr(localeID, '@')) {
// uprv_convertToLCID does not support keywords other than collation.
// Remove all keywords except collation.
CharString collVal;
{
CharStringByteSink sink(&collVal);
ulocimp_getKeywordValue(localeID, "collation", sink, status);
}
CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
if (U_SUCCESS(status) && !collVal.isEmpty()) {
CharString tmpLocaleID;
{
CharStringByteSink sink(&tmpLocaleID);
ulocimp_getBaseName(localeID, sink, status);
}
CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status);
if (U_SUCCESS(status)) {
return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);

View file

@ -1306,11 +1306,7 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str
break;
}
icu::CharString buf;
{
icu::CharStringByteSink sink(&buf);
ulocimp_getKeywordValue(localeID, key, sink, tmpStatus);
}
icu::CharString buf = ulocimp_getKeywordValue(localeID, key, tmpStatus);
len = buf.length();
if (U_FAILURE(tmpStatus)) {
@ -2577,6 +2573,16 @@ uloc_toLanguageTag(const char* localeID,
*status);
}
/**
 * CharString-returning overload of ulocimp_toLanguageTag(): forwards to the
 * ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT icu::CharString
ulocimp_toLanguageTag(const char* localeID,
                      bool strict,
                      UErrorCode& status) {
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeTag = [localeID, strict](icu::ByteSink& out, UErrorCode& ec) {
        ulocimp_toLanguageTag(localeID, out, strict, ec);
    };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeTag, status);
}
U_EXPORT void
ulocimp_toLanguageTag(const char* localeID,
@ -2585,16 +2591,12 @@ ulocimp_toLanguageTag(const char* localeID,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
icu::CharString canonical;
UErrorCode tmpStatus = U_ZERO_ERROR;
bool hadPosix = false;
const char* pKeywordStart;
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
{
icu::CharStringByteSink canonicalSink(&canonical);
ulocimp_canonicalize(localeID, canonicalSink, tmpStatus);
}
icu::CharString canonical = ulocimp_canonicalize(localeID, tmpStatus);
if (U_FAILURE(tmpStatus)) {
status = tmpStatus;
return;
@ -2615,11 +2617,7 @@ ulocimp_toLanguageTag(const char* localeID,
key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
if (len == 1 && *key == PRIVATEUSE) {
icu::CharString buf;
{
icu::CharStringByteSink sink(&buf);
ulocimp_getKeywordValue(localeID, key, sink, tmpStatus);
}
icu::CharString buf = ulocimp_getKeywordValue(localeID, key, tmpStatus);
if (U_SUCCESS(tmpStatus)) {
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
/* return private use only tag */
@ -2666,6 +2664,17 @@ uloc_forLanguageTag(const char* langtag,
*status);
}
/**
 * CharString-returning overload of ulocimp_forLanguageTag(): forwards all
 * arguments (including the parsedLength pointer, unchanged) to the
 * ByteSink-writing overload through
 * ByteSinkUtil::viaByteSinkToCharString() and returns the collected output.
 */
U_EXPORT icu::CharString
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       int32_t* parsedLength,
                       UErrorCode& status) {
    // Adapter with the (sink, error code) shape expected by the helper.
    const auto writeLocaleID =
        [langtag, tagLen, parsedLength](icu::ByteSink& out, UErrorCode& ec) {
            ulocimp_forLanguageTag(langtag, tagLen, out, parsedLength, ec);
        };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeLocaleID, status);
}
U_EXPORT void
ulocimp_forLanguageTag(const char* langtag,

View file

@ -53,6 +53,12 @@ uloc_getCurrentCountryID(const char* oldID);
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID);
/**
 * Convenience overload of ulocimp_getKeywords() that returns the output as an
 * icu::CharString instead of writing it to a caller-supplied icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_getKeywords(const char* localeID,
char prev,
bool valuesToo,
UErrorCode& status);
U_EXPORT void
ulocimp_getKeywords(const char* localeID,
char prev,
@ -60,21 +66,38 @@ ulocimp_getKeywords(const char* localeID,
bool valuesToo,
UErrorCode& status);
/**
 * Convenience overload of ulocimp_getName() that returns the output as an
 * icu::CharString instead of writing it to a caller-supplied icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_getName(const char* localeID,
UErrorCode& err);
U_EXPORT void
ulocimp_getName(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_getBaseName() that returns the output as an
 * icu::CharString instead of writing it to a caller-supplied icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_getBaseName(const char* localeID,
UErrorCode& err);
U_EXPORT void
ulocimp_getBaseName(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_canonicalize() that returns the output as
 * an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_canonicalize(const char* localeID,
UErrorCode& err);
U_EXPORT void
ulocimp_canonicalize(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_getKeywordValue() that returns the output
 * as an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_getKeywordValue(const char* localeID,
const char* keywordName,
UErrorCode& status);
U_EXPORT void
ulocimp_getKeywordValue(const char* localeID,
const char* keywordName,
@ -145,11 +168,20 @@ ulocimp_getSubtags(
status);
}
/**
 * Convenience overload of ulocimp_getParent() that returns the output as an
 * icu::CharString instead of writing it to a caller-supplied icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_getParent(const char* localeID,
UErrorCode& err);
U_EXPORT void
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_toLanguageTag() that returns the output as
 * an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_toLanguageTag(const char* localeID,
bool strict,
UErrorCode& status);
/**
* Writes a well-formed language tag for this locale ID.
*
@ -175,6 +207,12 @@ ulocimp_toLanguageTag(const char* localeID,
bool strict,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_forLanguageTag() that returns the output as
 * an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink. The parsedLength pointer is passed through unchanged to the
 * ByteSink-writing overload.
 */
U_EXPORT icu::CharString
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
int32_t* parsedLength,
UErrorCode& status);
/**
* Returns a locale ID for the specified BCP47 language tag string.
* If the specified language tag contains any ill-formed subtags,
@ -232,6 +270,10 @@ U_EXPORT icu::CharString
ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
UErrorCode& status);
/**
 * Convenience overload of ulocimp_addLikelySubtags() that returns the output
 * as an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_addLikelySubtags(const char* localeID,
UErrorCode& status);
/**
* Add the likely subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:
@ -266,6 +308,11 @@ ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
/**
 * Convenience overload of ulocimp_minimizeSubtags() that returns the output
 * as an icu::CharString instead of writing it to a caller-supplied
 * icu::ByteSink.
 */
U_EXPORT icu::CharString
ulocimp_minimizeSubtags(const char* localeID,
bool favorScript,
UErrorCode& status);
/**
* Minimize the subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:

View file

@ -2718,11 +2718,7 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,
UResourceDataEntry *entry;
if(openType != URES_OPEN_DIRECT) {
/* first "canonicalize" the locale ID */
CharString canonLocaleID;
{
CharStringByteSink sink(&canonLocaleID);
ulocimp_getBaseName(localeID, sink, *status);
}
CharString canonLocaleID = ulocimp_getBaseName(localeID, *status);
if(U_FAILURE(*status)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
@ -3059,11 +3055,7 @@ static void getParentForFunctionalEquivalent(const char* localeID,
// If none there, use normal truncation parent
if (U_FAILURE(subStatus) || parent.isEmpty()) {
subStatus = U_ZERO_ERROR;
parent.clear();
{
CharStringByteSink sink(&parent);
ulocimp_getParent(localeID, sink, subStatus);
}
parent = ulocimp_getParent(localeID, subStatus);
}
}
@ -3074,7 +3066,6 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
{
CharString defVal; /* default value for given locale */
CharString defLoc; /* default value for given locale */
CharString base; /* base locale */
CharString found;
CharString parent;
CharString full;
@ -3083,18 +3074,11 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
UErrorCode subStatus = U_ZERO_ERROR;
int32_t length = 0;
if(U_FAILURE(*status)) return 0;
CharString kwVal;
{
CharStringByteSink sink(&kwVal);
ulocimp_getKeywordValue(locid, keyword, sink, subStatus);
}
CharString kwVal = ulocimp_getKeywordValue(locid, keyword, subStatus);
if(kwVal == DEFAULT_TAG) {
kwVal.clear();
}
{
CharStringByteSink sink(&base);
ulocimp_getBaseName(locid, sink, subStatus);
}
CharString base = ulocimp_getBaseName(locid, subStatus);
#if defined(URES_TREE_DEBUG)
fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n",
locid, keyword, kwVal.data(), base.data(), u_errorName(subStatus));

View file

@ -18,7 +18,6 @@
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/uloc.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -132,11 +131,7 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
if(U_FAILURE(*err) || length != 0) {
return length;
}
icu::CharString likely;
{
icu::CharStringByteSink sink(&likely);
ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, internalErrorCode);
}
icu::CharString likely = ulocimp_addLikelySubtags(nameOrAbbrOrLocale, internalErrorCode);
if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
if(U_FAILURE(*err) || length != 0) {

View file

@ -63,7 +63,6 @@
#include "sharedcalendar.h"
#include "unifiedcache.h"
#include "ulocimp.h"
#include "bytesinkutil.h"
#include "charstr.h"
#if !UCONFIG_NO_SERVICE
@ -259,20 +258,12 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
// e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
// NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and
// the Gregorian calendar is returned instead.
CharString canonicalName;
{
CharStringByteSink sink(&canonicalName);
ulocimp_canonicalize(locid, sink, status);
}
CharString canonicalName = ulocimp_canonicalize(locid, status);
if (U_FAILURE(status)) {
return CALTYPE_GREGORIAN;
}
CharString calTypeBuf;
{
CharStringByteSink sink(&calTypeBuf);
ulocimp_getKeywordValue(canonicalName.data(), "calendar", sink, status);
}
CharString calTypeBuf = ulocimp_getKeywordValue(canonicalName.data(), "calendar", status);
if (U_SUCCESS(status)) {
calType = getCalendarType(calTypeBuf.data());
if (calType != CALTYPE_UNKNOWN) {

View file

@ -24,7 +24,6 @@
#include "unicode/uloc.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "collation.h"
@ -606,12 +605,8 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
lang.appendInvariantChars(v, errorCode);
if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; }
// BCP 47 language tag -> ICU locale ID
CharString localeID;
int32_t parsedLength;
{
CharStringByteSink sink(&localeID);
ulocimp_forLanguageTag(lang.data(), -1, sink, &parsedLength, errorCode);
}
CharString localeID = ulocimp_forLanguageTag(lang.data(), -1, &parsedLength, errorCode);
if(U_FAILURE(errorCode) || parsedLength != lang.length()) {
errorCode = U_ZERO_ERROR;
setParseError("expected language tag in [import langTag]", errorCode);
@ -632,11 +627,7 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
uprv_memcpy(baseID, "und", 3);
}
// @collation=type, or length=0 if not specified
CharString collationType;
{
CharStringByteSink sink(&collationType);
ulocimp_getKeywordValue(localeID.data(), "collation", sink, errorCode);
}
CharString collationType = ulocimp_getKeywordValue(localeID.data(), "collation", errorCode);
if(U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
setParseError("expected language tag in [import langTag]", errorCode);

View file

@ -14,7 +14,6 @@
#include "dayperiodrules.h"
#include "unicode/ures.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "ucln_in.h"
@ -361,9 +360,7 @@ const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCo
while (*name != '\0') {
ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name);
if (ruleSetNum == 0) {
CharString parent;
CharStringByteSink sink(&parent);
ulocimp_getParent(name, sink, errorCode);
CharString parent = ulocimp_getParent(name, errorCode);
if (parent.isEmpty()) {
// Saves a lookup in the hash table.
break;

View file

@ -23,7 +23,6 @@
#include <iostream>
#endif
#include "bytesinkutil.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/msgfmt.h"
@ -405,11 +404,7 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& status)
"calendar", "calendar", locName, nullptr, false, &status);
localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination
// now get the calendar key value from that locale
CharString calendarType;
{
CharStringByteSink sink(&calendarType);
ulocimp_getKeywordValue(localeWithCalendarKey, "calendar", sink, status);
}
CharString calendarType = ulocimp_getKeywordValue(localeWithCalendarKey, "calendar", status);
if (U_SUCCESS(status)) {
calendarTypeToUse = calendarType.data();
}

View file

@ -29,7 +29,6 @@
#include "unicode/ustring.h"
#include "unicode/rep.h"
#include "unicode/region.h"
#include "bytesinkutil.h"
#include "cpputils.h"
#include "mutex.h"
#include "umutex.h"
@ -904,15 +903,7 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString&
&localStatus);
localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination
// now get the calendar key value from that locale
destination.clear();
{
CharStringByteSink sink(&destination);
ulocimp_getKeywordValue(
localeWithCalendarKey,
"calendar",
sink,
localStatus);
}
destination = ulocimp_getKeywordValue(localeWithCalendarKey, "calendar", localStatus);
// If the input locale was invalid, don't fail with missing resource error, instead
// continue with default of Gregorian.
if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {

View file

@ -25,7 +25,6 @@
#include "unicode/ugender.h"
#include "unicode/ures.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -156,11 +155,9 @@ const GenderInfo* GenderInfo::loadInstance(const Locale& locale, UErrorCode& sta
CharString parentLocaleName(curLocaleName, key_status);
while (s == nullptr) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocaleName.data(), sink, status);
if (tmp.isEmpty()) break;
parentLocaleName = std::move(tmp);
CharString tmp = ulocimp_getParent(parentLocaleName.data(), status);
if (tmp.isEmpty()) break;
parentLocaleName = std::move(tmp);
}
key_status = U_ZERO_ERROR;
resLen = 0;

View file

@ -22,7 +22,6 @@
#include "unicode/numfmt.h"
#include "unicode/decimfmt.h"
#include "unicode/numberrangeformatter.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -861,9 +860,7 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC
for (;;) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocaleName.data(), sink, status);
CharString tmp = ulocimp_getParent(parentLocaleName.data(), status);
if (tmp.isEmpty()) break;
parentLocaleName = std::move(tmp);
}

View file

@ -32,7 +32,6 @@
#include "unicode/utf8.h"
#include "unicode/uversion.h"
#include "bocsu.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "collation.h"
@ -1580,12 +1579,8 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
}
// Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
{
CharString collation;
CharStringByteSink sink(&collation);
ulocimp_getKeywordValue(resultLocale, "collation", sink, errorCode);
appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
}
CharString collation = ulocimp_getKeywordValue(resultLocale, "collation", errorCode);
appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
CharString language;
CharString script;
CharString region;

View file

@ -17,7 +17,6 @@
#include "unicode/localpointer.h"
#include "plurrule_impl.h"
#include "uvector.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -563,12 +562,7 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key,
CharString parentLocale(localeName, status);
U_ASSERT(countToPatterns != nullptr);
for (;;) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocale.data(), sink, status);
parentLocale = std::move(tmp);
}
parentLocale = ulocimp_getParent(parentLocale.data(), status);
// look for pattern for srcPluralCount in locale tree
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_UNIT, parentLocale.data(), &status));
LocalUResourceBundlePointer unitsRes(ures_getByKey(rb.getAlias(), key, nullptr, &status));

View file

@ -19,7 +19,6 @@
#include "unicode/udat.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "tzgnames.h"
#include "cmemory.h"
@ -328,11 +327,7 @@ TimeZoneFormat::TimeZoneFormat(const Locale& locale, UErrorCode& status)
int32_t regionLen = static_cast<int32_t>(uprv_strlen(region));
if (regionLen == 0) {
UErrorCode tempStatus = U_ZERO_ERROR;
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_addLikelySubtags(fLocale.getName(), sink, tempStatus);
}
CharString loc = ulocimp_addLikelySubtags(fLocale.getName(), tempStatus);
regionLen = uloc_getCountry(loc.data(), fTargetRegion, sizeof(fTargetRegion), &tempStatus);
if (U_SUCCESS(tempStatus)) {

View file

@ -21,7 +21,6 @@
#include "unicode/strenum.h"
#include "unicode/vtzone.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -410,12 +409,7 @@ TZGNCore::initialize(const Locale& locale, UErrorCode& status) {
const char* region = fLocale.getCountry();
int32_t regionLen = static_cast<int32_t>(uprv_strlen(region));
if (regionLen == 0) {
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_addLikelySubtags(fLocale.getName(), sink, status);
}
CharString loc = ulocimp_addLikelySubtags(fLocale.getName(), status);
ulocimp_getSubtags(loc.data(), nullptr, nullptr, &fTargetRegion, nullptr, nullptr, status);
if (U_FAILURE(status)) {
cleanup();

View file

@ -22,7 +22,6 @@
#include "unicode/utf16.h"
#include "tznames_impl.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@ -2161,11 +2160,7 @@ TZDBTimeZoneNames::TZDBTimeZoneNames(const Locale& locale)
int32_t regionLen = static_cast<int32_t>(uprv_strlen(region));
if (regionLen == 0) {
UErrorCode status = U_ZERO_ERROR;
CharString loc;
{
CharStringByteSink sink(&loc);
ulocimp_addLikelySubtags(fLocale.getName(), sink, status);
}
CharString loc = ulocimp_addLikelySubtags(fLocale.getName(), status);
ulocimp_getSubtags(loc.data(), nullptr, nullptr, &fRegion, nullptr, nullptr, status);
if (U_SUCCESS(status)) {
useWorld = false;

View file

@ -21,7 +21,6 @@
#include "utracimp.h"
#include "ucol_imp.h"
#include "ulocimp.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "cstring.h"
#include "uresimp.h"
@ -451,22 +450,14 @@ ucol_prepareShortStringOpen( const char *definition,
ucol_sit_readSpecs(&s, definition, parseError, status);
ucol_sit_calculateWholeLocale(&s, *status);
CharString buffer;
{
CharStringByteSink sink(&buffer);
ulocimp_canonicalize(s.locale.data(), sink, *status);
}
CharString buffer = ulocimp_canonicalize(s.locale.data(), *status);
UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer.data(), status);
/* we try to find stuff from keyword */
UResourceBundle *collations = ures_getByKey(b, "collations", nullptr, status);
UResourceBundle *collElem = nullptr;
CharString keyBuffer;
{
// if there is a keyword, we pick it up and try to get elements
CharStringByteSink sink(&keyBuffer);
ulocimp_getKeywordValue(buffer.data(), "collation", sink, *status);
}
// if there is a keyword, we pick it up and try to get elements
CharString keyBuffer = ulocimp_getKeywordValue(buffer.data(), "collation", *status);
if(keyBuffer.isEmpty()) {
// no keyword
// we try to find the default setting, which will give us the keyword value
@ -523,11 +514,7 @@ ucol_openFromShortString( const char *definition,
#ifdef UCOL_TRACE_SIT
fprintf(stderr, "DEF %s, DATA %s, ERR %s\n", definition, s.locale.data(), u_errorName(*status));
#endif
CharString buffer;
{
CharStringByteSink sink(&buffer);
ulocimp_canonicalize(s.locale.data(), sink, *status);
}
CharString buffer = ulocimp_canonicalize(s.locale.data(), *status);
UCollator *result = ucol_open(buffer.data(), status);
int32_t i = 0;

View file

@ -30,7 +30,6 @@
#include "unicode/timezone.h"
#include "unicode/utmscale.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "ulocimp.h"
@ -104,11 +103,7 @@ static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeSt
UErrorCode status = U_ZERO_ERROR;
// Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk".
CharString asciiBCP47Tag;
{
CharStringByteSink sink(&asciiBCP47Tag);
ulocimp_toLanguageTag(locale.getName(), sink, false, status);
}
CharString asciiBCP47Tag = ulocimp_toLanguageTag(locale.getName(), false, status);
if (U_SUCCESS(status))
{

View file

@ -24,7 +24,6 @@
#include "unicode/locid.h"
#include "unicode/ustring.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
@ -149,11 +148,7 @@ static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeSt
UErrorCode status = U_ZERO_ERROR;
// Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk".
CharString asciiBCP47Tag;
{
CharStringByteSink sink(&asciiBCP47Tag);
ulocimp_toLanguageTag(locale.getName(), sink, false, status);
}
CharString asciiBCP47Tag = ulocimp_toLanguageTag(locale.getName(), false, status);
if (U_SUCCESS(status))
{