ICU-20158 Pass ByteSink all the way to _uloc_(addLikely|minimize)Subtags().

This eliminates the need for scratch buffers in any code path that ends
with these functions and also eliminates the need for counting bytes,
something that ByteSink will now handle correctly when needed.

Existing calls to uloc_addLikelySubtags() and uloc_minimizeSubtags()
throughout ICU4C implementation code are also updated to instead use
either the Locale or ulocimp_* functions with the new API.

None of this should have any externally visible effect; it's all about
cleaning up implementation internals.
This commit is contained in:
Fredrik Roubert 2019-02-21 00:23:02 +01:00 committed by Fredrik Roubert
parent b36dbedf34
commit 01c69cfa85
6 changed files with 354 additions and 400 deletions

View file

@ -740,46 +740,10 @@ Locale::addLikelySubtags(UErrorCode& status) {
return;
}
// The maximized locale ID string is often longer, but there is no good
// heuristic to estimate just how much longer. Leave that to CharString.
CharString maximizedLocaleID;
int32_t maximizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
char* buffer;
int32_t reslen;
for (;;) {
buffer = maximizedLocaleID.getAppendBuffer(
/*minCapacity=*/maximizedLocaleIDCapacity,
/*desiredCapacityHint=*/maximizedLocaleIDCapacity,
maximizedLocaleIDCapacity,
status);
if (U_FAILURE(status)) {
return;
}
reslen = uloc_addLikelySubtags(
fullName,
buffer,
maximizedLocaleIDCapacity,
&status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
break;
}
maximizedLocaleIDCapacity = reslen;
status = U_ZERO_ERROR;
}
if (U_FAILURE(status)) {
return;
}
maximizedLocaleID.append(buffer, reslen, status);
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR; // Terminators provided by CharString.
{
CharStringByteSink sink(&maximizedLocaleID);
ulocimp_addLikelySubtags(fullName, sink, &status);
}
if (U_FAILURE(status)) {
@ -798,50 +762,10 @@ Locale::minimizeSubtags(UErrorCode& status) {
return;
}
// Except for a few edge cases (like the empty string, that is minimized to
// "en__POSIX"), minimized locale ID strings will be either the same length
// or shorter than their input.
CharString minimizedLocaleID;
int32_t minimizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
char* buffer;
int32_t reslen;
for (;;) {
buffer = minimizedLocaleID.getAppendBuffer(
/*minCapacity=*/minimizedLocaleIDCapacity,
/*desiredCapacityHint=*/minimizedLocaleIDCapacity,
minimizedLocaleIDCapacity,
status);
if (U_FAILURE(status)) {
return;
}
reslen = uloc_minimizeSubtags(
fullName,
buffer,
minimizedLocaleIDCapacity,
&status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
break;
}
// Because of the internal minimal buffer size of CharString, I can't
// think of any input data for which this could possibly ever happen.
// Maybe it would be better replaced with an assertion instead?
minimizedLocaleIDCapacity = reslen;
status = U_ZERO_ERROR;
}
if (U_FAILURE(status)) {
return;
}
minimizedLocaleID.append(buffer, reslen, status);
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR; // Terminators provided by CharString.
{
CharStringByteSink sink(&minimizedLocaleID);
ulocimp_minimizeSubtags(fullName, sink, &status);
}
if (U_FAILURE(status)) {

View file

@ -19,6 +19,7 @@
* that then do not depend on resource bundle code and likely-subtags data.
*/
#include "unicode/bytestream.h"
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/putil.h"
@ -26,6 +27,8 @@
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/uscript.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "ulocimp.h"
@ -145,12 +148,10 @@ static const char* const unknownRegion = "ZZ";
* @param trailing Any trailing data to append to the new tag.
* @param trailingLength The length of the trailing data.
* @param alternateTags A string containing any alternate tags.
* @param tag The output buffer.
* @param tagCapacity The capacity of the output buffer.
* @param sink The output sink receiving the tag string.
* @param err A pointer to a UErrorCode for error reporting.
* @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
**/
static int32_t U_CALLCONV
static void U_CALLCONV
createTagStringWithAlternates(
const char* lang,
int32_t langLength,
@ -161,16 +162,13 @@ createTagStringWithAlternates(
const char* trailing,
int32_t trailingLength,
const char* alternateTags,
char* tag,
int32_t tagCapacity,
icu::ByteSink& sink,
UErrorCode* err) {
if (U_FAILURE(*err)) {
goto error;
}
else if (tag == NULL ||
tagCapacity <= 0 ||
langLength >= ULOC_LANG_CAPACITY ||
else if (langLength >= ULOC_LANG_CAPACITY ||
scriptLength >= ULOC_SCRIPT_CAPACITY ||
regionLength >= ULOC_COUNTRY_CAPACITY) {
goto error;
@ -184,7 +182,6 @@ createTagStringWithAlternates(
**/
char tagBuffer[ULOC_FULLNAME_CAPACITY];
int32_t tagLength = 0;
int32_t capacityRemaining = tagCapacity;
UBool regionAppended = FALSE;
if (langLength > 0) {
@ -311,55 +308,28 @@ createTagStringWithAlternates(
}
}
{
const int32_t toCopy =
tagLength >= tagCapacity ? tagCapacity : tagLength;
/**
* Copy the partial tag from our internal buffer to the supplied
* target.
**/
uprv_memcpy(
tag,
tagBuffer,
toCopy);
capacityRemaining -= toCopy;
}
/**
* Copy the partial tag from our internal buffer to the supplied
* target.
**/
sink.Append(tagBuffer, tagLength);
if (trailingLength > 0) {
if (*trailing != '@' && capacityRemaining > 0) {
tag[tagLength++] = '_';
--capacityRemaining;
if (capacityRemaining > 0 && !regionAppended) {
if (*trailing != '@') {
sink.Append("_", 1);
if (!regionAppended) {
/* extra separator is required */
tag[tagLength++] = '_';
--capacityRemaining;
sink.Append("_", 1);
}
}
if (capacityRemaining > 0) {
/*
* Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
* don't know if the user-supplied buffers overlap.
*/
const int32_t toCopy =
trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
uprv_memmove(
&tag[tagLength],
trailing,
toCopy);
}
/*
* Copy the trailing data into the supplied buffer.
*/
sink.Append(trailing, trailingLength);
}
tagLength += trailingLength;
return u_terminateChars(
tag,
tagCapacity,
tagLength,
err);
return;
}
error:
@ -373,8 +343,6 @@ error:
U_SUCCESS(*err)) {
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
return -1;
}
/**
@ -398,12 +366,10 @@ error:
* @param regionLength The length of the region tag.
* @param trailing Any trailing data to append to the new tag.
* @param trailingLength The length of the trailing data.
* @param tag The output buffer.
* @param tagCapacity The capacity of the output buffer.
* @param sink The output sink receiving the tag string.
* @param err A pointer to a UErrorCode for error reporting.
* @return The length of the tag string, which may be greater than tagCapacity.
**/
static int32_t U_CALLCONV
static void U_CALLCONV
createTagString(
const char* lang,
int32_t langLength,
@ -413,11 +379,10 @@ createTagString(
int32_t regionLength,
const char* trailing,
int32_t trailingLength,
char* tag,
int32_t tagCapacity,
icu::ByteSink& sink,
UErrorCode* err)
{
return createTagStringWithAlternates(
createTagStringWithAlternates(
lang,
langLength,
script,
@ -427,8 +392,7 @@ createTagString(
trailing,
trailingLength,
NULL,
tag,
tagCapacity,
sink,
err);
}
@ -576,7 +540,7 @@ error:
goto exit;
}
static int32_t U_CALLCONV
static UBool U_CALLCONV
createLikelySubtagsString(
const char* lang,
int32_t langLength,
@ -586,17 +550,14 @@ createLikelySubtagsString(
int32_t regionLength,
const char* variants,
int32_t variantsLength,
char* tag,
int32_t tagCapacity,
UErrorCode* err)
{
icu::ByteSink& sink,
UErrorCode* err) {
/**
* ULOC_FULLNAME_CAPACITY will provide enough capacity
* that we can build a string that contains the language,
* script and region code without worrying about overrunning
* the user-supplied buffer.
**/
char tagBuffer[ULOC_FULLNAME_CAPACITY];
char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
if(U_FAILURE(*err)) {
@ -610,25 +571,28 @@ createLikelySubtagsString(
const char* likelySubtags = NULL;
createTagString(
lang,
langLength,
script,
scriptLength,
region,
regionLength,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
err);
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createTagString(
lang,
langLength,
script,
scriptLength,
region,
regionLength,
NULL,
0,
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
likelySubtags =
findLikelySubtags(
tagBuffer,
tagBuffer.data(),
likelySubtagsBuffer,
sizeof(likelySubtagsBuffer),
err);
@ -640,7 +604,7 @@ createLikelySubtagsString(
/* Always use the language tag from the
maximal string, since it may be more
specific than the one provided. */
return createTagStringWithAlternates(
createTagStringWithAlternates(
NULL,
0,
NULL,
@ -650,9 +614,9 @@ createLikelySubtagsString(
variants,
variantsLength,
likelySubtags,
tag,
tagCapacity,
sink,
err);
return TRUE;
}
}
@ -663,25 +627,28 @@ createLikelySubtagsString(
const char* likelySubtags = NULL;
createTagString(
lang,
langLength,
script,
scriptLength,
NULL,
0,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
err);
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createTagString(
lang,
langLength,
script,
scriptLength,
NULL,
0,
NULL,
0,
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
likelySubtags =
findLikelySubtags(
tagBuffer,
tagBuffer.data(),
likelySubtagsBuffer,
sizeof(likelySubtagsBuffer),
err);
@ -693,7 +660,7 @@ createLikelySubtagsString(
/* Always use the language tag from the
maximal string, since it may be more
specific than the one provided. */
return createTagStringWithAlternates(
createTagStringWithAlternates(
NULL,
0,
NULL,
@ -703,9 +670,9 @@ createLikelySubtagsString(
variants,
variantsLength,
likelySubtags,
tag,
tagCapacity,
sink,
err);
return TRUE;
}
}
@ -716,25 +683,28 @@ createLikelySubtagsString(
const char* likelySubtags = NULL;
createTagString(
lang,
langLength,
NULL,
0,
region,
regionLength,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
err);
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createTagString(
lang,
langLength,
NULL,
0,
region,
regionLength,
NULL,
0,
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
likelySubtags =
findLikelySubtags(
tagBuffer,
tagBuffer.data(),
likelySubtagsBuffer,
sizeof(likelySubtagsBuffer),
err);
@ -746,7 +716,7 @@ createLikelySubtagsString(
/* Always use the language tag from the
maximal string, since it may be more
specific than the one provided. */
return createTagStringWithAlternates(
createTagStringWithAlternates(
NULL,
0,
script,
@ -756,9 +726,9 @@ createLikelySubtagsString(
variants,
variantsLength,
likelySubtags,
tag,
tagCapacity,
sink,
err);
return TRUE;
}
}
@ -768,25 +738,28 @@ createLikelySubtagsString(
{
const char* likelySubtags = NULL;
createTagString(
lang,
langLength,
NULL,
0,
NULL,
0,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
err);
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createTagString(
lang,
langLength,
NULL,
0,
NULL,
0,
NULL,
0,
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
likelySubtags =
findLikelySubtags(
tagBuffer,
tagBuffer.data(),
likelySubtagsBuffer,
sizeof(likelySubtagsBuffer),
err);
@ -798,7 +771,7 @@ createLikelySubtagsString(
/* Always use the language tag from the
maximal string, since it may be more
specific than the one provided. */
return createTagStringWithAlternates(
createTagStringWithAlternates(
NULL,
0,
script,
@ -808,17 +781,13 @@ createLikelySubtagsString(
variants,
variantsLength,
likelySubtags,
tag,
tagCapacity,
sink,
err);
return TRUE;
}
}
return u_terminateChars(
tag,
tagCapacity,
0,
err);
return FALSE;
error:
@ -826,7 +795,7 @@ error:
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
return -1;
return FALSE;
}
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
@ -848,12 +817,10 @@ error:
} \
}
static int32_t
_uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
int32_t maximizedLocaleIDCapacity,
UErrorCode* err)
{
static void
_uloc_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err) {
char lang[ULOC_LANG_CAPACITY];
int32_t langLength = sizeof(lang);
char script[ULOC_SCRIPT_CAPACITY];
@ -863,14 +830,12 @@ _uloc_addLikelySubtags(const char* localeID,
const char* trailing = "";
int32_t trailingLength = 0;
int32_t trailingIndex = 0;
int32_t resultLength = 0;
UBool success = FALSE;
if(U_FAILURE(*err)) {
goto error;
}
else if (localeID == NULL ||
maximizedLocaleID == NULL ||
maximizedLocaleIDCapacity <= 0) {
if (localeID == NULL) {
goto error;
}
@ -901,7 +866,7 @@ _uloc_addLikelySubtags(const char* localeID,
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
resultLength =
success =
createLikelySubtagsString(
lang,
langLength,
@ -911,55 +876,32 @@ _uloc_addLikelySubtags(const char* localeID,
regionLength,
trailing,
trailingLength,
maximizedLocaleID,
maximizedLocaleIDCapacity,
sink,
err);
if (resultLength == 0) {
if (!success) {
const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
/*
* If we get here, we need to return localeID.
*/
uprv_memcpy(
maximizedLocaleID,
localeID,
localIDLength <= maximizedLocaleIDCapacity ?
localIDLength : maximizedLocaleIDCapacity);
resultLength =
u_terminateChars(
maximizedLocaleID,
maximizedLocaleIDCapacity,
localIDLength,
err);
sink.Append(localeID, localIDLength);
}
return resultLength;
return;
error:
if (!U_FAILURE(*err)) {
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
return -1;
}
static int32_t
_uloc_minimizeSubtags(const char* localeID,
char* minimizedLocaleID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* err)
{
/**
* ULOC_FULLNAME_CAPACITY will provide enough capacity
* that we can build a string that contains the language,
* script and region code without worrying about overrunning
* the user-supplied buffer.
**/
char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
static void
_uloc_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err) {
icu::CharString maximizedTagBuffer;
char lang[ULOC_LANG_CAPACITY];
int32_t langLength = sizeof(lang);
@ -974,9 +916,7 @@ _uloc_minimizeSubtags(const char* localeID,
if(U_FAILURE(*err)) {
goto error;
}
else if (localeID == NULL ||
minimizedLocaleID == NULL ||
minimizedLocaleIDCapacity <= 0) {
else if (localeID == NULL) {
goto error;
}
@ -1009,32 +949,32 @@ _uloc_minimizeSubtags(const char* localeID,
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
createTagString(
lang,
langLength,
script,
scriptLength,
region,
regionLength,
NULL,
0,
maximizedTagBuffer,
maximizedTagBufferLength,
err);
if(U_FAILURE(*err)) {
goto error;
}
{
icu::CharString base;
{
icu::CharStringByteSink sink(&base);
createTagString(
lang,
langLength,
script,
scriptLength,
region,
regionLength,
NULL,
0,
sink,
err);
}
/**
* First, we need to first get the maximization
* from AddLikelySubtags.
**/
maximizedTagBufferLength =
uloc_addLikelySubtags(
maximizedTagBuffer,
maximizedTagBuffer,
maximizedTagBufferLength,
err);
/**
* First, we need to first get the maximization
* from AddLikelySubtags.
**/
{
icu::CharStringByteSink sink(&maximizedTagBuffer);
ulocimp_addLikelySubtags(base.data(), sink, err);
}
}
if(U_FAILURE(*err)) {
goto error;
@ -1044,9 +984,9 @@ _uloc_minimizeSubtags(const char* localeID,
* Start first with just the language.
**/
{
char tagBuffer[ULOC_FULLNAME_CAPACITY];
const int32_t tagBufferLength =
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1056,19 +996,19 @@ _uloc_minimizeSubtags(const char* localeID,
0,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
maximizedTagBuffer,
tagBuffer,
tagBufferLength) == 0) {
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
return createTagString(
createTagString(
lang,
langLength,
NULL,
@ -1077,9 +1017,9 @@ _uloc_minimizeSubtags(const char* localeID,
0,
trailing,
trailingLength,
minimizedLocaleID,
minimizedLocaleIDCapacity,
sink,
err);
return;
}
}
@ -1088,9 +1028,9 @@ _uloc_minimizeSubtags(const char* localeID,
**/
if (regionLength > 0) {
char tagBuffer[ULOC_FULLNAME_CAPACITY];
const int32_t tagBufferLength =
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1100,19 +1040,19 @@ _uloc_minimizeSubtags(const char* localeID,
regionLength,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
maximizedTagBuffer,
tagBuffer,
tagBufferLength) == 0) {
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
return createTagString(
createTagString(
lang,
langLength,
NULL,
@ -1121,9 +1061,9 @@ _uloc_minimizeSubtags(const char* localeID,
regionLength,
trailing,
trailingLength,
minimizedLocaleID,
minimizedLocaleIDCapacity,
sink,
err);
return;
}
}
@ -1133,9 +1073,9 @@ _uloc_minimizeSubtags(const char* localeID,
* maximal version that we already have.
**/
if (scriptLength > 0 && regionLength > 0) {
char tagBuffer[ULOC_FULLNAME_CAPACITY];
const int32_t tagBufferLength =
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1145,19 +1085,19 @@ _uloc_minimizeSubtags(const char* localeID,
0,
NULL,
0,
tagBuffer,
sizeof(tagBuffer),
sink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
maximizedTagBuffer,
tagBuffer,
tagBufferLength) == 0) {
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
return createTagString(
createTagString(
lang,
langLength,
script,
@ -1166,9 +1106,9 @@ _uloc_minimizeSubtags(const char* localeID,
0,
trailing,
trailingLength,
minimizedLocaleID,
minimizedLocaleIDCapacity,
sink,
err);
return;
}
}
@ -1177,18 +1117,8 @@ _uloc_minimizeSubtags(const char* localeID,
* If we got here, return the locale ID parameter.
**/
const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
uprv_memcpy(
minimizedLocaleID,
localeID,
localeIDLength <= minimizedLocaleIDCapacity ?
localeIDLength : minimizedLocaleIDCapacity);
return u_terminateChars(
minimizedLocaleID,
minimizedLocaleIDCapacity,
localeIDLength,
err);
sink.Append(localeID, localeIDLength);
return;
}
error:
@ -1196,10 +1126,6 @@ error:
if (!U_FAILURE(*err)) {
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
return -1;
}
static UBool
@ -1230,51 +1156,83 @@ do_canonicalize(const char* localeID,
}
U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
int32_t maximizedLocaleIDCapacity,
UErrorCode* err)
{
uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
int32_t maximizedLocaleIDCapacity,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(
maximizedLocaleID, maximizedLocaleIDCapacity);
ulocimp_addLikelySubtags(localeID, sink, status);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*status)) {
return sink.Overflowed() ? reslen : -1;
}
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(
maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
}
return reslen;
}
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
char localeBuffer[ULOC_FULLNAME_CAPACITY];
if (!do_canonicalize(
localeID,
localeBuffer,
sizeof(localeBuffer),
err)) {
return -1;
if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
_uloc_addLikelySubtags(localeBuffer, sink, status);
}
else {
return _uloc_addLikelySubtags(
localeBuffer,
maximizedLocaleID,
maximizedLocaleIDCapacity,
err);
}
}
U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char* localeID,
char* minimizedLocaleID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* err)
{
uloc_minimizeSubtags(const char* localeID,
char* minimizedLocaleID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(
minimizedLocaleID, minimizedLocaleIDCapacity);
ulocimp_minimizeSubtags(localeID, sink, status);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*status)) {
return sink.Overflowed() ? reslen : -1;
}
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(
minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
}
return reslen;
}
U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
char localeBuffer[ULOC_FULLNAME_CAPACITY];
if (!do_canonicalize(
localeID,
localeBuffer,
sizeof(localeBuffer),
err)) {
return -1;
if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
_uloc_minimizeSubtags(localeBuffer, sink, status);
}
else {
return _uloc_minimizeSubtags(
localeBuffer,
minimizedLocaleID,
minimizedLocaleIDCapacity,
err);
}
}
// Pairs of (language subtag, + or -) for finding out fast if common languages
@ -1282,7 +1240,7 @@ uloc_minimizeSubtags(const char* localeID,
static const char LANG_DIR_STRING[] =
"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
// Implemented here because this calls uloc_addLikelySubtags().
// Implemented here because this calls ulocimp_addLikelySubtags().
U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char *locale) {
UErrorCode errorCode = U_ZERO_ERROR;
@ -1309,12 +1267,15 @@ uloc_isRightToLeft(const char *locale) {
}
// Otherwise, find the likely script.
errorCode = U_ZERO_ERROR;
char likely[ULOC_FULLNAME_CAPACITY];
(void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
icu::CharString likely;
{
icu::CharStringByteSink sink(&likely);
ulocimp_addLikelySubtags(locale, sink, &errorCode);
}
if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
return FALSE;
}
scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
scriptLength == 0) {
return FALSE;
@ -1365,11 +1326,14 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
rgLen = 0;
} else if (rgLen == 0 && inferRegion) {
// no unicode_region_subtag but inferRegion TRUE, try likely subtags
char locBuf[ULOC_FULLNAME_CAPACITY];
rgStatus = U_ZERO_ERROR;
(void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
icu::CharString locBuf;
{
icu::CharStringByteSink sink(&locBuf);
ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
}
if (U_SUCCESS(rgStatus)) {
rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
if (U_FAILURE(*status)) {
rgLen = 0;
}

View file

@ -145,6 +145,74 @@ U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
char *region, int32_t regionCapacity, UErrorCode* status);
// NOTE(review): this declaration is tagged @internal but uses U_STABLE, while
// the definition added in the same change uses U_CAPI -- confirm the intended
// linkage/stability macro.
/**
* Add the likely subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If localeID is already in the maximal form, or there is no data available
* for maximization, it will be appended to the output sink. For example,
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
*
* Examples:
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
*
* @param localeID The locale to maximize
* @param sink The output sink receiving the maximized locale
* @param err Error information if maximizing the locale failed. If the length
* of the localeID and the null-terminator is greater than the maximum allowed size,
* or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @internal ICU 64
*/
U_STABLE void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);
// NOTE(review): this declaration is tagged @internal but uses U_STABLE, while
// the definition added in the same change uses U_CAPI -- confirm the intended
// linkage/stability macro.
/**
* Minimize the subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If localeID is already in the minimal form, or there is no data available
* for minimization, it will be appended to the output sink. Since the
* minimization algorithm relies on proper maximization, see the comments
* for ulocimp_addLikelySubtags for reasons why there might not be any data.
*
* Examples:
*
* "en_Latn_US" minimizes to "en"
*
* "de_Latn_DE" minimizes to "de"
*
* "sr_Cyrl_RS" minimizes to "sr"
*
* "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
* script, and minimizing to "zh" would imply "zh_Hans_CN".)
*
* @param localeID The locale to minimize
* @param sink The output sink receiving the minimized locale
* @param err Error information if minimizing the locale failed. If the length
* of the localeID and the null-terminator is greater than the maximum allowed size,
* or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @internal ICU 64
*/
U_STABLE void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);

View file

@ -18,8 +18,11 @@
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/uloc.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "ulocimp.h"
static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
@ -98,7 +101,6 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
int32_t capacity,
UErrorCode* err){
UBool triedCode;
char likely[ULOC_FULLNAME_CAPACITY];
UErrorCode internalErrorCode;
int32_t length;
@ -125,10 +127,13 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
if(U_FAILURE(*err) || length != 0) {
return length;
}
(void)uloc_addLikelySubtags(nameOrAbbrOrLocale,
likely, UPRV_LENGTHOF(likely), &internalErrorCode);
icu::CharString likely;
{
icu::CharStringByteSink sink(&likely);
ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
}
if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
length = getCodesFromLocale(likely, fillIn, capacity, err);
length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
if(U_FAILURE(*err) || length != 0) {
return length;
}

View file

@ -3796,18 +3796,16 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode&
// 2). If the locale has a script designation then we ignore it,
// then remove it ( i.e. "en_Latn_US" becomes "en_US" )
char minLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
UErrorCode myStatus = U_ZERO_ERROR;
uloc_minimizeSubtags(desiredLocale.getName(),minLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus);
Locale min = Locale::createFromName(minLocaleID);
Locale min(desiredLocale);
min.minimizeSubtags(myStatus);
Locale useLocale;
if ( uprv_strlen(desiredLocale.getCountry()) == 0 ||
(uprv_strlen(desiredLocale.getScript()) > 0 && uprv_strlen(min.getScript()) == 0) ) {
char maxLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
myStatus = U_ZERO_ERROR;
uloc_addLikelySubtags(desiredLocale.getName(),maxLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus);
Locale max = Locale::createFromName(maxLocaleID);
Locale max(desiredLocale);
max.addLikelySubtags(myStatus);
useLocale = Locale(max.getLanguage(),max.getCountry());
} else {
useLocale = desiredLocale;

View file

@ -615,16 +615,11 @@ U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UEr
void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) {
if (U_FAILURE(status)) { return; }
const char *localeID = locale.getName();
char maxLocaleID[ULOC_FULLNAME_CAPACITY];
int32_t length = uloc_addLikelySubtags(localeID, maxLocaleID, ULOC_FULLNAME_CAPACITY, &status);
Locale maxLocale(locale);
maxLocale.addLikelySubtags(status);
if (U_FAILURE(status)) {
return;
} else if (length == ULOC_FULLNAME_CAPACITY) { // no room for NUL
status = U_BUFFER_OVERFLOW_ERROR;
return;
}
Locale maxLocale = Locale(maxLocaleID);
const char *country = maxLocale.getCountry();
if (*country == '\0') { country = "001"; }