mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-22520 Use a ByteSink append buffer instead of a local CharString.
These functions that eventually write their output to a ByteSink need a small temporary buffer for processing the subtag they're about to write and currently use a local CharString object to provide this buffer, which then gets written to the ByteSink and discarded. This intermediate step is unnecessary as a ByteSink can provide an append buffer which can be used instead, eliminating the need to allocate a local temporary buffer and to copy the data around. This approach also makes it natural to split the processing into two steps, first calculating the length of the subtag, then processing it, which makes it possible to return early when no output is requested.
This commit is contained in:
parent
a210fc8351
commit
699555a5bd
1 changed files with 98 additions and 76 deletions
|
@ -1172,7 +1172,7 @@ _getLanguage(const char* localeID,
|
|||
const char** pEnd,
|
||||
UErrorCode& status) {
|
||||
U_ASSERT(pEnd != nullptr);
|
||||
CharString result;
|
||||
*pEnd = localeID;
|
||||
|
||||
if (uprv_stricmp(localeID, "root") == 0) {
|
||||
localeID += 4;
|
||||
|
@ -1184,104 +1184,128 @@ _getLanguage(const char* localeID,
|
|||
localeID += 3;
|
||||
}
|
||||
|
||||
constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1; // Minus NUL.
|
||||
|
||||
/* if it starts with i- or x- then copy that prefix */
|
||||
if(_isIDPrefix(localeID)) {
|
||||
result.append((char)uprv_tolower(*localeID), status);
|
||||
result.append('-', status);
|
||||
localeID+=2;
|
||||
int32_t len = _isIDPrefix(localeID) ? 2 : 0;
|
||||
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
|
||||
if (len == MAXLEN) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
len++;
|
||||
}
|
||||
|
||||
/* copy the language as far as possible and count its length */
|
||||
while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
|
||||
result.append((char)uprv_tolower(*localeID), status);
|
||||
localeID++;
|
||||
*pEnd = localeID + len;
|
||||
if (sink == nullptr || len == 0) { return; }
|
||||
|
||||
int32_t minCapacity = uprv_max(len, 4); // Minimum 3 letters plus NUL.
|
||||
char scratch[MAXLEN];
|
||||
int32_t capacity = 0;
|
||||
char* buffer = sink->GetAppendBuffer(
|
||||
minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
|
||||
|
||||
for (int32_t i = 0; i < len; ++i) {
|
||||
buffer[i] = uprv_tolower(localeID[i]);
|
||||
}
|
||||
if (_isIDSeparator(localeID[1])) {
|
||||
buffer[1] = '-';
|
||||
}
|
||||
|
||||
if(result.length()==3) {
|
||||
if (len == 3) {
|
||||
/* convert 3 character code to 2 character code if possible *CWB*/
|
||||
int32_t offset = _findIndex(LANGUAGES_3, result.data());
|
||||
U_ASSERT(capacity >= 4);
|
||||
buffer[3] = '\0';
|
||||
int32_t offset = _findIndex(LANGUAGES_3, buffer);
|
||||
if(offset>=0) {
|
||||
result.clear();
|
||||
result.append(LANGUAGES[offset], status);
|
||||
const char* const alias = LANGUAGES[offset];
|
||||
sink->Append(alias, (int32_t)uprv_strlen(alias));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (sink != nullptr && !result.isEmpty()) {
|
||||
sink->Append(result.data(), result.length());
|
||||
}
|
||||
|
||||
*pEnd = localeID;
|
||||
sink->Append(buffer, len);
|
||||
}
|
||||
|
||||
static void
|
||||
_getScript(const char* localeID,
|
||||
ByteSink* sink,
|
||||
const char** pEnd,
|
||||
UErrorCode& status) {
|
||||
const char** pEnd) {
|
||||
U_ASSERT(pEnd != nullptr);
|
||||
CharString result;
|
||||
int32_t idLen = 0;
|
||||
|
||||
*pEnd = localeID;
|
||||
|
||||
/* copy the second item as far as possible and count its length */
|
||||
while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
|
||||
&& uprv_isASCIILetter(localeID[idLen])) {
|
||||
idLen++;
|
||||
constexpr int32_t LENGTH = 4;
|
||||
|
||||
int32_t len = 0;
|
||||
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
|
||||
uprv_isASCIILetter(localeID[len])) {
|
||||
if (len == LENGTH) { return; }
|
||||
len++;
|
||||
}
|
||||
if (len != LENGTH) { return; }
|
||||
|
||||
*pEnd = localeID + LENGTH;
|
||||
if (sink == nullptr) { return; }
|
||||
|
||||
char scratch[LENGTH];
|
||||
int32_t capacity = 0;
|
||||
char* buffer = sink->GetAppendBuffer(
|
||||
LENGTH, LENGTH, scratch, UPRV_LENGTHOF(scratch), &capacity);
|
||||
|
||||
buffer[0] = uprv_toupper(localeID[0]);
|
||||
for (int32_t i = 1; i < LENGTH; ++i) {
|
||||
buffer[i] = uprv_tolower(localeID[i]);
|
||||
}
|
||||
|
||||
/* If it's exactly 4 characters long, then it's a script and not a country. */
|
||||
if (idLen == 4) {
|
||||
int32_t i;
|
||||
*pEnd = localeID + idLen;
|
||||
if (idLen >= 1) {
|
||||
result.append((char)uprv_toupper(*(localeID++)), status);
|
||||
}
|
||||
for (i = 1; i < idLen; i++) {
|
||||
result.append((char)uprv_tolower(*(localeID++)), status);
|
||||
}
|
||||
}
|
||||
|
||||
if (sink != nullptr && !result.isEmpty()) {
|
||||
sink->Append(result.data(), result.length());
|
||||
}
|
||||
sink->Append(buffer, LENGTH);
|
||||
}
|
||||
|
||||
static void
|
||||
_getRegion(const char* localeID,
|
||||
ByteSink* sink,
|
||||
const char** pEnd,
|
||||
UErrorCode& status) {
|
||||
const char** pEnd) {
|
||||
U_ASSERT(pEnd != nullptr);
|
||||
CharString result;
|
||||
int32_t idLen=0;
|
||||
|
||||
/* copy the country as far as possible and count its length */
|
||||
while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
|
||||
result.append((char)uprv_toupper(localeID[idLen]), status);
|
||||
idLen++;
|
||||
}
|
||||
|
||||
/* the country should be either length 2 or 3 */
|
||||
if (idLen == 2 || idLen == 3) {
|
||||
/* convert 3 character code to 2 character code if possible *CWB*/
|
||||
if(idLen==3) {
|
||||
int32_t offset = _findIndex(COUNTRIES_3, result.data());
|
||||
if(offset>=0) {
|
||||
result.clear();
|
||||
result.append(COUNTRIES[offset], status);
|
||||
}
|
||||
}
|
||||
localeID+=idLen;
|
||||
} else {
|
||||
result.clear();
|
||||
}
|
||||
|
||||
if (sink != nullptr && !result.isEmpty()) {
|
||||
sink->Append(result.data(), result.length());
|
||||
}
|
||||
|
||||
*pEnd = localeID;
|
||||
|
||||
constexpr int32_t MINLEN = 2;
|
||||
constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1; // Minus NUL.
|
||||
|
||||
int32_t len = 0;
|
||||
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
|
||||
if (len == MAXLEN) { return; }
|
||||
len++;
|
||||
}
|
||||
if (len < MINLEN) { return; }
|
||||
|
||||
*pEnd = localeID + len;
|
||||
if (sink == nullptr) { return; }
|
||||
|
||||
char scratch[ULOC_COUNTRY_CAPACITY];
|
||||
int32_t capacity = 0;
|
||||
char* buffer = sink->GetAppendBuffer(
|
||||
ULOC_COUNTRY_CAPACITY,
|
||||
ULOC_COUNTRY_CAPACITY,
|
||||
scratch,
|
||||
UPRV_LENGTHOF(scratch),
|
||||
&capacity);
|
||||
|
||||
for (int32_t i = 0; i < len; ++i) {
|
||||
buffer[i] = uprv_toupper(localeID[i]);
|
||||
}
|
||||
|
||||
if (len == 3) {
|
||||
/* convert 3 character code to 2 character code if possible *CWB*/
|
||||
U_ASSERT(capacity >= 4);
|
||||
buffer[3] = '\0';
|
||||
int32_t offset = _findIndex(COUNTRIES_3, buffer);
|
||||
if(offset>=0) {
|
||||
const char* const alias = COUNTRIES[offset];
|
||||
sink->Append(alias, (int32_t)uprv_strlen(alias));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sink->Append(buffer, len);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1475,8 +1499,7 @@ ulocimp_getSubtags(
|
|||
if (_isIDSeparator(*localeID)) {
|
||||
const char* begin = localeID + 1;
|
||||
const char* end = nullptr;
|
||||
_getScript(begin, script, &end, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
_getScript(begin, script, &end);
|
||||
U_ASSERT(end != nullptr);
|
||||
if (end != begin) {
|
||||
localeID = end;
|
||||
|
@ -1489,8 +1512,7 @@ ulocimp_getSubtags(
|
|||
if (_isIDSeparator(*localeID)) {
|
||||
const char* begin = localeID + 1;
|
||||
const char* end = nullptr;
|
||||
_getRegion(begin, region, &end, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
_getRegion(begin, region, &end);
|
||||
U_ASSERT(end != nullptr);
|
||||
if (end != begin) {
|
||||
hasRegion = true;
|
||||
|
|
Loading…
Add table
Reference in a new issue