mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-22547 Fix addLikelySubtags for 4-character script codes
Also fixes ICU-22546: corrects the examples in the API doc comments and adds unit tests
This commit is contained in:
parent
e04f4427dc
commit
92eeb45811
7 changed files with 127 additions and 18 deletions
|
@ -467,7 +467,14 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
goto error;
|
||||
}
|
||||
if (langLength > 3) {
|
||||
goto error;
|
||||
if (langLength == 4 && scriptLength == 0) {
|
||||
langLength = 0;
|
||||
scriptLength = 4;
|
||||
uprv_memcpy(script, lang, 4);
|
||||
lang[0] = '\0';
|
||||
} else {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the length of the trailing portion. */
|
||||
|
|
|
@ -518,20 +518,20 @@ public:
|
|||
* If this Locale is already in the maximal form, or not valid, or there is
|
||||
* no data available for maximization, the Locale will be unchanged.
|
||||
*
|
||||
* For example, "und-Zzzz" cannot be maximized, since there is no
|
||||
* For example, "sh" cannot be maximized, since there is no
|
||||
* reasonable maximization.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* "und_Zzzz" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "en" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "de" maximizes to "de_Latn_US"
|
||||
* "de" maximizes to "de_Latn_DE"
|
||||
*
|
||||
* "sr" maximizes to "sr_Cyrl_RS"
|
||||
*
|
||||
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
|
||||
*
|
||||
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
|
||||
* "zh_Hani" maximizes to "zh_Hani_CN"
|
||||
*
|
||||
* @param status error information if maximizing this Locale failed.
|
||||
* If this Locale is not well-formed, the error code is
|
||||
|
|
|
@ -1158,19 +1158,20 @@ uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
|
|||
*
|
||||
* If localeID is already in the maximal form, or there is no data available
|
||||
* for maximization, it will be copied to the output buffer. For example,
|
||||
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
|
||||
* "sh" cannot be maximized, since there is no reasonable maximization.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* "und_Zzzz" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "en" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "de" maximizes to "de_Latn_US"
|
||||
* "de" maximizes to "de_Latn_DE"
|
||||
*
|
||||
* "sr" maximizes to "sr_Cyrl_RS"
|
||||
*
|
||||
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
|
||||
* "zh_Hani" maximizes to "zh_Hani_CN"
|
||||
*
|
||||
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
|
||||
*
|
||||
* @param localeID The locale to maximize
|
||||
* @param maximizedLocaleID The maximized locale
|
||||
|
|
|
@ -3782,6 +3782,38 @@ const char* const basic_maximize_data[][2] = {
|
|||
}, {
|
||||
"_DE@em=emoji",
|
||||
"de_Latn_DE@em=emoji"
|
||||
}, {
|
||||
// ICU-22547
|
||||
// unicode_language_id = "root" |
|
||||
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
|
||||
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
|
||||
// so "aaaa" is a well-formed unicode_language_id
|
||||
"aaaa",
|
||||
"aaaa",
|
||||
}, {
|
||||
// ICU-22546
|
||||
"und-Zzzz",
|
||||
"en_Latn_US" // If change, please also update common/unicode/uloc.h
|
||||
}, {
|
||||
// ICU-22546
|
||||
"en",
|
||||
"en_Latn_US" // If change, please also update common/unicode/uloc.h
|
||||
}, {
|
||||
// ICU-22546
|
||||
"de",
|
||||
"de_Latn_DE" // If change, please also update common/unicode/uloc.h
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sr",
|
||||
"sr_Cyrl_RS" // If change, please also update common/unicode/uloc.h
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sh",
|
||||
"sh" // If change, please also update common/unicode/uloc.h
|
||||
}, {
|
||||
// ICU-22546
|
||||
"zh_Hani",
|
||||
"zh_Hani_CN" // If change, please also update common/unicode/uloc.h
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -6013,7 +6045,7 @@ static void TestLikelySubtags()
|
|||
}
|
||||
}
|
||||
else if (uprv_stricmp(maximal, buffer) != 0) {
|
||||
log_err(" maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %s\n", maximal, minimal, buffer);
|
||||
log_err("1 maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %s\n", maximal, minimal, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6066,7 +6098,7 @@ static void TestLikelySubtags()
|
|||
}
|
||||
}
|
||||
else if (uprv_stricmp(maximal, buffer) != 0) {
|
||||
log_err(" maximal doesn't match expected \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
|
||||
log_err("2 maximal doesn't match expected \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6128,7 +6160,7 @@ static void TestLikelySubtags()
|
|||
}
|
||||
else if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
if (uprv_strnicmp(maximal, buffer, bufferSize) != 0) {
|
||||
log_err(" maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %*s\n",
|
||||
log_err("3 maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %*s\n",
|
||||
maximal, minimal, (int)sizeof(buffer), buffer);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3842,6 +3842,45 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
|
|||
"und_US",
|
||||
"en_Latn_US",
|
||||
"en"
|
||||
}, {
|
||||
// ICU-22547
|
||||
// unicode_language_id = "root" |
|
||||
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
|
||||
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
|
||||
// so "aaaa" is a well-formed unicode_language_id
|
||||
"aaaa",
|
||||
"aaaa",
|
||||
"aaaa",
|
||||
}, {
|
||||
// ICU-22546
|
||||
"und-Zzzz",
|
||||
"en_Latn_US", // If change, please also update common/unicode/locid.h
|
||||
"en"
|
||||
}, {
|
||||
// ICU-22546
|
||||
"en",
|
||||
"en_Latn_US", // If change, please also update common/unicode/locid.h
|
||||
"en"
|
||||
}, {
|
||||
// ICU-22546
|
||||
"de",
|
||||
"de_Latn_DE", // If change, please also update common/unicode/locid.h
|
||||
"de"
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sr",
|
||||
"sr_Cyrl_RS", // If change, please also update common/unicode/locid.h
|
||||
"sr"
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sh",
|
||||
"sh",// If change, please also update common/unicode/locid.h
|
||||
"sh"
|
||||
}, {
|
||||
// ICU-22546
|
||||
"zh_Hani",
|
||||
"zh_Hani_CN", // If change, please also update common/unicode/locid.h
|
||||
"zh_Hani"
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1917,6 +1917,38 @@ public class ULocaleTest extends TestFmwk {
|
|||
}, {
|
||||
"zzz",
|
||||
""
|
||||
}, {
|
||||
// ICU-22547
|
||||
// unicode_language_id = "root" |
|
||||
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
|
||||
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
|
||||
// so "aaaa" is a well-formed unicode_language_id
|
||||
"aaaa",
|
||||
"aaaa",
|
||||
}, {
|
||||
// ICU-22546
|
||||
"und-Zzzz",
|
||||
"en_Latn_US" // If change, please also update ULocale.java
|
||||
}, {
|
||||
// ICU-22546
|
||||
"en",
|
||||
"en_Latn_US" // If change, please also update ULocale.java
|
||||
}, {
|
||||
// ICU-22546
|
||||
"de",
|
||||
"de_Latn_DE" // If change, please also update ULocale.java
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sr",
|
||||
"sr_Cyrl_RS" // If change, please also update ULocale.java
|
||||
}, {
|
||||
// ICU-22546
|
||||
"sh",
|
||||
"sh" // If change, please also update ULocale.java
|
||||
}, {
|
||||
// ICU-22546
|
||||
"zh_Hani",
|
||||
"zh_Hani_CN" // If change, please also update ULocale.java
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -2694,20 +2694,18 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
*
|
||||
* If the provided ULocale instance is already in the maximal form, or there is no
|
||||
* data available for maximization, it will be returned. For example,
|
||||
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
|
||||
* "sh" cannot be maximized, since there is no reasonable maximization.
|
||||
* Otherwise, a new ULocale instance with the maximal form is returned.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* "en" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "de" maximizes to "de_Latn_US"
|
||||
* "de" maximizes to "de_Latn_DE"
|
||||
*
|
||||
* "sr" maximizes to "sr_Cyrl_RS"
|
||||
*
|
||||
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
|
||||
*
|
||||
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
|
||||
* "zh_Hani" maximizes to "zh_Hani_CN"
|
||||
*
|
||||
* @param loc The ULocale to maximize
|
||||
* @return The maximized ULocale instance.
|
||||
|
|
Loading…
Add table
Reference in a new issue