ICU-22364 Modify ulocimp_getRegionForSupplementalData() to ignore the subdivision code, rather than requiring it to be "zzzz".
This commit is contained in:
Rich Gillam 2023-08-10 16:54:19 -07:00 committed by Rich Gillam
parent 248b1c2a13
commit 52177cc8c7
5 changed files with 50 additions and 11 deletions

View file

@ -811,15 +811,19 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
// First check for rg keyword value
int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
if (U_FAILURE(rgStatus) || rgLen != 6) {
if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
rgLen = 0;
} else {
// rgBuf guaranteed to be zero terminated here, with text len 6
char *rgPtr = rgBuf;
for (; *rgPtr!= 0; rgPtr++) {
*rgPtr = uprv_toupper(*rgPtr);
// chop off the subdivision code (which will generally be "zzzz" anyway)
if (uprv_isASCIILetter(rgBuf[0])) {
rgLen = 2;
rgBuf[0] = uprv_toupper(rgBuf[0]);
rgBuf[1] = uprv_toupper(rgBuf[1]);
} else {
// assume three-digit region code
rgLen = 3;
}
rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
}
if (rgLen == 0) {

View file

@ -101,7 +101,10 @@ static const UCalGetTypeTest ucalGetTypeTests[] = {
{ "fr_CH", UCAL_DEFAULT, "gregorian" },
{ "fr_SA", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@rg=sazzzz", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@rg=sa14", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@calendar=japanese;rg=sazzzz", UCAL_DEFAULT, "japanese" },
{ "fr_CH@rg=twcyi", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364
{ "fr_CH@rg=ugw", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364
{ "fr_TH@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */
{ "th@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */
{ "", UCAL_GREGORIAN, "gregorian" },
@ -1613,7 +1616,7 @@ void TestGregorianChange() {
}
static void TestGetKeywordValuesForLocale() {
#define PREFERRED_SIZE 16
#define PREFERRED_SIZE 26
#define MAX_NUMBER_OF_KEYWORDS 5
const char *PREFERRED[PREFERRED_SIZE][MAX_NUMBER_OF_KEYWORDS+1] = {
{ "root", "gregorian", NULL, NULL, NULL, NULL },
@ -1632,8 +1635,20 @@ static void TestGetKeywordValuesForLocale() {
{ "zh_TW", "gregorian", "roc", "chinese", NULL, NULL },
{ "ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla" },
{ "th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa", NULL },
// tests for ICU-22364
{ "zh_CN@rg=TW", "gregorian", "chinese", NULL, NULL, NULL }, // invalid subdivision code
{ "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", NULL, NULL }, // whole region
{ "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese", NULL, NULL }, // invalid subdivision code (ignored)
{ "zh_TW@rg=ARa", "gregorian", NULL, NULL, NULL, NULL }, // single-letter subdivision code
{ "zh_TW@rg=AT1", "gregorian", NULL, NULL, NULL, NULL }, // single-digit subdivision code
{ "zh_TW@rg=USca", "gregorian", NULL, NULL, NULL, NULL }, // two-letter subdivision code
{ "zh_TW@rg=IT53", "gregorian", NULL, NULL, NULL, NULL }, // two-digit subdivision code
{ "zh_TW@rg=AUnsw", "gregorian", NULL, NULL, NULL, NULL }, // three-letter subdivision code
{ "zh_TW@rg=EE130", "gregorian", NULL, NULL, NULL, NULL }, // three-digit subdivision code
{ "zh_TW@rg=417zzzz", "gregorian", NULL, NULL, NULL, NULL }, // three-digit region code
};
const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4 };
const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1 };
UErrorCode status = U_ZERO_ERROR;
int32_t i, size, j;
UEnumeration *all, *pref;

View file

@ -982,10 +982,12 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
public static String getRegionForSupplementalData(
ULocale locale, boolean inferRegion) {
String region = locale.getKeywordValue("rg");
if (region != null && region.length() == 6) {
String regionUpper = AsciiUtil.toUpperString(region);
if (regionUpper.endsWith("ZZZZ")) {
return regionUpper.substring(0,2);
if (region != null && region.length() >= 3 && region.length() <= 7) {
if (Character.isLetter(region.charAt(0))) {
return AsciiUtil.toUpperString(region.substring(0, 2));
} else {
// assume three-digit region code
return region.substring(0, 3);
}
}
region = locale.getCountry();

View file

@ -2182,6 +2182,18 @@ public class CalendarRegressionTest extends com.ibm.icu.dev.test.TestFmwk {
{"zh_TW", "gregorian", "roc", "chinese"},
{"ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla"},
{"th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa"},
// tests for ICU-22364
{ "zh_CN@rg=TW", "gregorian", "chinese" }, // invalid subdivision code
{ "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", }, // whole region
{ "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese" }, // invalid subdivision code (ignored)
{ "zh_TW@rg=ARa", "gregorian" }, // single-letter subdivision code
{ "zh_TW@rg=AT1", "gregorian" }, // single-digit subdivision code
{ "zh_TW@rg=USca", "gregorian" }, // two-letter subdivision code
{ "zh_TW@rg=IT53", "gregorian" }, // two-digit subdivision code
{ "zh_TW@rg=AUnsw", "gregorian" }, // three-letter subdivision code
{ "zh_TW@rg=EE130", "gregorian" }, // three-digit subdivision code
{ "zh_TW@rg=417zzzz", "gregorian" }, // three-digit region code
};
String[] ALL = Calendar.getKeywordValuesForLocale("calendar", ULocale.getDefault(), false);

View file

@ -1099,7 +1099,10 @@ public class IBMCalendarTest extends CalendarTestFmwk {
"fr_CH",
"fr_SA",
"fr_CH@rg=sazzzz",
"fr_CH@rg=sa14",
"fr_CH@calendar=japanese;rg=sazzzz",
"fr_CH@rg=twcyi", // test for ICU-22364
"fr_CH@rg=ugw", // test for ICU-22364
"fr_TH@rg=SA", // ignore malformed rg tag, use buddhist
"th@rg=SA", // ignore malformed rg tag, use buddhist
};
@ -1121,7 +1124,10 @@ public class IBMCalendarTest extends CalendarTestFmwk {
"gregorian",
"islamic-umalqura",
"islamic-umalqura",
"islamic-umalqura",
"japanese",
"gregorian",
"gregorian",
"buddhist",
"buddhist",
};