From 5e22f0076ec9b55056cd8a84e9ef370632f44174 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Tue, 13 Aug 2024 00:34:51 -0700 Subject: [PATCH] ICU-22854 Implement subdivision validation --- icu4c/source/common/loclikely.cpp | 83 ++++++++++++++++--- icu4c/source/common/ulocimp.h | 15 ++++ icu4c/source/test/cintltst/ccaltst.c | 69 ++++++++++++++- icu4c/source/test/intltest/numbertest_api.cpp | 6 +- icu4c/source/test/intltest/regiontst.cpp | 58 +++++++++++++ icu4c/source/test/intltest/regiontst.h | 1 + .../test/number/NumberFormatterApiTest.java | 6 +- .../main/java/com/ibm/icu/util/ULocale.java | 67 +++++++++++++-- .../test/calendar/CalendarRegressionTest.java | 1 - .../dev/test/calendar/IBMCalendarTest.java | 46 +++++++++- .../com/ibm/icu/dev/test/util/RegionTest.java | 46 ++++++++++ 11 files changed, 368 insertions(+), 30 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index 1e6538134a6..ccbcbfa7a5d 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -391,19 +391,30 @@ namespace { icu::CharString GetRegionFromKey(const char* localeID, std::string_view key, UErrorCode& status) { icu::CharString result; - // First check for keyword value icu::CharString kw = ulocimp_getKeywordValue(localeID, key, status); int32_t len = kw.length(); - if (U_SUCCESS(status) && len >= 3 && len <= 7) { - // chop off the subdivision code (which will generally be "zzzz" anyway) - const char* const data = kw.data(); - if (uprv_isASCIILetter(data[0])) { - result.append(uprv_toupper(data[0]), status); - result.append(uprv_toupper(data[1]), status); - } else { - // assume three-digit region code - result.append(data, 3, status); + // In UTS35 + // type = alphanum{3,8} (sep alphanum{3,8})* ; + // so we know the subdivision must fit the type already. 
+ // + // unicode_subdivision_id = unicode_region_subtag unicode_subdivision_suffix ; + // unicode_region_subtag = (alpha{2} | digit{3}) ; + // unicode_subdivision_suffix = alphanum{1,4} ; + // But we also know that no id starts with digit{3} in + // https://github.com/unicode-org/cldr/blob/main/common/validity/subdivision.xml + // Therefore we can simplify as + // unicode_subdivision_id = alpha{2} alphanum{1,4} + // + // and only need to accept/reject the code based on the alpha{2} and the length. + if (U_SUCCESS(status) && len >= 3 && len <= 6 && + uprv_isASCIILetter(kw[0]) && uprv_isASCIILetter(kw[1])) { + // Additional check: the alpha{2} prefix must be set in the built-in region bitmap. + static icu::RegionValidateMap valid; + const char region[] = {kw[0], kw[1], '\0'}; + if (valid.isSet(region)) { + result.append(uprv_toupper(kw[0]), status); + result.append(uprv_toupper(kw[1]), status); } } return result; @@ -436,3 +447,55 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion, return rgBuf; } + +namespace { + +// The following data is generated by unit test code inside +// test/intltest/regiontst.cpp from the resource data when +// the test fails.
+const uint32_t gValidRegionMap[] = { + 0xeedf597c, 0xdeddbdef, 0x15943f3f, 0x0e00d580, + 0xb0095c00, 0x0015fb9f, 0x781c068d, 0x0340400f, + 0xf42b1d00, 0xfd4f8141, 0x25d7fffc, 0x0100084b, + 0x538f3c40, 0x40000001, 0xfdf15100, 0x9fbb7ae7, + 0x0410419a, 0x00408557, 0x00004002, 0x00100001, + 0x00400408, 0x00000001, +}; + +} // namespace + // +U_NAMESPACE_BEGIN +RegionValidateMap::RegionValidateMap() { + uprv_memcpy(map, gValidRegionMap, sizeof(map)); +} + +RegionValidateMap::~RegionValidateMap() { +} + +bool RegionValidateMap::isSet(const char* region) const { + int32_t index = value(region); + if (index < 0) { + return false; + } + return 0 != (map[index / 32] & (static_cast<uint32_t>(1) << (index % 32))); +} + +bool RegionValidateMap::equals(const RegionValidateMap& that) const { + return uprv_memcmp(map, that.map, sizeof(map)) == 0; +} + +// The code transforms two ASCII letters (case-insensitive) into an integer between -1 and 675. +// -1 indicates the region is not exactly two letters in the range a-z; +// any other value is between 0 and 675 (= 26x26 - 1) and is used as a bit index +// into the bitmap in map. The map is an array of 22 uint32_t: +// since 32x21 < 676 <= 32x22 we store this 676-bit bitmap in 22 uint32_t.
+int32_t RegionValidateMap::value(const char* region) const { + if (uprv_isASCIILetter(region[0]) && uprv_isASCIILetter(region[1]) && + region[2] == '\0') { + return (uprv_toupper(region[0])-'A') * 26 + + (uprv_toupper(region[1])-'A'); + } + return -1; +} + +U_NAMESPACE_END diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index 3b4f107da71..1887e2a849a 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -425,4 +425,19 @@ ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length); U_EXPORT bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); +#ifdef __cplusplus +U_NAMESPACE_BEGIN +class U_COMMON_API RegionValidateMap : public UObject { + public: + RegionValidateMap(); + virtual ~RegionValidateMap(); + bool isSet(const char* region) const; + bool equals(const RegionValidateMap& that) const; + protected: + int32_t value(const char* region) const; + uint32_t map[22]; // one bit per two-letter code: ceil(26x26 / 32) = 22 words +}; +U_NAMESPACE_END +#endif /* __cplusplus */ + #endif diff --git a/icu4c/source/test/cintltst/ccaltst.c b/icu4c/source/test/cintltst/ccaltst.c index 7741d755ec8..cc1b9c516c9 100644 --- a/icu4c/source/test/cintltst/ccaltst.c +++ b/icu4c/source/test/cintltst/ccaltst.c @@ -45,6 +45,7 @@ void TestUcalOpenBufferRead(void); void TestGetTimeZoneOffsetFromLocal(void); void TestFWWithISO8601(void); +void TestFWwithRGSD(void); void addCalTest(TestNode** root); @@ -71,6 +72,7 @@ void addCalTest(TestNode** root) addTest(root, &TestUcalOpenBufferRead, "tsformat/ccaltst/TestUcalOpenBufferRead"); addTest(root, &TestGetTimeZoneOffsetFromLocal, "tsformat/ccaltst/TestGetTimeZoneOffsetFromLocal"); addTest(root, &TestFWWithISO8601, "tsformat/ccaltst/TestFWWithISO8601"); + addTest(root, &TestFWwithRGSD, "tsformat/ccaltst/TestFWwithRGSD"); addTest(root, &TestGetIanaTimeZoneID, "tstformat/ccaltst/TestGetIanaTimeZoneID"); } @@ -1616,7 +1618,7 @@ void TestGregorianChange(void) { } static void TestGetKeywordValuesForLocale(void) { -#define
PREFERRED_SIZE 26 +#define PREFERRED_SIZE 25 #define MAX_NUMBER_OF_KEYWORDS 5 const char *PREFERRED[PREFERRED_SIZE][MAX_NUMBER_OF_KEYWORDS+1] = { { "root", "gregorian", NULL, NULL, NULL, NULL }, @@ -1646,9 +1648,8 @@ static void TestGetKeywordValuesForLocale(void) { { "zh_TW@rg=IT53", "gregorian", NULL, NULL, NULL, NULL }, // two-digit subdivision code { "zh_TW@rg=AUnsw", "gregorian", NULL, NULL, NULL, NULL }, // three-letter subdivision code { "zh_TW@rg=EE130", "gregorian", NULL, NULL, NULL, NULL }, // three-digit subdivision code - { "zh_TW@rg=417zzzz", "gregorian", NULL, NULL, NULL, NULL }, // three-digit region code }; - const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1 }; + const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1 }; UErrorCode status = U_ZERO_ERROR; int32_t i, size, j; UEnumeration *all, *pref; @@ -1688,7 +1689,7 @@ static void TestGetKeywordValuesForLocale(void) { } if (!matchPref) { - log_err("FAIL: Preferred values for locale \"%s\" does not match expected.\n", loc); + log_err("FAIL: Preferred values for locale (%d) \"%s\" does not match expected.\n", i, loc); break; } uenum_close(pref); @@ -2842,6 +2843,66 @@ TestFWWithISO8601(void) { } } +void +TestFWwithRGSD(void) { + typedef struct { + const char* locale; + int32_t first_day_of_week; + int32_t minimal_days; + } TestData; + const TestData TESTDATA[] = { + // Region subtag is missing, so add likely subtags to get region. + {"en", UCAL_SUNDAY, 1}, + + // Explicit region subtag "US" is present. + {"en-US", UCAL_SUNDAY, 1}, + + // Explicit region subtag "DE" is present. + {"en-DE", UCAL_MONDAY, 4}, + + // Explicit region subtag "DE" is present, but there's also a valid + // region override to use "US". + {"en-DE-u-rg-uszzzz", UCAL_SUNDAY, 1}, + + // Explicit region subtag "DE" is present. 
The region override should be + // ignored, because "AA" is not a valid region. + {"en-DE-u-rg-aazzzz", UCAL_MONDAY, 4}, + + // Explicit region subtag "DE" is present. The region override should be + // ignored, because "001" is a macroregion. + {"en-DE-u-rg-001zzz", UCAL_MONDAY, 4}, + + // Region subtag is missing. The region override should be ignored, because + // "AA" is not a valid region. + {"en-u-rg-aazzzz", UCAL_SUNDAY, 1}, + + // Region subtag is missing. The region override should be ignored, because + // "001" is a macroregion. + {"en-u-rg-001zzz", UCAL_SUNDAY, 1}, + + {NULL, 0, 0}, + }; + for (int32_t i = 0; TESTDATA[i].locale != NULL; i++) { + UErrorCode status = U_ZERO_ERROR; + UCalendar* cal = ucal_open(NULL, 0, TESTDATA[i].locale, UCAL_DEFAULT, &status); + if (U_FAILURE(status)) { + log_err("ucal_open failed: TESTDATA[%d].locale = '%s'\n", i, TESTDATA[i].locale); + continue; + } + int32_t first_day_Of_week = ucal_getAttribute(cal, UCAL_FIRST_DAY_OF_WEEK); + if (first_day_Of_week != TESTDATA[i].first_day_of_week) { + log_err("First day of week of '%s' is %d but expected to be %d\n", TESTDATA[i].locale, + first_day_Of_week, TESTDATA[i].first_day_of_week); + } + int32_t minimal_days = ucal_getAttribute(cal, UCAL_MINIMAL_DAYS_IN_FIRST_WEEK); + if (minimal_days != TESTDATA[i].minimal_days) { + log_err("Minimal days of a week of '%s' is %d but expected to be %d\n", TESTDATA[i].locale, + minimal_days, TESTDATA[i].minimal_days); + } + ucal_close(cal); + } +} + void TestGetIanaTimeZoneID(void) { const UChar* UNKNOWN = u"Etc/Unknown"; diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 87881cea42b..4938818468e 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -2988,15 +2988,15 @@ void NumberFormatterApiTest::unitLocaleTags() { "fahrenheit", 0, "default", "fahrenheit", 0.0, u"0 degrees Fahrenheit"}, // Test the behaviour of the `rg` tag 
- {u"Test the locale with rg = UK and without usage", "en-US-u-rg-ukzzzz", "fahrenheit", 0, + {u"Test the locale with rg = GB and without usage", "en-US-u-rg-gbzzzz", "fahrenheit", 0, nullptr, "fahrenheit", 0.0, u"0 degrees Fahrenheit"}, - {u"Test the locale with rg = UK and with usage", "en-US-u-rg-ukzzzz", "fahrenheit", 0, "default", + {u"Test the locale with rg = GB and with usage", "en-US-u-rg-gbzzzz", "fahrenheit", 0, "default", "celsius", -18, u"-18 degrees Celsius"}, {"Test the locale with mu = fahrenheit and without usage", "en-US-u-mu-fahrenheit", "celsius", 0, nullptr, "celsius", 0.0, "0 degrees Celsius"}, {"Test the locale with mu = fahrenheit and with usage", "en-US-u-mu-fahrenheit", "celsius", 0, "default", "fahrenheit", 32.0, "32 degrees Fahrenheit"}, - {u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", 0, + {u"Test the locale with rg = GBOXF and with usage", "en-US-u-rg-gboxf", "fahrenheit", 0, "default", "celsius", -18.0, u"-18 degrees Celsius"}, // Test the priorities diff --git a/icu4c/source/test/intltest/regiontst.cpp b/icu4c/source/test/intltest/regiontst.cpp index 3304838b336..d72903b0051 100644 --- a/icu4c/source/test/intltest/regiontst.cpp +++ b/icu4c/source/test/intltest/regiontst.cpp @@ -17,7 +17,9 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/region.h" +#include "unicode/ures.h" #include "regiontst.h" +#include "ulocimp.h" typedef struct KnownRegion { const char *code; @@ -359,6 +361,7 @@ RegionTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* TESTCASE_AUTO(TestAvailableTerritories); TESTCASE_AUTO(TestNoContainedRegions); TESTCASE_AUTO(TestGroupingChildren); + TESTCASE_AUTO(TestGetRegionForSupplementalDataMatch); TESTCASE_AUTO_END; } @@ -783,6 +786,61 @@ void RegionTest::TestGroupingChildren() { } } +class MutableRegionValidateMap : public RegionValidateMap { + public: + MutableRegionValidateMap() { + uprv_memset(map, 0, sizeof(map)); + } + virtual ~MutableRegionValidateMap() {} + 
void add(const char* region) { + int32_t index = value(region); + if (index >= 0) { + map[index / 32] |= (static_cast<uint32_t>(1) << (index % 32)); + } + } + const uint32_t* data(int32_t* length) const { + if (length != nullptr) { + *length = sizeof(map)/sizeof(uint32_t); + } + return map; + } +}; + +void RegionTest::TestGetRegionForSupplementalDataMatch() { + RegionValidateMap builtin; + MutableRegionValidateMap prefab; + + UErrorCode status = U_ZERO_ERROR; + LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status)); + + LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status)); + LocalUResourceBundlePointer subdivisions(ures_getByKey(idValidity.getAlias(),"subdivision",nullptr,&status)); + LocalUResourceBundlePointer unknown(ures_getByKey(subdivisions.getAlias(),"unknown",nullptr,&status)); + + while (U_SUCCESS(status) && ures_hasNext(unknown.getAlias())) { + UnicodeString subdivision = ures_getNextUnicodeString(unknown.getAlias(),nullptr,&status); + if (U_SUCCESS(status)) { + std::string str; + subdivision.toUTF8String(str); + str.resize(2); + prefab.add(str.c_str()); + } + } + if (!prefab.equals(builtin)) { + int32_t length; + const uint32_t* data = prefab.data(&length); + printf("const uint32_t gValidRegionMap[] = {"); + for (int32_t i = 0; i < length; i++) { + if (i % 4 == 0) { + printf("\n "); + } + printf("0x%08x, ", data[i]); + } + printf("\n};\n"); + errln("ulocimp_getRegionForSupplementalData() differs from supplementalData"); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ //eof diff --git a/icu4c/source/test/intltest/regiontst.h b/icu4c/source/test/intltest/regiontst.h index f5bd5eed1c3..db2e3d94fa8 100644 --- a/icu4c/source/test/intltest/regiontst.h +++ b/icu4c/source/test/intltest/regiontst.h @@ -39,6 +39,7 @@ public: void TestAvailableTerritories(); void TestNoContainedRegions(); void TestGroupingChildren(); + void TestGetRegionForSupplementalDataMatch(); private: diff
--git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java index edfcbfef8b9..28dbbe617be 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java @@ -511,9 +511,9 @@ public class NumberFormatterApiTest extends CoreTestFmwk { {"Test the locale with ms = Matric (wrong spelling) and with usage", "en-US-u-ms-Matric", "fahrenheit", "0", "default", "fahrenheit", "0.0", "0 degrees Fahrenheit"}, // Test the behaviour of the `rg` tag - {"Test the locale with rg = UK and without usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", null, "fahrenheit", "0.0", "0 degrees Fahrenheit"}, - {"Test the locale with rg = UK and with usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", "default", "celsius", "-18", "-18 degrees Celsius"}, - {"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"}, + {"Test the locale with rg = GB and without usage", "en-US-u-rg-gbzzzz", "fahrenheit", "0", null, "fahrenheit", "0.0", "0 degrees Fahrenheit"}, + {"Test the locale with rg = GB and with usage", "en-US-u-rg-gbzzzz", "fahrenheit", "0", "default", "celsius", "-18", "-18 degrees Celsius"}, + {"Test the locale with rg = GBOXF and with usage", "en-US-u-rg-gboxf", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"}, // Test the priorities {"Test the locale with mu,ms,rg --> mu tag wins", "en-US-u-mu-celsius-ms-ussystem-rg-uszzzz", "celsius", "0", "default", "celsius", "0.0", "0 degrees Celsius"}, diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java b/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java index 0fa09230d9b..bb6b32cdd54 100644 --- 
a/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java @@ -973,14 +973,26 @@ public final class ULocale implements Serializable, Comparable { * Get region code from a key in locale or null. */ private static String getRegionFromKey(ULocale locale, String key) { - String region = locale.getKeywordValue(key); - if (region != null && region.length() >= 3 && region.length() <= 7) { - if (Character.isLetter(region.charAt(0))) { - return AsciiUtil.toUpperString(region.substring(0, 2)); - } else { - // assume three-digit region code - return region.substring(0, 3); - } + String subdivision = locale.getKeywordValue(key); + // In UTS35 + // type = alphanum{3,8} (sep alphanum{3,8})* ; + // so we know the subdivision must fit the type already. + // + // unicode_subdivision_id = unicode_region_subtag unicode_subdivision_suffix ; + // unicode_region_subtag = (alpha{2} | digit{3}) ; + // unicode_subdivision_suffix = alphanum{1,4} ; + // But we also know that no id starts with digit{3} in + // https://github.com/unicode-org/cldr/blob/main/common/validity/subdivision.xml + // Therefore we can simplify as + // unicode_subdivision_id = alpha{2} alphanum{1,4} + // + // and only need to accept/reject the code based on the alpha{2} and the length. + if (subdivision == null || subdivision.length() < 3 || subdivision.length() > 6) { + return null; + } + String region = AsciiUtil.toUpperString(subdivision.substring(0, 2)); // ASCII-only: must not depend on the default locale (e.g. Turkish i) + if (RegionValidateMap.BUILTIN.isSet(region)) { + return region; } return null; } @@ -4248,4 +4260,43 @@ public final class ULocale implements Serializable, Comparable { } } } + /** + * @internal Visible For Testing + * @deprecated This API is ICU internal only.
+ */ + @Deprecated + public static class RegionValidateMap { + public RegionValidateMap() { + this.map = Arrays.copyOf(gValidRegionMap, gValidRegionMap.length); + } + public boolean isSet(String region) { + int index = value(region); + if (index < 0) { + return false; + } + return 0 != (map[index / 32] & (1 << (index % 32))); + } + public boolean equals(RegionValidateMap that) { + return Arrays.equals(map, that.map); + } + protected int value(String region) { + if (region.matches("[a-zA-Z][a-zA-Z]")) { + region = AsciiUtil.toUpperString(region); // ASCII-only case mapping keeps the index within 0..675 in every default locale + int aValue = "A".codePointAt(0); + return (region.codePointAt(0) - aValue) * 26 + region.codePointAt(1) - aValue; + } + return -1; + } + protected int[] map; + private static final int[] gValidRegionMap = { + 0xeedf597c, 0xdeddbdef, 0x15943f3f, 0x0e00d580, + 0xb0095c00, 0x0015fb9f, 0x781c068d, 0x0340400f, + 0xf42b1d00, 0xfd4f8141, 0x25d7fffc, 0x0100084b, + 0x538f3c40, 0x40000001, 0xfdf15100, 0x9fbb7ae7, + 0x0410419a, 0x00408557, 0x00004002, 0x00100001, + 0x00400408, 0x00000001, + }; + + public static final RegionValidateMap BUILTIN = new RegionValidateMap(); + } } diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java index 5124d722149..746b5b08bd3 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java @@ -2190,7 +2190,6 @@ public class CalendarRegressionTest extends CoreTestFmwk { { "zh_TW@rg=IT53", "gregorian" }, // two-digit subdivision code { "zh_TW@rg=AUnsw", "gregorian" }, // three-letter subdivision code { "zh_TW@rg=EE130", "gregorian" }, // three-digit subdivision code - { "zh_TW@rg=417zzzz", "gregorian" }, // three-digit region code }; String[] ALL = Calendar.getKeywordValuesForLocale("calendar", ULocale.getDefault(), false); diff --git
a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java index 1dc5ac7965b..476ccc183c9 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java @@ -205,8 +205,12 @@ public class IBMCalendarTest extends CalendarTestFmwk { } } + private void verifyMinimalDaysInFirstWeek(String l, int minimalDays) { + assertEquals(l + " minimalDaysInFirstWeek", minimalDays, + Calendar.getInstance(Locale.forLanguageTag(l)).getMinimalDaysInFirstWeek()); + } private void verifyFirstDayOfWeek(String l, int weekday) { - assertEquals(l, weekday, + assertEquals(l + " firstDayOfWeek", weekday, Calendar.getInstance(Locale.forLanguageTag(l)).getFirstDayOfWeek()); } /** @@ -278,6 +282,46 @@ public class IBMCalendarTest extends CalendarTestFmwk { verifyFirstDayOfWeek("zxx", Calendar.MONDAY); } + @Test + public void TestFWwithRGSD() { + // Region subtag is missing, so add likely subtags to get region. + verifyFirstDayOfWeek("en", Calendar.SUNDAY); + verifyMinimalDaysInFirstWeek("en", 1); + + // Explicit region subtag "US" is present. + verifyFirstDayOfWeek("en-US", Calendar.SUNDAY); + verifyMinimalDaysInFirstWeek("en-US", 1); + + // Explicit region subtag "DE" is present. + verifyFirstDayOfWeek("en-DE", Calendar.MONDAY); + verifyMinimalDaysInFirstWeek("en-DE", 4); + + // Explicit region subtag "DE" is present, but there's also a valid + // region override to use "US". + verifyFirstDayOfWeek("en-DE-u-rg-uszzzz", Calendar.SUNDAY); + verifyMinimalDaysInFirstWeek("en-DE-u-rg-uszzzz", 1); + + // Explicit region subtag "DE" is present. The region override should be + // ignored, because "AA" is not a valid region. 
+ verifyFirstDayOfWeek("en-DE-u-rg-aazzzz", Calendar.MONDAY); + verifyMinimalDaysInFirstWeek("en-DE-u-rg-aazzzz", 4); + + // Explicit region subtag "DE" is present. The region override should be + // ignored, because "001" is a macroregion. + verifyFirstDayOfWeek("en-DE-u-rg-001zzz", Calendar.MONDAY); + verifyMinimalDaysInFirstWeek("en-DE-u-rg-001zzz", 4); + + // Region subtag is missing. The region override should be ignored, because + // "AA" is not a valid region. + verifyFirstDayOfWeek("en-u-rg-aazzzz", Calendar.SUNDAY); + verifyMinimalDaysInFirstWeek("en-u-rg-aazzzz", 1); + + // Region subtag is missing. The region override should be ignored, because + // "001" is a macroregion. + verifyFirstDayOfWeek("en-u-rg-001zzz", Calendar.SUNDAY); + verifyMinimalDaysInFirstWeek("en-u-rg-001zzz", 1); + } + /** * Verify that BuddhistCalendar shifts years to Buddhist Era but otherwise * behaves like GregorianCalendar. diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/RegionTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/RegionTest.java index 92c7cc79203..709497f2626 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/RegionTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/RegionTest.java @@ -13,6 +13,7 @@ package com.ibm.icu.dev.test.util; +import java.util.Arrays; import java.util.List; import java.util.Set; @@ -21,8 +22,13 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import com.ibm.icu.dev.test.CoreTestFmwk; +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.util.Region; import com.ibm.icu.util.Region.RegionType; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + /** * @test @@ -627,4 +633,44 @@ public class RegionTest extends CoreTestFmwk { } } } + + public static class MutableRegionValidateMap extends ULocale.RegionValidateMap { + public MutableRegionValidateMap() { + Arrays.fill(map, 0); + } + 
public void add(String region) { + int index = value(region); + if (index >= 0) { + map[index / 32] |= (1 << (index % 32)); + } + } + public int[] data() { + return map; + } + } + + @Test + public void TestGetRegionForSupplementalDataMatch() { + UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle idValidity = supplementalData.get("idValidity"); + UResourceBundle subdivisions = idValidity.get("subdivision"); + UResourceBundle unknown = subdivisions.get("unknown"); + MutableRegionValidateMap prefab = new MutableRegionValidateMap(); + for ( String r : unknown.getStringArray()) { + prefab.add(r.substring(0, 2)); + } + if (!ULocale.RegionValidateMap.BUILTIN.equals(prefab)) { + int[] data = prefab.data(); + System.out.println("Please update the following in main/core/src/main/java/com/ibm/icu/util/ULocale.java"); + System.out.print(" static int[] gValidRegionMap = {"); + for (int i = 0; i < data.length; i++) { + if (i % 4 == 0) { + System.out.print("\n "); + } + System.out.printf("0x%08x, ", data[i]); + } + System.out.println("\n};"); + errln("ULocale.RegionValidateMap.BUILTIN differs from supplementalData idValidity/subdivision/unknown"); + } + } }