ICU-22449 Fixed SimpleDateFormat (in C++ and Java) to correctly honor the rg and hc subtags in the locale when choosing the hour cycle.

This commit is contained in:
Rich Gillam 2023-08-23 19:14:00 -04:00 committed by Rich Gillam
parent ea0dbd4c41
commit 8817c25c1e
8 changed files with 247 additions and 51 deletions

View file

@ -38,6 +38,7 @@
#include "hash.h"
#include "uhash.h"
#include "uresimp.h"
#include "ulocimp.h"
#include "dtptngen_impl.h"
#include "ucln_in.h"
#include "charstr.h"
@ -655,17 +656,9 @@ void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErro
if (U_FAILURE(status)) { return; }
const char *language = locale.getLanguage();
const char *country = locale.getCountry();
char regionOverride[8];
int32_t regionOverrideLength = locale.getKeywordValue("rg", regionOverride, sizeof(regionOverride), status);
if (U_SUCCESS(status) && regionOverrideLength > 0) {
country = regionOverride;
if (regionOverrideLength > 2) {
// chop off any subdivision codes that may have been included
regionOverride[2] = '\0';
}
}
char baseCountry[8];
ulocimp_getRegionForSupplementalData(locale.getName(), false, baseCountry, 8, &status);
const char* country = baseCountry;
Locale maxLocale; // must be here for correct lifetime
if (*language == '\0' || *country == '\0') {

View file

@ -721,19 +721,28 @@ void SimpleDateFormat::construct(EStyle timeStyle,
UnicodeString timePattern;
if (timeStyle >= kFull && timeStyle <= kShort) {
bool hasRgOrHcSubtag = false;
// also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region
// or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator
UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR;
if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) {
hasRgOrHcSubtag = true;
}
const char* baseLocID = locale.getBaseName();
if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) {
UErrorCode useStatus = U_ZERO_ERROR;
Locale baseLoc(baseLocID);
Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
if (U_SUCCESS(useStatus) && validLoc!=baseLoc) {
bool useDTPG = false;
if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) {
bool useDTPG = hasRgOrHcSubtag;
const char* baseReg = baseLoc.getCountry(); // empty string if no region
if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
|| uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
// use DTPG if
// * baseLoc has a region and validLoc does not have the same one (or has none), OR
// * validLoc has a different language code than baseLoc
// * the original locale has the rg or hc subtag
useDTPG = true;
}
if (useDTPG) {

View file

@ -31,10 +31,12 @@
#include "cintltst.h"
#include "cdattst.h"
#include "cformtst.h"
#include "cstring.h"
#include "cmemory.h"
#include <math.h>
#include <stdbool.h>
#include <stdio.h> // for sprintf()
static void TestExtremeDates(void);
static void TestAllLocales(void);
@ -48,6 +50,7 @@ static void TestMapDateToCalFields(void);
static void TestNarrowQuarters(void);
static void TestExtraneousCharacters(void);
static void TestParseTooStrict(void);
static void TestHourCycle(void);
void addDateForTest(TestNode** root);
@ -72,6 +75,7 @@ void addDateForTest(TestNode** root)
TESTCASE(TestNarrowQuarters);
TESTCASE(TestExtraneousCharacters);
TESTCASE(TestParseTooStrict);
TESTCASE(TestHourCycle);
}
/* Testing the DateFormat API */
static void TestDateFormat()
@ -2077,4 +2081,52 @@ static void TestParseTooStrict(void) {
udat_close(df);
}
static void TestHourCycle(void) {
static const UDate date = -845601267742; // March 16, 1943 at 3:45 PM
const UChar* testCases[] = {
// test some locales for which we have data
u"en_US", u"Tuesday, March 16, 1943 at 3:45:32PM",
u"en_CA", u"Tuesday, March 16, 1943 at 3:45:32p.m.",
u"en_GB", u"Tuesday, 16 March 1943 at 15:45:32",
u"en_AU", u"Tuesday, 16 March 1943 at 3:45:32pm",
// test a couple locales for which we don't have specific locale files (we should still get the correct hour cycle)
u"en_CO", u"Tuesday, March 16, 1943 at 3:45:32PM",
u"en_MX", u"Tuesday, March 16, 1943 at 15:45:32",
// test that the rg subtag does the right thing
u"en_US@rg=GBzzzz", u"Tuesday, March 16, 1943 at 15:45:32",
u"en_US@rg=CAzzzz", u"Tuesday, March 16, 1943 at 3:45:32PM",
u"en_CA@rg=USzzzz", u"Tuesday, March 16, 1943 at 3:45:32p.m.",
u"en_GB@rg=USzzzz", u"Tuesday, 16 March 1943 at 3:45:32pm",
u"en_GB@rg=CAzzzz", u"Tuesday, 16 March 1943 at 3:45:32pm",
u"en_GB@rg=AUzzzz", u"Tuesday, 16 March 1943 at 3:45:32pm",
// test that the hc ("hours") subtag does the right thing
u"en_US@hours=h23", u"Tuesday, March 16, 1943 at 15:45:32",
u"en_GB@hours=h12", u"Tuesday, 16 March 1943 at 3:45:32pm",
// test that the rg and hc subtags do the right thing when used together
u"en_US@rg=GBzzzz;hours=h12", u"Tuesday, March 16, 1943 at 3:45:32PM",
u"en_GB@rg=USzzzz;hours=h23", u"Tuesday, 16 March 1943 at 15:45:32",
};
for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); i += 2) {
char errorMessage[200];
char* locale = austrdup(testCases[i]);
UErrorCode err = U_ZERO_ERROR;
sprintf(errorMessage, "Error creating formatter for %s", locale);
if (assertSuccess(errorMessage, &err)) {
UDateFormat* df = udat_open(UDAT_MEDIUM, UDAT_FULL, austrdup(testCases[i]), u"America/Los_Angeles", -1, NULL, 0, &err);
sprintf(errorMessage, "Error formatting value for %s", locale);
if (assertSuccess(errorMessage, &err)) {
UChar result[100];
udat_format(df, date, result, 100, NULL, &err);
sprintf(errorMessage, "Wrong result for %s", locale);
assertUEquals(errorMessage, testCases[i + 1], result);
udat_close(df);
}
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -24,7 +24,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "caltest.h" // for fieldName
#include <stdio.h> // for snprintf
#include "charstr.h"
#if U_PLATFORM_USES_ONLY_WIN32_API
#include "windttst.h"
@ -133,6 +133,8 @@ void DateFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &nam
TESTCASE_AUTO(Test20741_ABFields);
TESTCASE_AUTO(Test22023_UTCWithMinusZero);
TESTCASE_AUTO(TestNumericFieldStrictParse);
TESTCASE_AUTO(TestHourCycle);
TESTCASE_AUTO(TestHCInLocale);
TESTCASE_AUTO_END;
}
@ -5826,6 +5828,99 @@ void DateFormatTest::TestNumericFieldStrictParse() {
}
}
void DateFormatTest::TestHourCycle() {
static const UDate date = -845601267742; // March 16, 1943 at 3:45 PM
static const struct {
const char* languageTag;
UnicodeString expectedResult;
} TEST_CASES[] = {
// test some locales for which we have data
{ "en-us", u"Tuesday, March 16, 1943 at 3:45:32PM" },
{ "en-ca", u"Tuesday, March 16, 1943 at 3:45:32p.m." },
{ "en-gb", u"Tuesday, 16 March 1943 at 15:45:32" },
{ "en-au", u"Tuesday, 16 March 1943 at 3:45:32pm" },
// test a couple locales for which we don't have specific locale files (we should still get the correct hour cycle)
{ "en-co", u"Tuesday, March 16, 1943 at 3:45:32PM" },
{ "en-mx", u"Tuesday, March 16, 1943 at 15:45:32" },
// test that the rg subtag does the right thing
{ "en-us-u-rg-gbzzzz", u"Tuesday, March 16, 1943 at 15:45:32" },
{ "en-us-u-rg-cazzzz", u"Tuesday, March 16, 1943 at 3:45:32PM" },
{ "en-ca-u-rg-uszzzz", u"Tuesday, March 16, 1943 at 3:45:32p.m." },
{ "en-gb-u-rg-uszzzz", u"Tuesday, 16 March 1943 at 3:45:32pm" },
{ "en-gb-u-rg-cazzzz", u"Tuesday, 16 March 1943 at 3:45:32pm" },
{ "en-gb-u-rg-auzzzz", u"Tuesday, 16 March 1943 at 3:45:32pm" },
// test that the hc ("hours") subtag does the right thing
{ "en-us-u-hc-h23", u"Tuesday, March 16, 1943 at 15:45:32" },
{ "en-gb-u-hc-h12", u"Tuesday, 16 March 1943 at 3:45:32pm" },
// test that the rg and hc subtags do the right thing when used together
{ "en-us-u-rg-gbzzzz-hc-h12", u"Tuesday, March 16, 1943 at 3:45:32PM" },
{ "en-gb-u-rg-uszzzz-hc-h23", u"Tuesday, 16 March 1943 at 15:45:32" },
};
for (int32_t i = 0; i < UPRV_LENGTHOF(TEST_CASES); i++) {
UErrorCode err = U_ZERO_ERROR;
Locale locale = Locale::forLanguageTag(TEST_CASES[i].languageTag, err);
LocalPointer<DateFormat> df(DateFormat::createDateTimeInstance(DateFormat::FULL, DateFormat::MEDIUM, locale));
df->adoptTimeZone(TimeZone::createTimeZone(u"America/Los_Angeles"));
UnicodeString actualResult;
FieldPosition fp;
df->format(date, actualResult, fp);
err = U_ZERO_ERROR; // throw away result from Locale::forLangageTag()-- if that fails, it's a coding errir in this test
CharString errorMessage;
errorMessage.append("Wrong result for ", err);
errorMessage.append(TEST_CASES[i].languageTag, err);
assertEquals(errorMessage.data(), TEST_CASES[i].expectedResult, actualResult);
}
}
void DateFormatTest::TestHCInLocale() {
IcuTestErrorCode status(*this, "TestHCInLocale");
LocalPointer<Calendar> midnight(Calendar::createInstance(status));
midnight->set(2020, 0, 1, 0, 0);
LocalPointer<Calendar> noon(Calendar::createInstance(status));
noon->set(2020, 0, 1, 12, 0);
bool expected[][3] = {
// midnightContains12 midnightContains24 noonContains12
{ false, false, false}, // "en-u-hc-h11"
{ true, false, true}, // "en-u-hc-h12"
{ false, false, true}, // "en-u-hc-h23"
{ false, true, true}, // "en-u-hc-h24"
};
Locale locales[] = {"en-u-hc-h11", "en-u-hc-h12", "en-u-hc-h23", "en-u-hc-h24"};
int i = 0;
for (Locale locale : locales) {
for (DateFormat::EStyle style : {DateFormat::kFull, DateFormat::kLong, DateFormat::kMedium, DateFormat::kShort}) {
LocalPointer<DateFormat> timeFormat(DateFormat::createTimeInstance(style, locale));
LocalPointer<DateFormat> dateTimeFormat(DateFormat::createDateTimeInstance(style, style, locale));
UnicodeString actualMidnightTime, actualMidnightDateTime, actualNoonTime, actualNoonDateTime;
actualMidnightTime = timeFormat->format(*midnight, actualMidnightTime, nullptr, status);
actualNoonTime = timeFormat->format(*noon, actualNoonTime, nullptr, status);
actualMidnightDateTime = dateTimeFormat->format(*midnight, actualMidnightDateTime, nullptr, status);
actualNoonDateTime = dateTimeFormat->format(*noon, actualNoonDateTime, nullptr, status);
bool midnightContains12 = expected[i][0];
bool midnightContains24 = expected[i][1];
bool noonContains12 = expected[i][2];
assertEquals("Midnight contains '12:'?", midnightContains12, actualMidnightTime.indexOf("12:") >= 0);
assertEquals("Midnight contains '12:'?", midnightContains12, actualMidnightDateTime.indexOf("12:") >= 0);
assertEquals("Midnight contains '24:'?", midnightContains24, actualMidnightTime.indexOf("24:") >= 0);
assertEquals("Midnight contains '24:'?", midnightContains24, actualMidnightDateTime.indexOf("24:") >= 0);
assertEquals("Noon contains '12:'?", noonContains12, actualNoonTime.indexOf("12:") >= 0);
assertEquals("Noon contains '12:'?", noonContains12, actualNoonDateTime.indexOf("12:") >= 0);
}
i++;
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */
//eof

View file

@ -268,6 +268,8 @@ public:
void Test20741_ABFields();
void Test22023_UTCWithMinusZero();
void TestNumericFieldStrictParse();
void TestHourCycle();
void TestHCInLocale();
private:
UBool showParse(DateFormat &format, const UnicodeString &formattedString);

View file

@ -357,7 +357,7 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
// }
String language = uLocale.getLanguage();
String country = uLocale.getCountry();
String country = ULocale.getRegionForSupplementalData(uLocale, false);
if (language.isEmpty() || country.isEmpty()) {
// Note: addLikelySubtags is documented not to throw in Java,
@ -367,15 +367,6 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
country = max.getCountry();
}
String regionOverride = uLocale.getKeywordValue("rg");
if (regionOverride != null && !regionOverride.isEmpty()) {
// chop off any subdivision codes that may have been included
if (regionOverride.length() > 2) {
regionOverride = regionOverride.substring(0, 2);
}
country = regionOverride;
}
if (language.isEmpty()) {
// Unexpected, but fail gracefully
language = "und";

View file

@ -3790,7 +3790,13 @@ public abstract class Calendar implements Serializable, Cloneable, Comparable<Ca
// First, try to get a pattern from PATTERN_CACHE
String calType = cal.getType();
String key = loc.getBaseName() + "+" + calType;
PatternData patternData = PATTERN_CACHE.get(key);
PatternData patternData = null;
boolean hasHourCycleKeywords = loc.getKeywordValue("rg") != null
|| loc.getKeywordValue("hours") != null;
if (!hasHourCycleKeywords) {
// don't look in the cache if the locale specifies the rg or hc ("hours") keywords
patternData = PATTERN_CACHE.get(key);
}
if (patternData == null) {
// Cache missed. Get one from bundle
try {
@ -3798,7 +3804,9 @@ public abstract class Calendar implements Serializable, Cloneable, Comparable<Ca
} catch (MissingResourceException e) {
patternData = new PatternData(DEFAULT_PATTERNS, null, DEFAULT_ATTIME_PATTERNS);
}
PATTERN_CACHE.put(key, patternData);
if (!hasHourCycleKeywords) {
PATTERN_CACHE.put(key, patternData);
}
}
return patternData;
}
@ -3824,35 +3832,43 @@ public abstract class Calendar implements Serializable, Cloneable, Comparable<Ca
String[] dateTimePatternsOverrides = new String[patternsSize];
int i = 0; // index for dateTimePatterns, dateTimePatternsOverrides
String baseLocID = locale.getBaseName();
if (baseLocID.length() > 0 && !baseLocID.equals("und")) {
ULocale baseLoc = new ULocale(baseLocID);
// The following is different from ICU4C, where we can get the valid locale
// for the SimpleDateFormat object. Here we do not have a SimpleDateFormat and
// valid locale for the Calendar is a bit meaningless.
ULocale validLoc = ULocale.addLikelySubtags(dtPatternsRb.getULocale());
if (validLoc != baseLoc) {
String baseReg = baseLoc.getCountry();
if ((baseReg.length() > 0 && !baseReg.equals(validLoc.getCountry()))
|| !baseLoc.getLanguage().equals(validLoc.getLanguage())) {
// use DTPG if the standard time formats may have the wrong time cycle,
// because the valid locale differs in important ways (region, language)
// from the base locale.
// We could *also* check whether they do actually have a mismatch with
// the time cycle preferences for the region, but that is a lot more
// work for little or no additional benefit, since just going ahead
// and always synthesizing the time format as per the following should
// create a locale-appropriate pattern with cycle that matches the
// region preferences anyway.
// In this case we get the first 4 entries of dateTimePatterns using
// DateTimePatternGenerator, not resource data.
DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstanceNoStdPat(locale);
for (; i < 4; i++) {
dateTimePatterns[i] = dtpg.getBestPattern(TIME_SKELETONS[i]);
boolean useDTPG = false;
if (locale.getKeywordValue("rg") != null || locale.getKeywordValue("hours") != null) {
useDTPG = true;
} else {
String baseLocID = locale.getBaseName();
if (!baseLocID.isEmpty() && !baseLocID.equals("und")) {
ULocale baseLoc = new ULocale(baseLocID);
// The following is different from ICU4C, where we can get the valid locale
// for the SimpleDateFormat object. Here we do not have a SimpleDateFormat and
// valid locale for the Calendar is a bit meaningless.
ULocale validLoc = ULocale.addLikelySubtags(dtPatternsRb.getULocale());
if (validLoc != baseLoc) {
String baseReg = baseLoc.getCountry();
if ((!baseReg.isEmpty() && !baseReg.equals(validLoc.getCountry()))
|| !baseLoc.getLanguage().equals(validLoc.getLanguage())) {
useDTPG = true;
}
}
}
}
if (useDTPG) {
// use DTPG if the standard time formats may have the wrong time cycle,
// because the valid locale differs in important ways (region, language)
// from the base locale.
// We could *also* check whether they do actually have a mismatch with
// the time cycle preferences for the region, but that is a lot more
// work for little or no additional benefit, since just going ahead
// and always synthesizing the time format as per the following should
// create a locale-appropriate pattern with cycle that matches the
// region preferences anyway.
// In this case we get the first 4 entries of dateTimePatterns using
// DateTimePatternGenerator, not resource data.
DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstanceNoStdPat(locale);
for (; i < TIME_SKELETONS.length; i++) {
dateTimePatterns[i] = dtpg.getBestPattern(TIME_SKELETONS[i]);
}
}
for (; i < patternsSize; i++) { // get all or remaining dateTimePatterns entries
ICUResourceBundle concatenationPatternRb = (ICUResourceBundle) dtPatternsRb.get(i);

View file

@ -5694,4 +5694,42 @@ public class DateFormatTest extends TestFmwk {
}
}
}
@Test
public void TestHourCycle() {
final Date DATE = new Date(-845601268000L); // March 16, 1943 at 3:45 PM
final String[][] TEST_CASES = new String[][] {
// test some locales for which we have data
{ "en-us", "Tuesday, March 16, 1943 at 3:45:32\u202fPM" },
{ "en-ca", "Tuesday, March 16, 1943 at 3:45:32\u202fp.m." },
{ "en-gb", "Tuesday, 16 March 1943 at 15:45:32" },
{ "en-au", "Tuesday, 16 March 1943 at 3:45:32\u202fpm" },
// test a couple locales for which we don't have specific locale files (we should still get the correct hour cycle)
{ "en-co", "Tuesday, March 16, 1943 at 3:45:32\u202fPM" },
{ "en-mx", "Tuesday, March 16, 1943 at 15:45:32" },
// test that the rg subtag does the right thing
{ "en-us-u-rg-gbzzzz", "Tuesday, March 16, 1943 at 15:45:32" },
{ "en-us-u-rg-cazzzz", "Tuesday, March 16, 1943 at 3:45:32\u202fPM" },
{ "en-ca-u-rg-uszzzz", "Tuesday, March 16, 1943 at 3:45:32\u202fp.m." },
{ "en-gb-u-rg-uszzzz", "Tuesday, 16 March 1943 at 3:45:32\u202fpm" },
{ "en-gb-u-rg-cazzzz", "Tuesday, 16 March 1943 at 3:45:32\u202fpm" },
{ "en-gb-u-rg-auzzzz", "Tuesday, 16 March 1943 at 3:45:32\u202fpm" },
// test that the hc ("hours") subtag does the right thing
{ "en-us-u-hc-h23", "Tuesday, March 16, 1943 at 15:45:32" },
{ "en-gb-u-hc-h12", "Tuesday, 16 March 1943 at 3:45:32\u202fpm" },
// test that the rg and hc subtags do the right thing when used together
{ "en-us-u-rg-gbzzzz-hc-h12", "Tuesday, March 16, 1943 at 3:45:32\u202fPM" },
{ "en-gb-u-rg-uszzzz-hc-h23", "Tuesday, 16 March 1943 at 15:45:32" },
};
for (String[] testCase : TEST_CASES) {
Locale locale = Locale.forLanguageTag(testCase[0]);
DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM, locale);
df.setTimeZone(TimeZone.getTimeZone("America/Los_Angeles"));
String actualResult = df.format(DATE);
assertEquals("Wrong result for " + locale, testCase[1], actualResult);
}
}
}