From 3d8ae5eb6dc3542cbac230e195be3a38ed38e953 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Tue, 22 Sep 2020 10:10:30 -0700 Subject: [PATCH] ICU-20590 Ensure time format consistency for SHORT vs j patterns if no locale data --- icu4c/source/i18n/dtptngen.cpp | 20 ++- icu4c/source/i18n/smpdtfmt.cpp | 128 ++++++++++++------ icu4c/source/i18n/unicode/dtptngen.h | 11 +- icu4c/source/test/intltest/dtptngts.cpp | 56 +++++++- icu4c/source/test/intltest/dtptngts.h | 1 + .../icu/text/DateTimePatternGenerator.java | 31 ++++- .../core/src/com/ibm/icu/util/Calendar.java | 42 +++++- .../test/format/DateTimeGeneratorTest.java | 38 +++++- 8 files changed, 272 insertions(+), 55 deletions(-) diff --git a/icu4c/source/i18n/dtptngen.cpp b/icu4c/source/i18n/dtptngen.cpp index 32d3fc4db8d..21f2362d171 100644 --- a/icu4c/source/i18n/dtptngen.cpp +++ b/icu4c/source/i18n/dtptngen.cpp @@ -311,6 +311,16 @@ DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& statu return U_SUCCESS(status) ? result.orphan() : nullptr; } +DateTimePatternGenerator* U_EXPORT2 +DateTimePatternGenerator::createInstanceNoStdPat(const Locale& locale, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + LocalPointer result( + new DateTimePatternGenerator(locale, status, true), status); + return U_SUCCESS(status) ? result.orphan() : nullptr; +} + DateTimePatternGenerator* U_EXPORT2 DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) { if (U_FAILURE(status)) { @@ -336,7 +346,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) : } } -DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status) : +DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) : skipMatcher(nullptr), fAvailableFormatKeyHash(nullptr), fDefaultHourFormatChar(0), @@ -350,7 +360,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorC internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; } else { - initData(locale, status); + initData(locale, status, skipStdPatterns); } } @@ -489,13 +499,15 @@ enum AllowedHourFormat{ } // namespace void -DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) { +DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) { //const char *baseLangName = locale.getBaseName(); // unused skipMatcher = nullptr; fAvailableFormatKeyHash=nullptr; addCanonicalItems(status); - addICUPatterns(locale, status); + if (!skipStdPatterns) { // skip to prevent circular dependency when called from SimpleDateFormat::construct + addICUPatterns(locale, status); + } addCLDRData(locale, status); setDateTimeFromCalendar(locale, status); setDecimalSymbols(locale, status); diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 8beadd6eca3..4717899cf38 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -54,6 +54,7 @@ #include "unicode/udisplaycontext.h" #include "unicode/brkiter.h" #include "unicode/rbnf.h" +#include "unicode/dtptngen.h" #include "uresimp.h" #include "olsontz.h" #include "patternprops.h" @@ -650,6 +651,12 @@ SimpleDateFormat::operator==(const Format& other) const } //---------------------------------------------------------------------- +static const UChar* timeSkeletons[4] = { + u"jmmsszzzz", // kFull + u"jmmssz", // kLong + u"jmmss", // kMedium + u"jmm", // kShort +}; void SimpleDateFormat::construct(EStyle timeStyle, EStyle dateStyle, @@ -714,35 +721,75 @@ void SimpleDateFormat::construct(EStyle timeStyle, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + UnicodeString timePattern; + if (timeStyle >= kFull && timeStyle <= kShort) { + const char* baseLocID = locale.getBaseName(); + if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) { + UErrorCode useStatus = U_ZERO_ERROR; + Locale baseLoc(baseLocID); + Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus)); + if (U_SUCCESS(useStatus) && validLoc!=baseLoc) { + bool useDTPG = false; + const char* baseReg = baseLoc.getCountry(); // empty string if no region + if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0) + || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) { + // use DTPG if + // * baseLoc has a region and validLoc does not have the same one (or has none), OR + // * validLoc has a different language code than baseLoc + useDTPG = true; + } + if (useDTPG) { + // The standard time formats may have the wrong time cycle, because: + // the valid locale differs in important ways (region, language) from + // the base locale. + // We could *also* check whether they do actually have a mismatch with + // the time cycle preferences for the region, but that is a lot more + // work for little or no additional benefit, since just going ahead + // and always synthesizing the time format as per the following should + // create a locale-appropriate pattern with cycle that matches the + // region preferences anyway. + LocalPointer dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus)); + if (U_SUCCESS(useStatus)) { + UnicodeString timeSkeleton(TRUE, timeSkeletons[timeStyle], -1); + timePattern = dtpg->getBestPattern(timeSkeleton, useStatus); + } + } + } + } + } + // if the pattern should include both date and time information, use the date/time // pattern string as a guide to tell use how to glue together the appropriate date // and time pattern strings. if ((timeStyle != kNone) && (dateStyle != kNone)) { - currentBundle.adoptInstead( - ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); - if (U_FAILURE(status)) { - status = U_INVALID_FORMAT_ERROR; - return; - } - switch (ures_getType(currentBundle.getAlias())) { - case URES_STRING: { - resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); - break; - } - case URES_ARRAY: { - resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); - ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); - fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen); - break; - } - default: { + UnicodeString tempus1(timePattern); + if (tempus1.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { status = U_INVALID_FORMAT_ERROR; return; } - } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } - UnicodeString tempus1(TRUE, resStr, resStrLen); + tempus1.setTo(TRUE, resStr, resStrLen); + } currentBundle.adoptInstead( ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, NULL, &status)); @@ -784,29 +831,32 @@ void SimpleDateFormat::construct(EStyle timeStyle, // pattern string from the resources // setTo() - see DateFormatSymbols::assignArray comments else if (timeStyle != kNone) { - currentBundle.adoptInstead( - ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); - if (U_FAILURE(status)) { - status = U_INVALID_FORMAT_ERROR; - return; - } - switch (ures_getType(currentBundle.getAlias())) { - case URES_STRING: { - resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); - break; - } - case URES_ARRAY: { - resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); - ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); - fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); - break; - } - default: { + fPattern.setTo(timePattern); + if (fPattern.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { status = U_INVALID_FORMAT_ERROR; return; } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + fPattern.setTo(TRUE, resStr, resStrLen); } - fPattern.setTo(TRUE, resStr, resStrLen); } else if (dateStyle != kNone) { currentBundle.adoptInstead( diff --git a/icu4c/source/i18n/unicode/dtptngen.h b/icu4c/source/i18n/unicode/dtptngen.h index dd99d58e65e..828c0a9854b 100644 --- a/icu4c/source/i18n/unicode/dtptngen.h +++ b/icu4c/source/i18n/unicode/dtptngen.h @@ -76,6 +76,13 @@ public: #ifndef U_HIDE_INTERNAL_API + /** + * For ICU use only. Skips loading the standard date/time patterns (which is done via DateFormat). + * + * @internal + */ + static DateTimePatternGenerator* U_EXPORT2 createInstanceNoStdPat(const Locale& uLocale, UErrorCode& status); + /** * For ICU use only * @@ -526,7 +533,7 @@ private: /** * Constructor. */ - DateTimePatternGenerator(const Locale& locale, UErrorCode & status); + DateTimePatternGenerator(const Locale& locale, UErrorCode & status, UBool skipStdPatterns = false); /** * Copy constructor. @@ -573,7 +580,7 @@ private: // with #13183, no longer need flags for b, B }; - void initData(const Locale &locale, UErrorCode &status); + void initData(const Locale &locale, UErrorCode &status, UBool skipStdPatterns = false); void addCanonicalItems(UErrorCode &status); void addICUPatterns(const Locale& locale, UErrorCode& status); void hackTimes(const UnicodeString& hackPattern, UErrorCode& status); diff --git a/icu4c/source/test/intltest/dtptngts.cpp b/icu4c/source/test/intltest/dtptngts.cpp index 1b2aa2a4099..faa850aa838 100644 --- a/icu4c/source/test/intltest/dtptngts.cpp +++ b/icu4c/source/test/intltest/dtptngts.cpp @@ -44,6 +44,7 @@ void IntlTestDateTimePatternGeneratorAPI::runIndexedTest( int32_t index, UBool e TESTCASE(8, test20640_HourCyclArsEnNH); TESTCASE(9, testFallbackWithDefaultRootLocale); TESTCASE(10, testGetDefaultHourCycle_OnEmptyInstance); + TESTCASE(11, test_jConsistencyOddLocales); default: name = ""; break; } } @@ -1396,7 +1397,9 @@ void IntlTestDateTimePatternGeneratorAPI::testJjMapping() { shortPattern.extract(0, shortPattern.length(), spBuf, 32); jPattern.extract(0, jPattern.length(), jpBuf, 32); const char* dfmtCalType = (dfmt->getCalendar())->getType(); - errln("ERROR: locale %s, expected j resolved char %s to occur in short time pattern '%s' for %s (best pattern: '%s')", localeID, jcBuf, spBuf, dfmtCalType, jpBuf); + const char* validLoc = dfmt->getLocaleID(ULOC_VALID_LOCALE, status); + errln("ERROR: locale %s (valid %s), expected j resolved char %s to occur in short time pattern '%s' for %s (best pattern: '%s')", + localeID, validLoc, jcBuf, spBuf, dfmtCalType, jpBuf); } break; } @@ -1420,8 +1423,7 @@ void IntlTestDateTimePatternGeneratorAPI::test20640_HourCyclArsEnNH() { // formerly New Hebrides, now Vanuatu => VU => h. {"en_NH", u"h a", u"h:mm a", UDAT_HOUR_CYCLE_12}, // ch_ZH is a typo (should be zh_CN), but we should fail gracefully. - // {"cn_ZH", u"HH", u"H:mm"}, // TODO(ICU-20653): Desired behavior - {"cn_ZH", u"HH", u"h:mm a", UDAT_HOUR_CYCLE_23 }, // Actual behavior + {"cn_ZH", u"HH", u"HH:mm", UDAT_HOUR_CYCLE_23 }, // Desired & now actual behavior (does this fix ICU-20653?) // a non-BCP47 locale without a country code should not fail {"ja_TRADITIONAL", u"H時", u"H:mm", UDAT_HOUR_CYCLE_23}, }; @@ -1507,4 +1509,52 @@ void IntlTestDateTimePatternGeneratorAPI::testGetDefaultHourCycle_OnEmptyInstanc } } +void IntlTestDateTimePatternGeneratorAPI::test_jConsistencyOddLocales() { // ICU-20590 + static const char* localeIDs[] = { + "en", "ro", // known languages 12h / 24h + "en-RO", "ro-US", // known languages with known regions, hour conflict language vs region + "en-XZ", "ro-XZ", // known languages 12h / 24h, unknown region + "xz-RO", "xz-US", // unknown language with known regions + "xz", // unknown language + "xz-ZX", // unknown language with unknown country + "ars", "wuu" // aliased locales + }; + static const UChar* skeleton = u"jm"; + for (const char* localeID: localeIDs) { + UErrorCode status = U_ZERO_ERROR; + Locale locale(localeID); + LocalPointer dtfShort(DateFormat::createTimeInstance(DateFormat::kShort, locale), status); + if (U_FAILURE(status)) { + errln("DateFormat::createTimeInstance failed for locale %s: %s", localeID, u_errorName(status)); + continue; + } + LocalPointer dtfSkel(DateFormat::createInstanceForSkeleton(skeleton, locale, status)); + if (U_FAILURE(status)) { + errln("DateFormat::createInstanceForSkeleton failed for locale %s: %s", localeID, u_errorName(status)); + continue; + } + LocalPointer dtpg(DateTimePatternGenerator::createInstance(locale, status)); + if (U_FAILURE(status)) { + errln("DateTimePatternGenerator::createInstance failed for locale %s: %s", localeID, u_errorName(status)); + continue; + } + UnicodeString dtfShortPattern, dtfSkelPattern; + dynamic_cast(dtfShort.getAlias())->toPattern(dtfShortPattern); + dynamic_cast(dtfSkel.getAlias())->toPattern(dtfSkelPattern); + UnicodeString dtpgPattern = (dtpg.getAlias())->getBestPattern(skeleton, status); + if (U_FAILURE(status)) { + errln("DateTimePatternGenerator::getBestPattern failed for locale %s: %s", localeID, u_errorName(status)); + continue; + } + if (dtfShortPattern != dtfSkelPattern || dtfSkelPattern != dtpgPattern) { + const char* dtfShortValidLoc = dtfShort->getLocaleID(ULOC_VALID_LOCALE, status); + const char* dtfShortActualLoc = dtfShort->getLocaleID(ULOC_ACTUAL_LOCALE, status); + errln(UnicodeString("For locale ") + localeID + + " expected same pattern from DateTimePatGen: " + dtpgPattern + + ", DateFmt-forSkel: " + dtfSkelPattern + ", DateFmt-short: " + dtfShortPattern + + "; latter has validLoc " + dtfShortValidLoc + ", actualLoc " + dtfShortActualLoc); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/dtptngts.h b/icu4c/source/test/intltest/dtptngts.h index 90f65c94288..b3fb02e2bd2 100644 --- a/icu4c/source/test/intltest/dtptngts.h +++ b/icu4c/source/test/intltest/dtptngts.h @@ -36,6 +36,7 @@ private: void test20640_HourCyclArsEnNH(); void testFallbackWithDefaultRootLocale(); void testGetDefaultHourCycle_OnEmptyInstance(); + void test_jConsistencyOddLocales(); }; #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java index 17fb34b80a3..8a09b68ff6b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java @@ -125,7 +125,7 @@ public class DateTimePatternGenerator implements Freezable 0 && !baseLocID.equals("und")) { + ULocale baseLoc = new ULocale(baseLocID); + // The following is different from ICU4C, where we can get the valid locale + // for the SimpleDateFormat object. Here we do not have a SimpleDateFormat and + // valid locale for the Calendar is a bit meaningless. + ULocale validLoc = ULocale.addLikelySubtags(dtPatternsRb.getULocale()); + if (validLoc != baseLoc) { + String baseReg = baseLoc.getCountry(); + if ((baseReg.length() > 0 && !baseReg.equals(validLoc.getCountry())) + || !baseLoc.getLanguage().equals(validLoc.getLanguage())) { + // use DTPG if the standard time formats may have the wrong time cycle, + // because the valid locale differs in important ways (region, language) + // from the base locale. + // We could *also* check whether they do actually have a mismatch with + // the time cycle preferences for the region, but that is a lot more + // work for little or no additional benefit, since just going ahead + // and always synthesizing the time format as per the following should + // create a locale-appropriate pattern with cycle that matches the + // region preferences anyway. + // In this case we get the first 4 entries of dateTimePatterns using + // DateTimePatternGenerator, not resource data. + DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstanceNoStdPat(locale); + for (; i < 4; i++) { + dateTimePatterns[i] = dtpg.getBestPattern(TIME_SKELETONS[i]); + } + } + } + } + + for (; i < patternsSize; i++) { // get all or remaining dateTimePatterns entries ICUResourceBundle concatenationPatternRb = (ICUResourceBundle) dtPatternsRb.get(i); switch (concatenationPatternRb.getType()) { case UResourceBundle.STRING: diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java index 02444193448..ff462c74cc8 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java @@ -1737,8 +1737,7 @@ public class DateTimeGeneratorTest extends TestFmwk { // en_NH is interesting because NH is a depregated region code. {"en_NH", "h a", "h:mm a", "HOUR_CYCLE_12"}, // ch_ZH is a typo (should be zh_CN), but we should fail gracefully. - // {"cn_ZH", "HH", "H:mm"}, // TODO(ICU-20653): Desired behavior - {"cn_ZH", "HH", "h:mm a", "HOUR_CYCLE_23"}, // Actual behavior + {"cn_ZH", "HH", "HH:mm", "HOUR_CYCLE_23"}, // Desired & now actual behavior (does this fix ICU-20653?) // a non-BCP47 locale without a country code should not fail {"ja_TRADITIONAL", "H時", "H:mm", "HOUR_CYCLE_23"}, }; @@ -1759,4 +1758,39 @@ public class DateTimeGeneratorTest extends TestFmwk { cas[3], dtpg.getDefaultHourCycle().toString()); } } + + @Test + public void test_jConsistencyOddLocales() { // ICU-20590 + String[] localeIDs = { + "en", "ro", // known languages 12h / 24h + "en-RO", "ro-US", // known languages with known regions, hour conflict language vs region + "en-XZ", "ro-XZ", // known languages 12h / 24h, unknown region + "xz-RO", "xz-US", // unknown language with known regions + "xz", // unknown language + "xz-ZX", // unknown language with unknown country + "ars", "wuu" // aliased locales + }; + final String skeleton = "jm"; + for (String localeID : localeIDs) { + ULocale locale = new ULocale(localeID); + + DateFormat dtfShort = DateFormat.getTimeInstance(DateFormat.SHORT, locale); + String dtfShortPattern = ((SimpleDateFormat)dtfShort).toPattern(); + + DateFormat dtfSkel = DateFormat.getInstanceForSkeleton(skeleton, locale); + String dtfSkelPattern = ((SimpleDateFormat)dtfSkel).toPattern(); + + DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale); + String dtpgPattern = dtpg.getBestPattern(skeleton); + + if (!dtfShortPattern.equals(dtfSkelPattern) || !dtfSkelPattern.equals(dtpgPattern)) { + String dtfShortValidLoc = dtfShort.getLocale(ULocale.VALID_LOCALE).getName(); + String dtfShortActualLoc = dtfShort.getLocale(ULocale.ACTUAL_LOCALE).getName(); + errln("For locale " + localeID + + " expected same pattern from DateTimePatGen: " + dtpgPattern + + ", DateFmt-forSkel: " + dtfSkelPattern + ", DateFmt-short: " + dtfShortPattern + + "; latter has validLoc " + dtfShortValidLoc + ", actualLoc " + dtfShortActualLoc); + } + } + } }