diff --git a/icu4c/source/i18n/dtitv_impl.h b/icu4c/source/i18n/dtitv_impl.h index c7addf37fb6..6fc16bb3e08 100644 --- a/icu4c/source/i18n/dtitv_impl.h +++ b/icu4c/source/i18n/dtitv_impl.h @@ -64,11 +64,13 @@ #define LOW_Z ((UChar)0x007A) #define CAP_A ((UChar)0x0041) +#define CAP_B ((UChar)0x0042) #define CAP_C ((UChar)0x0043) #define CAP_D ((UChar)0x0044) #define CAP_E ((UChar)0x0045) #define CAP_F ((UChar)0x0046) #define CAP_G ((UChar)0x0047) +#define CAP_J ((UChar)0x004A) #define CAP_H ((UChar)0x0048) #define CAP_K ((UChar)0x004B) #define CAP_L ((UChar)0x004C) diff --git a/icu4c/source/i18n/dtitvfmt.cpp b/icu4c/source/i18n/dtitvfmt.cpp index 9b9e11d4dcc..3b65c9461c1 100644 --- a/icu4c/source/i18n/dtitvfmt.cpp +++ b/icu4c/source/i18n/dtitvfmt.cpp @@ -751,7 +751,7 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { /* the difference between time skeleton and normalizedTimeSkeleton are: * 1. (Formerly, normalized time skeleton folded 'H' to 'h'; no longer true) - * 2. 'a' is omitted in normalized time skeleton. + * 2. (Formerly, 'a' was omitted in normalized time skeleton; this is now handled elsewhere) * 3. there is only one appearance for 'h' or 'H', 'm','v', 'z' in normalized * time skeleton * @@ -760,7 +760,8 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { * 2. 'E' and 'EE' are normalized into 'EEE' * 3. 
'MM' is normalized into 'M' */ - getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton, + UnicodeString convertedSkeleton = normalizeHourMetacharacters(fSkeleton); + getDateTimeSkeleton(convertedSkeleton, dateSkeleton, normalizedDateSkeleton, timeSkeleton, normalizedTimeSkeleton); #ifdef DTITVFMT_DEBUG @@ -899,6 +900,91 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { +UnicodeString +DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) const { + UnicodeString result = skeleton; + + UChar hourMetachar = u'\0'; + int32_t metacharStart = 0; + int32_t metacharCount = 0; + for (int32_t i = 0; i < result.length(); i++) { + UChar c = result[i]; + if (c == LOW_J || c == CAP_J || c == CAP_C) { + if (hourMetachar == u'\0') { + hourMetachar = c; + metacharStart = i; + } + ++metacharCount; + } else { + if (hourMetachar != u'\0') { + break; + } + } + } + + if (hourMetachar != u'\0') { + UErrorCode err = U_ZERO_ERROR; + UChar hourChar = CAP_H; + UChar dayPeriodChar = LOW_A; + UnicodeString convertedPattern = DateFormat::getBestPattern(fLocale, UnicodeString(hourMetachar), err); + + if (U_SUCCESS(err)) { + // strip literal text from the pattern (so literal characters don't get mistaken for pattern + // characters-- such as the 'h' in 'Uhr' in German) + int32_t firstQuotePos; + while ((firstQuotePos = convertedPattern.indexOf(u'\'')) != -1) { + int32_t secondQuotePos = convertedPattern.indexOf(u'\'', firstQuotePos + 1); + if (secondQuotePos == -1) { + secondQuotePos = firstQuotePos; + } + convertedPattern.replace(firstQuotePos, (secondQuotePos - firstQuotePos) + 1, UnicodeString()); + } + + if (convertedPattern.indexOf(LOW_H) != -1) { + hourChar = LOW_H; + } else if (convertedPattern.indexOf(CAP_K) != -1) { + hourChar = CAP_K; + } else if (convertedPattern.indexOf(LOW_K) != -1) { + hourChar = LOW_K; + } + + if (convertedPattern.indexOf(LOW_B) != -1) { + dayPeriodChar = LOW_B; + } else if (convertedPattern.indexOf(CAP_B) != -1) 
{ + dayPeriodChar = CAP_B; + } + } + + if (hourChar == CAP_H || hourChar == LOW_K) { + result.replace(metacharStart, metacharCount, hourChar); + } else { + UnicodeString hourAndDayPeriod(hourChar); + switch (metacharCount) { + case 1: + case 2: + default: + hourAndDayPeriod.append(UnicodeString(dayPeriodChar)); + break; + case 3: + case 4: + for (int32_t i = 0; i < 4; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + case 5: + case 6: + for (int32_t i = 0; i < 5; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + } + result.replace(metacharStart, metacharCount, hourAndDayPeriod); + } + } + return result; +} + + void U_EXPORT2 DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, UnicodeString& dateSkeleton, @@ -911,11 +997,10 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, int32_t dCount = 0; int32_t MCount = 0; int32_t yCount = 0; - int32_t hCount = 0; - int32_t HCount = 0; int32_t mCount = 0; int32_t vCount = 0; int32_t zCount = 0; + UChar hourChar = u'\0'; int32_t i; for (i = 0; i < skeleton.length(); ++i) { @@ -956,17 +1041,14 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, normalizedDateSkeleton.append(ch); dateSkeleton.append(ch); break; - case LOW_A: - // 'a' is implicitly handled - timeSkeleton.append(ch); - break; case LOW_H: - timeSkeleton.append(ch); - ++hCount; - break; case CAP_H: + case LOW_K: + case CAP_K: timeSkeleton.append(ch); - ++HCount; + if (hourChar == u'\0') { + hourChar = ch; + } break; case LOW_M: timeSkeleton.append(ch); @@ -980,14 +1062,15 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, ++vCount; timeSkeleton.append(ch); break; + case LOW_A: case CAP_V: case CAP_Z: - case LOW_K: - case CAP_K: case LOW_J: case LOW_S: case CAP_S: case CAP_A: + case LOW_B: + case CAP_B: timeSkeleton.append(ch); normalizedTimeSkeleton.append(ch); break; @@ -1023,11 +1106,8 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& 
skeleton, } /* generate normalized form for time */ - if ( HCount != 0 ) { - normalizedTimeSkeleton.append(CAP_H); - } - else if ( hCount != 0 ) { - normalizedTimeSkeleton.append(LOW_H); + if ( hourChar != u'\0' ) { + normalizedTimeSkeleton.append(hourChar); } if ( mCount != 0 ) { normalizedTimeSkeleton.append(LOW_M); @@ -1335,10 +1415,11 @@ DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, } } if ( !pattern.isEmpty() ) { - if ( differenceInfo != 0 ) { + UBool suppressDayPeriodField = fSkeleton.indexOf(CAP_J) != -1; + if ( differenceInfo != 0 || suppressDayPeriodField) { UnicodeString adjustIntervalPattern; adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo, - adjustIntervalPattern); + suppressDayPeriodField, adjustIntervalPattern); setIntervalPattern(field, adjustIntervalPattern); } else { setIntervalPattern(field, pattern); @@ -1530,6 +1611,7 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, const UnicodeString& bestMatchSkeleton, const UnicodeString& bestIntervalPattern, int8_t differenceInfo, + UBool suppressDayPeriodField, UnicodeString& adjustedPtn) { adjustedPtn = bestIntervalPattern; int32_t inputSkeletonFieldWidth[] = @@ -1556,19 +1638,44 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + const int8_t PATTERN_CHAR_BASE = 0x41; + DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); - if ( differenceInfo == 2 ) { - adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */), - UnicodeString((UChar)0x7a /* z */)); + if (suppressDayPeriodField) { + adjustedPtn.findAndReplace(UnicodeString(LOW_A), UnicodeString()); + adjustedPtn.findAndReplace(UnicodeString(" "), UnicodeString(" ")); + adjustedPtn.trim(); } + if ( differenceInfo == 2 ) { + if (inputSkeleton.indexOf(LOW_Z) != -1) { + adjustedPtn.findAndReplace(UnicodeString(LOW_V), + 
UnicodeString(LOW_Z)); + } + if (inputSkeleton.indexOf(CAP_K) != -1) { + adjustedPtn.findAndReplace(UnicodeString(LOW_H), + UnicodeString(CAP_K)); + } + if (inputSkeleton.indexOf(LOW_K) != -1) { + adjustedPtn.findAndReplace(UnicodeString(CAP_H), + UnicodeString(LOW_K)); + } + if (inputSkeleton.indexOf(LOW_B) != -1) { + adjustedPtn.findAndReplace(UnicodeString(LOW_A), + UnicodeString(LOW_B)); + } + } + if (adjustedPtn.indexOf(LOW_A) != -1 && bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] = 1; + } + if (adjustedPtn.indexOf(LOW_B) != -1 && bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] = 1; + } UBool inQuote = false; UChar prevCh = 0; int32_t count = 0; - const int8_t PATTERN_CHAR_BASE = 0x41; - // loop through the pattern string character by character int32_t adjustedPtnLength = adjustedPtn.length(); int32_t i; diff --git a/icu4c/source/i18n/dtitvinf.cpp b/icu4c/source/i18n/dtitvinf.cpp index 25536346ec7..39fd44a392d 100644 --- a/icu4c/source/i18n/dtitvinf.cpp +++ b/icu4c/source/i18n/dtitvinf.cpp @@ -339,6 +339,9 @@ struct DateIntervalInfo::DateIntervalSink : public ResourceSink { return UCAL_DATE; } else if (c0 == 'a') { return UCAL_AM_PM; + } else if (c0 == 'B') { + // TODO: Using AM/PM as a proxy for flexible day period isn't really correct, but it's close + return UCAL_AM_PM; } else if (c0 == 'h' || c0 == 'H') { return UCAL_HOUR; } else if (c0 == 'm') { @@ -594,20 +597,23 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, const int32_t DIFFERENT_FIELD = 0x1000; const int32_t STRING_NUMERIC_DIFFERENCE = 0x100; const int32_t BASE = 0x41; - const UChar CHAR_V = 0x0076; - const UChar CHAR_Z = 0x007A; - // hack for 'v' and 'z'. - // resource bundle only have time skeletons ending with 'v', - // but not for time skeletons ending with 'z'. 
- UBool replaceZWithV = false; + // hack for certain alternate characters + // resource bundles only have time skeletons containing 'v', 'h', and 'H' + // but not time skeletons containing 'z', 'K', or 'k' + // the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too + UBool replacedAlternateChars = false; const UnicodeString* inputSkeleton = &skeleton; UnicodeString copySkeleton; - if ( skeleton.indexOf(CHAR_Z) != -1 ) { + if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) { copySkeleton = skeleton; - copySkeleton.findAndReplace(UnicodeString(CHAR_Z), UnicodeString(CHAR_V)); + copySkeleton.findAndReplace(UnicodeString(LOW_Z), UnicodeString(LOW_V)); + copySkeleton.findAndReplace(UnicodeString(LOW_K), UnicodeString(CAP_H)); + copySkeleton.findAndReplace(UnicodeString(CAP_K), UnicodeString(LOW_H)); + copySkeleton.findAndReplace(UnicodeString(LOW_A), UnicodeString()); + copySkeleton.findAndReplace(UnicodeString(LOW_B), UnicodeString()); inputSkeleton = &copySkeleton; - replaceZWithV = true; + replacedAlternateChars = true; } parseSkeleton(*inputSkeleton, inputSkeletonFieldWidth); @@ -616,7 +622,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, // 0 means exact the same skeletons; // 1 means having the same field, but with different length, - // 2 means only z/v differs + // 2 means only z/v, h/K, or H/k differs // -1 means having different field. 
bestMatchDistanceInfo = 0; int8_t fieldLength = UPRV_LENGTHOF(skeletonFieldWidth); @@ -672,7 +678,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, break; } } - if ( replaceZWithV && bestMatchDistanceInfo != -1 ) { + if ( replacedAlternateChars && bestMatchDistanceInfo != -1 ) { bestMatchDistanceInfo = 2; } return bestSkeleton; diff --git a/icu4c/source/i18n/unicode/dtitvfmt.h b/icu4c/source/i18n/unicode/dtitvfmt.h index 4e4d712b4f5..f05110690e5 100644 --- a/icu4c/source/i18n/unicode/dtitvfmt.h +++ b/icu4c/source/i18n/unicode/dtitvfmt.h @@ -864,6 +864,19 @@ private: void setFallbackPattern(UCalendarDateFields field, const UnicodeString& skeleton, UErrorCode& status); + + + + /** + * Converts special hour metacharacters (such as 'j') in the skeleton into locale-appropriate + * pattern characters. + * + * + * @param skeleton The skeleton to convert + * @return A copy of the skeleton, with "j" and any other special hour metacharacters converted to the regular ones. + * + */ + UnicodeString normalizeHourMetacharacters(const UnicodeString& skeleton) const; @@ -984,6 +997,7 @@ private: * @param differenceInfo the difference between 2 skeletons * 1 means only field width differs * 2 means v/z exchange + * @param suppressDayPeriodField if true, remove the day period field from the pattern, if there is one * @param adjustedIntervalPattern adjusted interval pattern */ static void U_EXPORT2 adjustFieldWidth( @@ -991,6 +1005,7 @@ private: const UnicodeString& bestMatchSkeleton, const UnicodeString& bestMatchIntervalPattern, int8_t differenceInfo, + UBool suppressDayPeriodField, UnicodeString& adjustedIntervalPattern); /** diff --git a/icu4c/source/test/intltest/dtifmtts.cpp b/icu4c/source/test/intltest/dtifmtts.cpp index bb10c6b47b4..85d74c6a753 100644 --- a/icu4c/source/test/intltest/dtifmtts.cpp +++ b/icu4c/source/test/intltest/dtifmtts.cpp @@ -60,6 +60,7 @@ void DateIntervalFormatTest::runIndexedTest( int32_t index, UBool exec, const ch TESTCASE(11, 
testCreateInstanceForAllLocales); TESTCASE(12, testTicket20707); TESTCASE(13, testFormatMillisecond); + TESTCASE(14, testHourMetacharacters); default: name = ""; break; } } @@ -1076,6 +1077,93 @@ void DateIntervalFormatTest::testFormat() { } +/** + * Test handling of hour and day period metacharacters + */ +void DateIntervalFormatTest::testHourMetacharacters() { + // first item is date pattern + // followed by a group of locale/from_data/to_data/skeleton/interval_data + // Note that from_data/to_data are specified using era names from root, for the calendar specified by locale. + const char* DATA[] = { + "GGGGG y MM dd HH:mm:ss", // pattern for from_data/to_data + + // This test is for tickets ICU-21154, ICU-21155, and ICU-21156 and is intended to verify + // that all of the special skeleton characters for hours and day periods work as expected + // with date intervals: + // - If a, b, or B is included in the skeleton, it correctly sets the length of the day-period field + // - If k or K is included, it behaves the same as H or h, except for the difference in the actual + // number used for the hour. 
+ // - If j is included, it behaves the same as either h or H as appropriate, and multiple j's have the + // intended effect on the length of the day period field (if there is one) + // - If J is included, it correctly suppresses the day period field if j would include it + // - If C is included, it behaves the same as j and brings up the correct day period field + // - In all cases, if the day period of both ends of the range is the same, you only see it once + + // baseline (h and H) + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hh", "12 \\u2013 1 AM", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "HH", "00\\u201301 Uhr", + + // k and K (ICU-21154 and ICU-21156) + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "KK", "0 \\u2013 1 AM", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "kk", "24\\u201301 Uhr", + + // different lengths of the 'a' field + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "ha", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "ha", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "haaaaa", "10 a \\u2013 12 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "haaaaa", "12 \\u2013 1 a", + + // j (ICU-21155) + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10 a \\u2013 1 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "12 \\u2013 1 a", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "00\\u201301 Uhr", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "00\\u201301 Uhr", + + // b and B + 
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hb", "10 AM \\u2013 12 noon", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hbbbbb", "10 a \\u2013 12 n", + "en", "CE 2010 09 27 13:00:00", "CE 2010 09 27 14:00:00", "hb", "1 \\u2013 2 PM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "10 in the morning \\u2013 1 in the afternoon", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "12 \\u2013 1 at night", + + // J + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10 \\u2013 1", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "12 \\u2013 1", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "00\\u201301 Uhr", + + // C + // (for English and German, C should do the same thing as j) + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10 a \\u2013 1 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "12 \\u2013 1 a", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "00\\u201301 Uhr", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "00\\u201301 Uhr", + // (for zh_HK and hi_IN, j maps to ha, but C maps to hB) + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "\\u4E0A\\u534810\\u6642\\u81F3\\u4E0B\\u53481\\u6642", + "zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "\\u4E0A\\u534812\\u6642\\u81F31\\u6642", + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642", + 
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u51CC\\u666812\\u20131\\u6642", + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642", + "zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u51CC\\u666812\\u20131\\u6642", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 am \\u2013 1 pm", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12\\u20131 am", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u0930\\u093E\\u0924 12\\u20131", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u0930\\u093E\\u0924 12\\u20131", + }; + expect(DATA, UPRV_LENGTHOF(DATA)); +} + + void DateIntervalFormatTest::expect(const char** data, int32_t data_length) { int32_t i = 0; UErrorCode ec = U_ZERO_ERROR; diff --git a/icu4c/source/test/intltest/dtifmtts.h b/icu4c/source/test/intltest/dtifmtts.h index 69b3d938d4f..7c96eea38b7 100644 --- a/icu4c/source/test/intltest/dtifmtts.h +++ b/icu4c/source/test/intltest/dtifmtts.h @@ -34,6 +34,12 @@ public: */ void testFormat(); + + /** + * Test handling of hour and day period metacharacters + */ + void testHourMetacharacters(); + void testFormatMillisecond(); /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java index 3ba5517e529..f6d0293046e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java @@ -1331,7 +1331,7 @@ public class DateIntervalFormat extends 
UFormat { // or by getInstance(String skeleton, .... ) fSkeleton = dtpng.getSkeleton(fullPattern); } - String skeleton = fSkeleton; + String skeleton = normalizeHourMetacharacters(fSkeleton, locale); HashMap intervalPatterns = new HashMap<>(); @@ -1547,6 +1547,89 @@ public class DateIntervalFormat extends UFormat { } */ + private String normalizeHourMetacharacters(String skeleton, ULocale locale) { + StringBuilder result = new StringBuilder(skeleton); + + char hourMetachar = '\0'; + int metacharStart = 0; + int metacharCount = 0; + for (int i = 0; i < result.length(); i++) { + char c = result.charAt(i); + if (c == 'j' || c == 'J' || c == 'C') { + if (hourMetachar == '\0') { + hourMetachar = c; + metacharStart = i; + } + ++metacharCount; + } else { + if (hourMetachar != '\0') { + break; + } + } + } + + if (hourMetachar != '\0') { + char hourChar = 'H'; + char dayPeriodChar = 'a'; + + DateTimePatternGenerator dtptng = DateTimePatternGenerator.getInstance(locale); + String convertedPattern = dtptng.getBestPattern(String.valueOf(hourMetachar)); + + // strip literal text from the pattern (so literal characters don't get mistaken for pattern + // characters-- such as the 'h' in 'Uhr' in German) + int firstQuotePos; + while ((firstQuotePos = convertedPattern.indexOf('\'')) != -1) { + int secondQuotePos = convertedPattern.indexOf('\'', firstQuotePos + 1); + if (secondQuotePos == -1) { + secondQuotePos = firstQuotePos; + } + convertedPattern = convertedPattern.substring(0, firstQuotePos) + convertedPattern.substring(secondQuotePos + 1); + } + + if (convertedPattern.indexOf('h') != -1) { + hourChar = 'h'; + } else if (convertedPattern.indexOf('K') != -1) { + hourChar = 'K'; + } else if (convertedPattern.indexOf('k') != -1) { + hourChar = 'k'; + } + + if (convertedPattern.indexOf('b') != -1) { + dayPeriodChar = 'b'; + } else if (convertedPattern.indexOf('B') != -1) { + dayPeriodChar = 'B'; + } + + if (hourChar == 'H' || hourChar == 'k') { + result.replace(metacharStart, 
metacharStart + metacharCount, String.valueOf(hourChar)); + } else { + StringBuilder hourAndDayPeriod = new StringBuilder(); + hourAndDayPeriod.append(hourChar); + switch (metacharCount) { + case 1: + case 2: + default: + hourAndDayPeriod.append(dayPeriodChar); + break; + case 3: + case 4: + for (int i = 0; i < 4; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + case 5: + case 6: + for (int i = 0; i < 5; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + } + result.replace(metacharStart, metacharStart + metacharCount, hourAndDayPeriod.toString()); + } + } + return result.toString(); + } + /* * get separated date and time skeleton from a combined skeleton. * @@ -1583,11 +1666,10 @@ public class DateIntervalFormat extends UFormat { int dCount = 0; int MCount = 0; int yCount = 0; - int hCount = 0; - int HCount = 0; int mCount = 0; int vCount = 0; int zCount = 0; + char hourChar = '\0'; for (i = 0; i < skeleton.length(); ++i) { char ch = skeleton.charAt(i); @@ -1627,17 +1709,14 @@ public class DateIntervalFormat extends UFormat { normalizedDateSkeleton.append(ch); dateSkeleton.append(ch); break; - case 'a': - // 'a' is implicitly handled - timeSkeleton.append(ch); - break; case 'h': - timeSkeleton.append(ch); - ++hCount; - break; case 'H': + case 'k': + case 'K': timeSkeleton.append(ch); - ++HCount; + if (hourChar == '\0') { + hourChar = ch; + } break; case 'm': timeSkeleton.append(ch); @@ -1651,14 +1730,15 @@ public class DateIntervalFormat extends UFormat { ++vCount; timeSkeleton.append(ch); break; + case 'a': case 'V': case 'Z': - case 'k': - case 'K': case 'j': case 's': case 'S': case 'A': + case 'b': + case 'B': timeSkeleton.append(ch); normalizedTimeSkeleton.append(ch); break; @@ -1694,11 +1774,8 @@ public class DateIntervalFormat extends UFormat { } /* generate normalized form for time */ - if ( HCount != 0 ) { - normalizedTimeSkeleton.append('H'); - } - else if ( hCount != 0 ) { - normalizedTimeSkeleton.append('h'); + if ( hourChar 
!= '\0' ) { + normalizedTimeSkeleton.append(hourChar); } if ( mCount != 0 ) { normalizedTimeSkeleton.append('m'); @@ -1894,10 +1971,11 @@ public class DateIntervalFormat extends UFormat { } if ( pattern != null ) { if ( differenceInfo != 0 ) { + boolean suppressDayPeriodField = fSkeleton.indexOf('J') != -1; String part1 = adjustFieldWidth(skeleton, bestSkeleton, - pattern.getFirstPart(), differenceInfo); + pattern.getFirstPart(), differenceInfo, suppressDayPeriodField); String part2 = adjustFieldWidth(skeleton, bestSkeleton, - pattern.getSecondPart(), differenceInfo); + pattern.getSecondPart(), differenceInfo, suppressDayPeriodField); pattern = new PatternInfo(part1, part2, pattern.firstDateInPtnIsLaterDate()); } else { @@ -1936,12 +2014,14 @@ public class DateIntervalFormat extends UFormat { * @param differenceInfo the difference between 2 skeletons * 1 means only field width differs * 2 means v/z exchange + * @param suppressDayPeriodField if true, remove the day period field from the result * @return the adjusted interval pattern */ private static String adjustFieldWidth(String inputSkeleton, String bestMatchSkeleton, String bestMatchIntervalPattern, - int differenceInfo ) { + int differenceInfo, + boolean suppressDayPeriodField ) { if ( bestMatchIntervalPattern == null ) { return null; // the 2nd part could be null @@ -1963,20 +2043,46 @@ public class DateIntervalFormat extends UFormat { */ + int PATTERN_CHAR_BASE = 0x41; + DateIntervalInfo.parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo.parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); + if (suppressDayPeriodField) { + if (bestMatchIntervalPattern.indexOf(" a") != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace(" a", ""); + } else if (bestMatchIntervalPattern.indexOf("a ") != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a ", ""); + } + bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a", ""); + } if ( differenceInfo == 
2 ) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace('v', 'z'); + if (inputSkeleton.indexOf('z') != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace('v', 'z'); + } + if (inputSkeleton.indexOf('K') != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace('h', 'K'); + } + if (inputSkeleton.indexOf('k') != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace('H', 'k'); + } + if (inputSkeleton.indexOf('b') != -1) { + bestMatchIntervalPattern = bestMatchIntervalPattern.replace('a', 'b'); + } + } + if (bestMatchIntervalPattern.indexOf('a') != -1 && bestMatchSkeletonFieldWidth['a' - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth['a' - PATTERN_CHAR_BASE] = 1; + } + if (bestMatchIntervalPattern.indexOf('b') != -1 && bestMatchSkeletonFieldWidth['b' - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth['b' - PATTERN_CHAR_BASE] = 1; } StringBuilder adjustedPtn = new StringBuilder(bestMatchIntervalPattern); + boolean inQuote = false; char prevCh = 0; int count = 0; - int PATTERN_CHAR_BASE = 0x41; - // loop through the pattern string character by character int adjustedPtnLength = adjustedPtn.length(); for (int i = 0; i < adjustedPtnLength; ++i) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java index d042bb93164..cd9e9940cca 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java @@ -532,7 +532,7 @@ public class DateIntervalInfo implements Cloneable, Freezable, // Check that the pattern letter is accepted char letter = patternLetter.charAt(0); - if (ACCEPTED_PATTERN_LETTERS.indexOf(letter) < 0) { + if (ACCEPTED_PATTERN_LETTERS.indexOf(letter) < 0 && letter != 'B') { return null; } @@ -540,6 +540,12 @@ public class DateIntervalInfo implements Cloneable, Freezable, if (letter == 
CALENDAR_FIELD_TO_PATTERN_LETTER[Calendar.HOUR_OF_DAY].charAt(0)) { patternLetter = CALENDAR_FIELD_TO_PATTERN_LETTER[Calendar.HOUR]; } + + // Replace 'a' for 'B' + // TODO: Using AM/PM as a proxy for flexible day period isn’t really correct, but it’s close + if (letter == 'B') { + patternLetter = CALENDAR_FIELD_TO_PATTERN_LETTER[Calendar.AM_PM]; + } return patternLetter; } @@ -1081,20 +1087,25 @@ public class DateIntervalInfo implements Cloneable, Freezable, final int STRING_NUMERIC_DIFFERENCE = 0x100; final int BASE = 0x41; - // TODO: this is a hack for 'v' and 'z' - // resource bundle only have time skeletons ending with 'v', - // but not for time skeletons ending with 'z'. - boolean replaceZWithV = false; - if ( inputSkeleton.indexOf('z') != -1 ) { + // hack for certain alternate characters + // resource bundles only have time skeletons containing 'v', 'h', and 'H' + // but not time skeletons containing 'z', 'K', or 'k' + // the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too + boolean replacedAlternateChars = false; + if ( inputSkeleton.indexOf('z') != -1 || inputSkeleton.indexOf('k') != -1 || inputSkeleton.indexOf('K') != -1 || inputSkeleton.indexOf('a') != -1 || inputSkeleton.indexOf('b') != -1 ) { inputSkeleton = inputSkeleton.replace('z', 'v'); - replaceZWithV = true; + inputSkeleton = inputSkeleton.replace('k', 'H'); + inputSkeleton = inputSkeleton.replace('K', 'h'); + inputSkeleton = inputSkeleton.replace("a", ""); + inputSkeleton = inputSkeleton.replace("b", ""); + replacedAlternateChars = true; } parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); int bestDistance = Integer.MAX_VALUE; // 0 means exact the same skeletons; // 1 means having the same field, but with different length, - // 2 means only z/v differs + // 2 means only z/v, h/K, or H/k differs // -1 means having different field. 
int bestFieldDifference = 0; for (String skeleton : fIntervalPatterns.keySet()) { @@ -1135,7 +1146,7 @@ public class DateIntervalInfo implements Cloneable, Freezable, break; } } - if ( replaceZWithV && bestFieldDifference != -1 ) { + if ( replacedAlternateChars && bestFieldDifference != -1 ) { bestFieldDifference = 2; } return new DateIntervalFormat.BestMatchInfo(bestSkeleton, bestFieldDifference); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java index bc706c59cc4..5d1fc810156 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java @@ -721,6 +721,94 @@ public class DateIntervalFormatTest extends TestFmwk { } + /** + * Test handling of hour and day period metacharacters + */ + @Test + public void TestHourMetacharacters() { + // first item is date pattern + // followed by a group of locale/from_data/to_data/skeleton/interval_data + // Note that from_data/to_data are specified using era names from root, for the calendar specified by locale. + String[] DATA = { + "GGGGG y MM dd HH:mm:ss", // pattern for from_data/to_data + + // This test is for tickets ICU-21154, ICU-21155, and ICU-21156 and is intended to verify + // that all of the special skeleton characters for hours and day periods work as expected + // with date intervals: + // - If a, b, or B is included in the skeleton, it correctly sets the length of the day-period field + // - If k or K is included, it behaves the same as H or h, except for the difference in the actual + // number used for the hour. 
+ // - If j is included, it behaves the same as either h or H as appropriate, and multiple j's have the + // intended effect on the length of the day period field (if there is one) + // - If J is included, it correctly suppresses the day period field if j would include it + // - If C is included, it behaves the same as j and brings up the correct day period field + // - In all cases, if the day period of both ends of the range is the same, you only see it once + + // baseline (h and H) + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hh", "12 \\u2013 1 AM", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "HH", "00\\u201301 Uhr", + + // k and K (ICU-21154 and ICU-21156) + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "KK", "0 \\u2013 1 AM", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "kk", "24\\u201301 Uhr", + + // different lengths of the 'a' field + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "ha", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "ha", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "haaaaa", "10 a \\u2013 12 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "haaaaa", "12 \\u2013 1 a", + + // j (ICU-21155) + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10 a \\u2013 1 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "12 \\u2013 1 a", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "00\\u201301 Uhr", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "00\\u201301 Uhr", + + // b and B + 
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hb", "10 AM \\u2013 12 noon", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hbbbbb", "10 a \\u2013 12 n", + "en", "CE 2010 09 27 13:00:00", "CE 2010 09 27 14:00:00", "hb", "1 \\u2013 2 PM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "10 in the morning \\u2013 1 in the afternoon", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "12 \\u2013 1 at night", + + // J + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10 \\u2013 1", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "12 \\u2013 1", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "00\\u201301 Uhr", + + // C + // (for English and German, C should do the same thing as j) + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10 AM \\u2013 1 PM", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "12 \\u2013 1 AM", + "en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10 a \\u2013 1 p", + "en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "12 \\u2013 1 a", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "00\\u201301 Uhr", + "de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10\\u201313 Uhr", + "de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "00\\u201301 Uhr", + // (for zh_HK and hi_IN, j maps to ha, but C maps to hB) + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "\\u4E0A\\u534810\\u6642\\u81F3\\u4E0B\\u53481\\u6642", + "zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "\\u4E0A\\u534812\\u6642\\u81F31\\u6642", + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642", + 
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u51CC\\u666812\\u20131\\u6642", + "zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642", + "zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u51CC\\u666812\\u20131\\u6642", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 am \\u2013 1 pm", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12\\u20131 am", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u0930\\u093E\\u0924 12\\u20131", + "hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1", + "hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u0930\\u093E\\u0924 12\\u20131", + }; + expect(DATA, DATA.length); + } + + private void expect(String[] data, int data_length) { int i = 0; String pattern = data[i++];