ICU-22763 MF2: Handle time zones correctly in :datetime

Previously, the time zone components of date/time literals were ignored.
In order to store the time zone properly for re-formatting,
this change also changes the representation of `Formattable` date
values to use a `GregorianCalendar` rather than a `UDate`.
(This is a public API change.)
This commit is contained in:
Tim Chevalier 2024-04-29 14:15:31 -07:00
parent 9cc28a6428
commit f8c67ece8b
7 changed files with 215 additions and 64 deletions

View file

@ -37,7 +37,6 @@ namespace message2 {
Formattable::Formattable(const Formattable& other) {
contents = other.contents;
holdsDate = other.holdsDate;
}
Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
@ -55,7 +54,7 @@ namespace message2 {
UFormattableType Formattable::getType() const {
if (std::holds_alternative<double>(contents)) {
return holdsDate ? UFMT_DATE : UFMT_DOUBLE;
return UFMT_DOUBLE;
}
if (std::holds_alternative<int64_t>(contents)) {
return UFMT_INT64;
@ -76,6 +75,9 @@ namespace message2 {
}
}
}
if (isDate()) {
return UFMT_DATE;
}
if (std::holds_alternative<const FormattableObject*>(contents)) {
return UFMT_OBJECT;
}
@ -225,12 +227,19 @@ namespace message2 {
return df.orphan();
}
void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) {
CHECK_ERROR(errorCode);
LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);
df->format(date, result, 0, errorCode);
// We have to copy the `const` reference that was passed in,
// because DateFormat::format() takes a non-const reference.
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(cal));
if (!copied.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
df->format(*copied, result, nullptr, errorCode);
}
// Called when output is required and the contents are an unevaluated `Formattable`;
@ -261,9 +270,9 @@ namespace message2 {
switch (type) {
case UFMT_DATE: {
UnicodeString result;
UDate d = toFormat.getDate(status);
const GregorianCalendar* cal = toFormat.getDate(status);
U_ASSERT(U_SUCCESS(status));
formatDateWithDefaults(locale, d, result, status);
formatDateWithDefaults(locale, *cal, result, status);
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
case UFMT_DOUBLE: {

View file

@ -17,6 +17,7 @@
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/normalizer2.h"
#include "unicode/simpletz.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "double-conversion.h"
@ -1007,6 +1008,135 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc
return result;
}
static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) {
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode));
if (U_FAILURE(errorCode)) {
return 0;
}
return dateParser->parse(sourceStr, errorCode);
}
static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd", sourceStr, errorCode);
}
return d;
}
static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode);
}
return d;
}
SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);
int32_t len = offsetStr.length();
// `offsetStr` has a format like +03:30 or -03:30
if (len <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1;
int32_t indexOfColon = offsetStr.indexOf(':');
if (indexOfColon <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon);
UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len);
double tzHour, tzMin;
strToDouble(tzPart1, tzHour, errorCode);
strToDouble(tzPart2, tzMin, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
int32_t offset = sign * (static_cast<int32_t>(tzHour) * 60 + static_cast<int32_t>(tzMin)) * 60 * 1000;
LocalPointer<SimpleTimeZone> tz(new SimpleTimeZone(offset, UnicodeString("offset")));
if (!tz.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
return tz.orphan();
}
static bool hasTzOffset(const UnicodeString& sourceStr) {
int32_t len = sourceStr.length();
if (len <= 6) {
return false;
}
return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN)
&& sourceStr[len - 3] == COLON);
}
static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);
UDate absoluteDate;
// Try time zone part
int32_t indexOfZ = sourceStr.indexOf('Z');
int32_t indexOfPlus = sourceStr.lastIndexOf('+');
int32_t indexOfMinus = sourceStr.lastIndexOf('-');
int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus;
bool isTzOffset = hasTzOffset(sourceStr);
bool isGMT = indexOfZ > 0;
UnicodeString offsetPart;
bool hasTimeZone = isTzOffset || isGMT;
if (!hasTimeZone) {
// No time zone
absoluteDate = tryPatterns(sourceStr, errorCode);
} else {
// Try to split into time zone and non-time-zone parts
UnicodeString noTimeZone;
if (isGMT) {
noTimeZone = sourceStr.tempSubString(0, indexOfZ);
} else {
noTimeZone = sourceStr.tempSubString(0, indexOfSign);
}
tryPatterns(noTimeZone, errorCode);
// Failure -- can't parse this string
NULL_ON_ERROR(errorCode);
// Success -- now handle the time zone part
if (isGMT) {
noTimeZone += UnicodeString("GMT");
absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode);
} else {
// Finally, try [+-]nn:nn
absoluteDate = tryTimeZonePatterns(sourceStr, errorCode);
offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length());
}
}
LocalPointer<GregorianCalendar> cal(new GregorianCalendar(errorCode));
NULL_ON_ERROR(errorCode);
cal->setTime(absoluteDate, errorCode);
if (hasTimeZone) {
if (isGMT) {
cal->setTimeZone(*TimeZone::getGMT());
} else {
LocalPointer<SimpleTimeZone> tz(createTimeZonePart(offsetPart, errorCode));
NULL_ON_ERROR(errorCode);
cal->adoptTimeZone(tz.orphan());
}
}
return cal.orphan();
}
FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
FunctionOptions&& opts,
UErrorCode& errorCode) const {
@ -1195,39 +1325,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
case UFMT_STRING: {
const UnicodeString& sourceStr = source.getString(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
// Pattern for ISO 8601 format - datetime
UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));
LocalPointer<GregorianCalendar> cal(createCalendarFromDateString(sourceStr, errorCode));
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
// Parse the date
UDate d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
// Pattern for ISO 8601 format - date
UnicodeString pattern("YYYY-MM-dd");
errorCode = U_ZERO_ERROR;
dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
}
}
// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
toFormat.getFallback());
df->format(d, result, 0, errorCode);
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return {};
}
// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
df->format(*cal, result, 0, errorCode);
toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)),
toFormat.getFallback());
break;
}
case UFMT_DATE: {
df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
const GregorianCalendar* cal = source.getDate(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
U_ASSERT(cal != nullptr);
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(*cal));
if (!copied.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return {};
}
df->format(*copied, result, 0, errorCode);
if (U_FAILURE(errorCode)) {
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;

View file

@ -15,8 +15,10 @@
#if !UCONFIG_NO_MF2
#include "unicode/chariter.h"
#include "unicode/gregocal.h"
#include "unicode/numberformatter.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/smpdtfmt.h"
#ifndef U_HIDE_DEPRECATED_API
@ -84,6 +86,7 @@ template class U_I18N_API std::_Variant_storage_<false,
int64_t,
icu::UnicodeString,
icu::Formattable,
icu::GregorianCalendar,
const icu::message2::FormattableObject *,
std::pair<const icu::message2::Formattable *,int32_t>>;
#endif
@ -92,6 +95,7 @@ template class U_I18N_API std::variant<double,
int64_t,
icu::UnicodeString,
icu::Formattable,
icu::GregorianCalendar,
const icu::message2::FormattableObject*,
P>;
#endif
@ -228,22 +232,24 @@ namespace message2 {
}
/**
* Gets the Date value of this object. If this object is not of type
* kDate then the result is undefined and the error code is set.
* Gets the calendar representing the date value of this object.
* If this object is not of type kDate then the result is
* undefined and the error code is set.
*
* @param status Input/output error code.
* @return the Date value of this object.
* @return A non-owned pointer to a GregorianCalendar object
* representing the underlying date of this object.
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
UDate getDate(UErrorCode& status) const {
const GregorianCalendar* getDate(UErrorCode& status) const {
if (U_SUCCESS(status)) {
if (isDate()) {
return *std::get_if<double>(&contents);
return std::get_if<GregorianCalendar>(&contents);
}
status = U_ILLEGAL_ARGUMENT_ERROR;
}
return 0;
return nullptr;
}
/**
@ -301,7 +307,6 @@ namespace message2 {
using std::swap;
swap(f1.contents, f2.contents);
swap(f1.holdsDate, f2.holdsDate);
}
/**
* Copy constructor.
@ -353,18 +358,15 @@ namespace message2 {
*/
Formattable(int64_t i) : contents(i) {}
/**
* Date factory method.
* Date constructor.
*
* @param d A UDate value to wrap as a Formattable.
* @param c A GregorianCalendar value representing a date,
* to wrap as a Formattable.
* Passed by move
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
static Formattable forDate(UDate d) {
Formattable f;
f.contents = d;
f.holdsDate = true;
return f;
}
Formattable(GregorianCalendar&& c) : contents(std::move(c)) {}
/**
* Creates a Formattable object of an appropriate numeric type from a
* a decimal number in string form. The Formattable will retain the
@ -424,16 +426,16 @@ namespace message2 {
int64_t,
UnicodeString,
icu::Formattable, // represents a Decimal
GregorianCalendar,
const FormattableObject*,
std::pair<const Formattable*, int32_t>> contents;
bool holdsDate = false; // otherwise, we get type errors about UDate being a duplicate type
UnicodeString bogusString; // :((((
UBool isDecimal() const {
return std::holds_alternative<icu::Formattable>(contents);
}
UBool isDate() const {
return std::holds_alternative<double>(contents) && holdsDate;
return std::holds_alternative<GregorianCalendar>(contents);
}
}; // class Formattable

View file

@ -158,13 +158,12 @@ void TestMessageFormat2::testAPISimple() {
.setLocale(locale)
.build(errorCode);
Calendar* cal(Calendar::createInstance(errorCode));
GregorianCalendar cal(errorCode);
// Sunday, October 28, 2136 8:39:12 AM PST
cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12);
UDate date = cal->getTime(errorCode);
cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12);
argsBuilder.clear();
argsBuilder["today"] = message2::Formattable::forDate(date);
argsBuilder["today"] = message2::Formattable(std::move(cal));
args = MessageArguments(argsBuilder, errorCode);
result = mf.formatToString(args, errorCode);
assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result);
@ -187,8 +186,6 @@ void TestMessageFormat2::testAPISimple() {
.build(errorCode);
result = mf.formatToString(args, errorCode);
assertEquals("testAPI", "Maria added 12 photos to her album.", result);
delete cal;
}
// Design doc example, with more details
@ -213,7 +210,7 @@ void TestMessageFormat2::testAPI() {
test = testBuilder.setName("testAPI")
.setPattern("Today is {$today :date style=full}.")
.setDateArgument("today", date)
.setDateArgument("today", date, errorCode)
.setExpected("Today is Sunday, October 28, 2136.")
.setLocale("en_US")
.build();

View file

@ -112,8 +112,16 @@ class TestCase : public UMemory {
arguments[k] = Formattable(val);
return *this;
}
Builder& setDateArgument(const UnicodeString& k, UDate date) {
arguments[k] = Formattable::forDate(date);
Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) {
// This ignores time zones; the data-driven tests represent date/time values
// as a datestamp, so this code suffices to handle those.
// Date/time literal strings would be handled using `setArgument()` with a string
// argument.
GregorianCalendar cal(errorCode);
THIS_ON_ERROR(errorCode);
cal.setTime(date, errorCode);
THIS_ON_ERROR(errorCode);
arguments[k] = Formattable(std::move(cal));
return *this;
}
Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) {

View file

@ -108,13 +108,11 @@
},
{
"src": "Expires at {|2024-07-02T19:23:45Z| :datetime timeStyle=long}",
"exp": "Expires at 7:23:45PM GMT",
"ignoreTest": "ICU-22754 Time zones not working yet (bug)"
"exp": "Expires at 7:23:45PM GMT"
},
{
"src": "Expires at {|2024-07-02T19:23:45+03:30| :datetime timeStyle=full}",
"exp": "Expires at 7:23:45PM GMT+03:30",
"ignoreTest": "ICU-22754 Time zones not working yet (bug)"
"exp": "Expires at 7:23:45PM GMT+03:30"
}
],
"Chaining" : [

View file

@ -113,5 +113,19 @@
"locale": "ro",
"params": [{ "name": "val", "value": {"decimal": "1234567890123456789.987654321"} }]
}
],
"datetime_extra": [
{
"srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n",
"{{Expires at {$d} or {$d :datetime timeStyle=full}}}"],
"exp": "Expires at 7:23:45\u202FPM GMT+3:30 or 7:23:45\u202FPM GMT+03:30",
"comment": "This checks that the return value of :datetime preserves the time zone if specified"
},
{
"src": "{$d :datetime timeStyle=full}",
"exp": "7:23:45\u202FPM GMT+03:30",
"params": {"d": "2024-07-02T19:23:45+03:30"},
"comment": "This checks that an argument can be a date literal string."
}
]
}