mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 09:21:03 +00:00
ICU-22763 MF2: Handle time zones correctly in :datetime
Previously, the time zone components of date/time literals were ignored. In order to store the time zone properly for re-formatting, this change also changes the representation of `Formattable` date values to use a `GregorianCalendar` rather than a `UDate`. (This is a public API change.)
This commit is contained in:
parent
9cc28a6428
commit
f8c67ece8b
7 changed files with 215 additions and 64 deletions
|
@ -37,7 +37,6 @@ namespace message2 {
|
|||
|
||||
Formattable::Formattable(const Formattable& other) {
|
||||
contents = other.contents;
|
||||
holdsDate = other.holdsDate;
|
||||
}
|
||||
|
||||
Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
|
||||
|
@ -55,7 +54,7 @@ namespace message2 {
|
|||
|
||||
UFormattableType Formattable::getType() const {
|
||||
if (std::holds_alternative<double>(contents)) {
|
||||
return holdsDate ? UFMT_DATE : UFMT_DOUBLE;
|
||||
return UFMT_DOUBLE;
|
||||
}
|
||||
if (std::holds_alternative<int64_t>(contents)) {
|
||||
return UFMT_INT64;
|
||||
|
@ -76,6 +75,9 @@ namespace message2 {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (isDate()) {
|
||||
return UFMT_DATE;
|
||||
}
|
||||
if (std::holds_alternative<const FormattableObject*>(contents)) {
|
||||
return UFMT_OBJECT;
|
||||
}
|
||||
|
@ -225,12 +227,19 @@ namespace message2 {
|
|||
return df.orphan();
|
||||
}
|
||||
|
||||
void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
|
||||
void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) {
|
||||
CHECK_ERROR(errorCode);
|
||||
|
||||
LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
|
||||
CHECK_ERROR(errorCode);
|
||||
df->format(date, result, 0, errorCode);
|
||||
// We have to copy the `const` reference that was passed in,
|
||||
// because DateFormat::format() takes a non-const reference.
|
||||
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(cal));
|
||||
if (!copied.isValid()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
df->format(*copied, result, nullptr, errorCode);
|
||||
}
|
||||
|
||||
// Called when output is required and the contents are an unevaluated `Formattable`;
|
||||
|
@ -261,9 +270,9 @@ namespace message2 {
|
|||
switch (type) {
|
||||
case UFMT_DATE: {
|
||||
UnicodeString result;
|
||||
UDate d = toFormat.getDate(status);
|
||||
const GregorianCalendar* cal = toFormat.getDate(status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
formatDateWithDefaults(locale, d, result, status);
|
||||
formatDateWithDefaults(locale, *cal, result, status);
|
||||
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
|
||||
}
|
||||
case UFMT_DOUBLE: {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "unicode/messageformat2_data_model_names.h"
|
||||
#include "unicode/messageformat2_function_registry.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/simpletz.h"
|
||||
#include "unicode/smpdtfmt.h"
|
||||
#include "charstr.h"
|
||||
#include "double-conversion.h"
|
||||
|
@ -1007,6 +1008,135 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc
|
|||
return result;
|
||||
}
|
||||
|
||||
static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) {
|
||||
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode));
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
return dateParser->parse(sourceStr, errorCode);
|
||||
}
|
||||
|
||||
static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
d = tryPattern("YYYY-MM-dd", sourceStr, errorCode);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) {
|
||||
NULL_ON_ERROR(errorCode);
|
||||
|
||||
int32_t len = offsetStr.length();
|
||||
|
||||
// `offsetStr` has a format like +03:30 or -03:30
|
||||
if (len <= 2) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1;
|
||||
int32_t indexOfColon = offsetStr.indexOf(':');
|
||||
if (indexOfColon <= 2) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon);
|
||||
UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len);
|
||||
double tzHour, tzMin;
|
||||
strToDouble(tzPart1, tzHour, errorCode);
|
||||
strToDouble(tzPart2, tzMin, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
int32_t offset = sign * (static_cast<int32_t>(tzHour) * 60 + static_cast<int32_t>(tzMin)) * 60 * 1000;
|
||||
LocalPointer<SimpleTimeZone> tz(new SimpleTimeZone(offset, UnicodeString("offset")));
|
||||
if (!tz.isValid()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
return tz.orphan();
|
||||
}
|
||||
|
||||
static bool hasTzOffset(const UnicodeString& sourceStr) {
|
||||
int32_t len = sourceStr.length();
|
||||
|
||||
if (len <= 6) {
|
||||
return false;
|
||||
}
|
||||
return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN)
|
||||
&& sourceStr[len - 3] == COLON);
|
||||
}
|
||||
|
||||
static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) {
|
||||
NULL_ON_ERROR(errorCode);
|
||||
|
||||
UDate absoluteDate;
|
||||
|
||||
// Try time zone part
|
||||
int32_t indexOfZ = sourceStr.indexOf('Z');
|
||||
int32_t indexOfPlus = sourceStr.lastIndexOf('+');
|
||||
int32_t indexOfMinus = sourceStr.lastIndexOf('-');
|
||||
int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus;
|
||||
bool isTzOffset = hasTzOffset(sourceStr);
|
||||
bool isGMT = indexOfZ > 0;
|
||||
UnicodeString offsetPart;
|
||||
bool hasTimeZone = isTzOffset || isGMT;
|
||||
if (!hasTimeZone) {
|
||||
// No time zone
|
||||
absoluteDate = tryPatterns(sourceStr, errorCode);
|
||||
} else {
|
||||
// Try to split into time zone and non-time-zone parts
|
||||
UnicodeString noTimeZone;
|
||||
if (isGMT) {
|
||||
noTimeZone = sourceStr.tempSubString(0, indexOfZ);
|
||||
} else {
|
||||
noTimeZone = sourceStr.tempSubString(0, indexOfSign);
|
||||
}
|
||||
tryPatterns(noTimeZone, errorCode);
|
||||
// Failure -- can't parse this string
|
||||
NULL_ON_ERROR(errorCode);
|
||||
// Success -- now handle the time zone part
|
||||
if (isGMT) {
|
||||
noTimeZone += UnicodeString("GMT");
|
||||
absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode);
|
||||
} else {
|
||||
// Finally, try [+-]nn:nn
|
||||
absoluteDate = tryTimeZonePatterns(sourceStr, errorCode);
|
||||
offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length());
|
||||
}
|
||||
}
|
||||
LocalPointer<GregorianCalendar> cal(new GregorianCalendar(errorCode));
|
||||
NULL_ON_ERROR(errorCode);
|
||||
cal->setTime(absoluteDate, errorCode);
|
||||
if (hasTimeZone) {
|
||||
if (isGMT) {
|
||||
cal->setTimeZone(*TimeZone::getGMT());
|
||||
} else {
|
||||
LocalPointer<SimpleTimeZone> tz(createTimeZonePart(offsetPart, errorCode));
|
||||
NULL_ON_ERROR(errorCode);
|
||||
cal->adoptTimeZone(tz.orphan());
|
||||
}
|
||||
}
|
||||
return cal.orphan();
|
||||
}
|
||||
|
||||
FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
|
||||
FunctionOptions&& opts,
|
||||
UErrorCode& errorCode) const {
|
||||
|
@ -1195,39 +1325,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
|
|||
case UFMT_STRING: {
|
||||
const UnicodeString& sourceStr = source.getString(errorCode);
|
||||
U_ASSERT(U_SUCCESS(errorCode));
|
||||
// Pattern for ISO 8601 format - datetime
|
||||
UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
|
||||
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));
|
||||
|
||||
LocalPointer<GregorianCalendar> cal(createCalendarFromDateString(sourceStr, errorCode));
|
||||
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_MF_FORMATTING_ERROR;
|
||||
} else {
|
||||
// Parse the date
|
||||
UDate d = dateParser->parse(sourceStr, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
// Pattern for ISO 8601 format - date
|
||||
UnicodeString pattern("YYYY-MM-dd");
|
||||
errorCode = U_ZERO_ERROR;
|
||||
dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_MF_FORMATTING_ERROR;
|
||||
} else {
|
||||
d = dateParser->parse(sourceStr, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Use the parsed date as the source value
|
||||
// in the returned FormattedPlaceholder; this is necessary
|
||||
// so the date can be re-formatted
|
||||
toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
|
||||
toFormat.getFallback());
|
||||
df->format(d, result, 0, errorCode);
|
||||
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Use the parsed date as the source value
|
||||
// in the returned FormattedPlaceholder; this is necessary
|
||||
// so the date can be re-formatted
|
||||
df->format(*cal, result, 0, errorCode);
|
||||
toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)),
|
||||
toFormat.getFallback());
|
||||
break;
|
||||
}
|
||||
case UFMT_DATE: {
|
||||
df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
|
||||
const GregorianCalendar* cal = source.getDate(errorCode);
|
||||
U_ASSERT(U_SUCCESS(errorCode));
|
||||
U_ASSERT(cal != nullptr);
|
||||
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(*cal));
|
||||
if (!copied.isValid()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return {};
|
||||
}
|
||||
df->format(*copied, result, 0, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
|
||||
|
|
|
@ -15,8 +15,10 @@
|
|||
#if !UCONFIG_NO_MF2
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/gregocal.h"
|
||||
#include "unicode/numberformatter.h"
|
||||
#include "unicode/messageformat2_data_model_names.h"
|
||||
#include "unicode/smpdtfmt.h"
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
|
@ -84,6 +86,7 @@ template class U_I18N_API std::_Variant_storage_<false,
|
|||
int64_t,
|
||||
icu::UnicodeString,
|
||||
icu::Formattable,
|
||||
icu::GregorianCalendar,
|
||||
const icu::message2::FormattableObject *,
|
||||
std::pair<const icu::message2::Formattable *,int32_t>>;
|
||||
#endif
|
||||
|
@ -92,6 +95,7 @@ template class U_I18N_API std::variant<double,
|
|||
int64_t,
|
||||
icu::UnicodeString,
|
||||
icu::Formattable,
|
||||
icu::GregorianCalendar,
|
||||
const icu::message2::FormattableObject*,
|
||||
P>;
|
||||
#endif
|
||||
|
@ -228,22 +232,24 @@ namespace message2 {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets the Date value of this object. If this object is not of type
|
||||
* kDate then the result is undefined and the error code is set.
|
||||
* Gets the calendar representing the date value of this object.
|
||||
* If this object is not of type kDate then the result is
|
||||
* undefined and the error code is set.
|
||||
*
|
||||
* @param status Input/output error code.
|
||||
* @return the Date value of this object.
|
||||
* @return A non-owned pointer to a GregorianCalendar object
|
||||
* representing the underlying date of this object.
|
||||
* @internal ICU 75 technology preview
|
||||
* @deprecated This API is for technology preview only.
|
||||
*/
|
||||
UDate getDate(UErrorCode& status) const {
|
||||
const GregorianCalendar* getDate(UErrorCode& status) const {
|
||||
if (U_SUCCESS(status)) {
|
||||
if (isDate()) {
|
||||
return *std::get_if<double>(&contents);
|
||||
return std::get_if<GregorianCalendar>(&contents);
|
||||
}
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return 0;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -301,7 +307,6 @@ namespace message2 {
|
|||
using std::swap;
|
||||
|
||||
swap(f1.contents, f2.contents);
|
||||
swap(f1.holdsDate, f2.holdsDate);
|
||||
}
|
||||
/**
|
||||
* Copy constructor.
|
||||
|
@ -353,18 +358,15 @@ namespace message2 {
|
|||
*/
|
||||
Formattable(int64_t i) : contents(i) {}
|
||||
/**
|
||||
* Date factory method.
|
||||
* Date constructor.
|
||||
*
|
||||
* @param d A UDate value to wrap as a Formattable.
|
||||
* @param c A GregorianCalendar value representing a date,
|
||||
* to wrap as a Formattable.
|
||||
* Passed by move
|
||||
* @internal ICU 75 technology preview
|
||||
* @deprecated This API is for technology preview only.
|
||||
*/
|
||||
static Formattable forDate(UDate d) {
|
||||
Formattable f;
|
||||
f.contents = d;
|
||||
f.holdsDate = true;
|
||||
return f;
|
||||
}
|
||||
Formattable(GregorianCalendar&& c) : contents(std::move(c)) {}
|
||||
/**
|
||||
* Creates a Formattable object of an appropriate numeric type from a
|
||||
* a decimal number in string form. The Formattable will retain the
|
||||
|
@ -424,16 +426,16 @@ namespace message2 {
|
|||
int64_t,
|
||||
UnicodeString,
|
||||
icu::Formattable, // represents a Decimal
|
||||
GregorianCalendar,
|
||||
const FormattableObject*,
|
||||
std::pair<const Formattable*, int32_t>> contents;
|
||||
bool holdsDate = false; // otherwise, we get type errors about UDate being a duplicate type
|
||||
UnicodeString bogusString; // :((((
|
||||
|
||||
UBool isDecimal() const {
|
||||
return std::holds_alternative<icu::Formattable>(contents);
|
||||
}
|
||||
UBool isDate() const {
|
||||
return std::holds_alternative<double>(contents) && holdsDate;
|
||||
return std::holds_alternative<GregorianCalendar>(contents);
|
||||
}
|
||||
}; // class Formattable
|
||||
|
||||
|
|
|
@ -158,13 +158,12 @@ void TestMessageFormat2::testAPISimple() {
|
|||
.setLocale(locale)
|
||||
.build(errorCode);
|
||||
|
||||
Calendar* cal(Calendar::createInstance(errorCode));
|
||||
GregorianCalendar cal(errorCode);
|
||||
// Sunday, October 28, 2136 8:39:12 AM PST
|
||||
cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12);
|
||||
UDate date = cal->getTime(errorCode);
|
||||
cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12);
|
||||
|
||||
argsBuilder.clear();
|
||||
argsBuilder["today"] = message2::Formattable::forDate(date);
|
||||
argsBuilder["today"] = message2::Formattable(std::move(cal));
|
||||
args = MessageArguments(argsBuilder, errorCode);
|
||||
result = mf.formatToString(args, errorCode);
|
||||
assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result);
|
||||
|
@ -187,8 +186,6 @@ void TestMessageFormat2::testAPISimple() {
|
|||
.build(errorCode);
|
||||
result = mf.formatToString(args, errorCode);
|
||||
assertEquals("testAPI", "Maria added 12 photos to her album.", result);
|
||||
|
||||
delete cal;
|
||||
}
|
||||
|
||||
// Design doc example, with more details
|
||||
|
@ -213,7 +210,7 @@ void TestMessageFormat2::testAPI() {
|
|||
|
||||
test = testBuilder.setName("testAPI")
|
||||
.setPattern("Today is {$today :date style=full}.")
|
||||
.setDateArgument("today", date)
|
||||
.setDateArgument("today", date, errorCode)
|
||||
.setExpected("Today is Sunday, October 28, 2136.")
|
||||
.setLocale("en_US")
|
||||
.build();
|
||||
|
|
|
@ -112,8 +112,16 @@ class TestCase : public UMemory {
|
|||
arguments[k] = Formattable(val);
|
||||
return *this;
|
||||
}
|
||||
Builder& setDateArgument(const UnicodeString& k, UDate date) {
|
||||
arguments[k] = Formattable::forDate(date);
|
||||
Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) {
|
||||
// This ignores time zones; the data-driven tests represent date/time values
|
||||
// as a datestamp, so this code suffices to handle those.
|
||||
// Date/time literal strings would be handled using `setArgument()` with a string
|
||||
// argument.
|
||||
GregorianCalendar cal(errorCode);
|
||||
THIS_ON_ERROR(errorCode);
|
||||
cal.setTime(date, errorCode);
|
||||
THIS_ON_ERROR(errorCode);
|
||||
arguments[k] = Formattable(std::move(cal));
|
||||
return *this;
|
||||
}
|
||||
Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) {
|
||||
|
|
|
@ -108,13 +108,11 @@
|
|||
},
|
||||
{
|
||||
"src": "Expires at {|2024-07-02T19:23:45Z| :datetime timeStyle=long}",
|
||||
"exp": "Expires at 7:23:45 PM GMT",
|
||||
"ignoreTest": "ICU-22754 Time zones not working yet (bug)"
|
||||
"exp": "Expires at 7:23:45 PM GMT"
|
||||
},
|
||||
{
|
||||
"src": "Expires at {|2024-07-02T19:23:45+03:30| :datetime timeStyle=full}",
|
||||
"exp": "Expires at 7:23:45 PM GMT+03:30",
|
||||
"ignoreTest": "ICU-22754 Time zones not working yet (bug)"
|
||||
"exp": "Expires at 7:23:45 PM GMT+03:30"
|
||||
}
|
||||
],
|
||||
"Chaining" : [
|
||||
|
|
|
@ -113,5 +113,19 @@
|
|||
"locale": "ro",
|
||||
"params": [{ "name": "val", "value": {"decimal": "1234567890123456789.987654321"} }]
|
||||
}
|
||||
],
|
||||
"datetime_extra": [
|
||||
{
|
||||
"srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n",
|
||||
"{{Expires at {$d} or {$d :datetime timeStyle=full}}}"],
|
||||
"exp": "Expires at 7:23:45\u202FPM GMT+3:30 or 7:23:45\u202FPM GMT+03:30",
|
||||
"comment": "This checks that the return value of :datetime preserves the time zone if specified"
|
||||
},
|
||||
{
|
||||
"src": "{$d :datetime timeStyle=full}",
|
||||
"exp": "7:23:45\u202FPM GMT+03:30",
|
||||
"params": {"d": "2024-07-02T19:23:45+03:30"},
|
||||
"comment": "This checks that an argument can be a date literal string."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue