From d0e30acc684eefd61fb42a510f8fef84221a5126 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 28 Feb 2025 11:49:00 -0800 Subject: [PATCH] ICU-23059 ICU4C MF2: Spec test updates Update spec tests to current version from message-format-wg - Update parser for changed name-start grammar rule - Validate number literals in :number implementation (since parser no longer does this) - Disallow `:number`/`:integer` select option set from variable See https://github.com/unicode-org/message-format-wg/pull/1016 As part of this, un-skip tests where the `bad-option` error is expected, and implement validating digit size options (pending PR https://github.com/unicode-org/icu/pull/2973 is intended to do this more fully) --- icu4c/source/i18n/messageformat2.cpp | 3 +- icu4c/source/i18n/messageformat2_errors.cpp | 22 ++ icu4c/source/i18n/messageformat2_errors.h | 15 ++ .../source/i18n/messageformat2_evaluation.cpp | 95 ++++++++- icu4c/source/i18n/messageformat2_evaluation.h | 13 ++ .../source/i18n/messageformat2_formatter.cpp | 12 -- .../i18n/messageformat2_function_registry.cpp | 201 ++++++++++++------ ...essageformat2_function_registry_internal.h | 50 +++++ icu4c/source/i18n/messageformat2_parser.cpp | 166 +++++---------- icu4c/source/i18n/messageformat2_parser.h | 1 + .../i18n/unicode/messageformat2_formattable.h | 10 +- .../intltest/messageformat2test_read_json.cpp | 19 +- icu4c/source/tools/toolutil/json-json.hpp | 12 +- .../invalid-number-literals-diagnostics.json | 25 --- .../message2/spec/functions/datetime.json | 2 +- testdata/message2/spec/functions/integer.json | 36 +++- testdata/message2/spec/functions/number.json | 141 +++++++++++- testdata/message2/spec/functions/string.json | 4 +- testdata/message2/spec/syntax-errors.json | 6 +- testdata/message2/spec/syntax.json | 63 ++++-- .../message2/syntax-errors-diagnostics.json | 9 - .../message2/unsupported-expressions.json | 3 - 22 files changed, 645 insertions(+), 263 deletions(-) delete mode 100644 
testdata/message2/invalid-number-literals-diagnostics.json diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index a5ef68a2472..b7202ccb8ad 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -169,7 +169,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status); resolvedOpt.adoptInstead(create (ResolvedFunctionOption(k, - optValue.asFormattable()), + optValue.asFormattable(), + v.isLiteral()), status)); if (U_FAILURE(status)) { return {}; diff --git a/icu4c/source/i18n/messageformat2_errors.cpp b/icu4c/source/i18n/messageformat2_errors.cpp index 5d3d938f020..c4a96544fe6 100644 --- a/icu4c/source/i18n/messageformat2_errors.cpp +++ b/icu4c/source/i18n/messageformat2_errors.cpp @@ -29,6 +29,14 @@ namespace message2 { addError(DynamicError(DynamicErrorType::FormattingError, UnicodeString("unknown formatter")), status); } + void DynamicErrors::setBadOption(const FunctionName& formatterName, UErrorCode& status) { + addError(DynamicError(DynamicErrorType::BadOptionError, formatterName), status); + } + + void DynamicErrors::setRecoverableBadOption(const FunctionName& formatterName, UErrorCode& status) { + addError(DynamicError(DynamicErrorType::RecoverableBadOptionError, formatterName), status); + } + void DynamicErrors::setOperandMismatchError(const FunctionName& formatterName, UErrorCode& status) { addError(DynamicError(DynamicErrorType::OperandMismatchError, formatterName), status); } @@ -137,6 +145,11 @@ namespace message2 { status = U_MF_FORMATTING_ERROR; break; } + case DynamicErrorType::BadOptionError: + case DynamicErrorType::RecoverableBadOptionError: { + status = U_MF_BAD_OPTION; + break; + } case DynamicErrorType::OperandMismatchError: { status = U_MF_OPERAND_MISMATCH_ERROR; break; @@ -228,6 +241,15 @@ namespace message2 { 
resolutionAndFormattingErrors->adoptElement(errorP, status); break; } + case DynamicErrorType::BadOptionError: { + badOptionError = true; + resolutionAndFormattingErrors->adoptElement(errorP, status); + break; + } + case DynamicErrorType::RecoverableBadOptionError: { + resolutionAndFormattingErrors->adoptElement(errorP, status); + break; + } } } diff --git a/icu4c/source/i18n/messageformat2_errors.h b/icu4c/source/i18n/messageformat2_errors.h index 085263e88b0..42cea5b2b12 100644 --- a/icu4c/source/i18n/messageformat2_errors.h +++ b/icu4c/source/i18n/messageformat2_errors.h @@ -66,6 +66,17 @@ namespace message2 { enum DynamicErrorType { UnresolvedVariable, FormattingError, + BadOptionError, + /** + This is used to signal errors from :number and :integer when a + bad `select` option is passed. In this case, fallback output + is not used, so it must be distinguished from a regular bad + option error (but it maps to a bad option error in the final + error code). + See https://github.com/unicode-org/message-format-wg/blob/main/spec/functions/number.md#number-selection + "The formatting of the _resolved value_ is not affected by the `select` option.") + */ + RecoverableBadOptionError, OperandMismatchError, SelectorError, UnknownFunction, @@ -114,6 +125,7 @@ namespace message2 { const StaticErrors& staticErrors; LocalPointer resolutionAndFormattingErrors; bool formattingError = false; + bool badOptionError = false; bool selectorError = false; bool unknownFunctionError = false; bool unresolvedVariableError = false; @@ -128,9 +140,12 @@ namespace message2 { void setFormattingError(const FunctionName&, UErrorCode&); // Used when the name of the offending formatter is unknown void setFormattingError(UErrorCode&); + void setBadOption(const FunctionName&, UErrorCode&); + void setRecoverableBadOption(const FunctionName&, UErrorCode&); void setOperandMismatchError(const FunctionName&, UErrorCode&); bool hasDataModelError() const { return staticErrors.hasDataModelError(); } bool 
hasFormattingError() const { return formattingError; } + bool hasBadOptionError() const { return badOptionError; } bool hasSelectorError() const { return selectorError; } bool hasSyntaxError() const { return staticErrors.hasSyntaxError(); } bool hasUnknownFunctionError() const { return unknownFunctionError; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 44796248dd3..1a88ece4724 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -11,6 +11,7 @@ #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" +#include "messageformat2_function_registry_internal.h" #include "messageformat2_macros.h" #include "uvector.h" // U_ASSERT @@ -28,6 +29,7 @@ using namespace data_model; ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { name = std::move(other.name); value = std::move(other.value); + sourceIsLiteral = other.sourceIsLiteral; } ResolvedFunctionOption::~ResolvedFunctionOption() {} @@ -46,6 +48,20 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { options = moveVectorToArray(optionsVector, status); } +// Returns false if option doesn't exist +UBool FunctionOptions::wasSetFromLiteral(const UnicodeString& key) const { + if (options == nullptr) { + U_ASSERT(functionOptionsLen == 0); + } + for (int32_t i = 0; i < functionOptionsLen; i++) { + const ResolvedFunctionOption& opt = options[i]; + if (opt.getName() == key) { + return opt.isLiteral(); + } + } + return false; +} + UBool FunctionOptions::getFunctionOption(std::u16string_view key, Formattable& option) const { if (options == nullptr) { U_ASSERT(functionOptionsLen == 0); @@ -303,12 +319,25 @@ PrioritizedVariant::~PrioritizedVariant() {} FunctionOptions opts; InternalValue* p = this; FunctionName selectorName = name; + + bool operandSelect = false; while (std::holds_alternative(p->argument)) { if (p->name != 
selectorName) { // Can only compose calls to the same selector errorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } + // Very special case to detect something like: + // .local $sel = {1 :integer select=exact} .local $bad = {$sel :integer} .match $bad 1 {{ONE}} * {{operand select {$bad}}} + // This can be done better once function composition is fully implemented. + if (p != this && + !p->options.getStringFunctionOption(options::SELECT).isEmpty() + && (selectorName == functions::NUMBER || selectorName == functions::INTEGER)) { + // In this case, we want to call the selector normally but emit a + // `bad-option` error, possibly with the outcome of normal-looking output (with relaxed + // error handling) and an error (with strict error handling). + operandSelect = true; + } // First argument to mergeOptions takes precedence opts = opts.mergeOptions(std::move(p->options), errorCode); if (U_FAILURE(errorCode)) { @@ -319,15 +348,50 @@ PrioritizedVariant::~PrioritizedVariant() {} } FormattedPlaceholder arg = std::move(*std::get_if(&p->argument)); + // This condition can't be checked in the selector. + // Effectively, there are two different kinds of "bad option" errors: + // one that can be recovered from (used for select=$var) and one that + // can't (used for bad digit size options and other cases). + // The checking of the recoverable error has to be done here; otherwise, + // the "bad option" signaled by the selector implementation would cause + // fallback output to be used when formatting the `*` pattern. 
+ bool badSelectOption = !checkSelectOption(); + selector->selectKey(std::move(arg), std::move(opts), keys, keysLen, prefs, prefsLen, errorCode); - if (U_FAILURE(errorCode)) { + if (errorCode == U_MF_SELECTOR_ERROR) { errorCode = U_ZERO_ERROR; errs.setSelectorError(selectorName, errorCode); + } else if (errorCode == U_MF_BAD_OPTION) { + errorCode = U_ZERO_ERROR; + errs.setBadOption(selectorName, errorCode); + } else if (operandSelect || badSelectOption) { + errs.setRecoverableBadOption(selectorName, errorCode); + // In this case, only the `*` variant should match + prefsLen = 0; } } + bool InternalValue::checkSelectOption() const { + if (name != UnicodeString("number") && name != UnicodeString("integer")) { + return true; + } + + // Per the spec, if the "select" option is present, it must have been + // set from a literal + + Formattable opt; + // Returns false if the `select` option is present and it was not set from a literal + + // OK if the option wasn't present + if (!options.getFunctionOption(UnicodeString("select"), opt)) { + return true; + } + // Otherwise, return true if the option was set from a literal + return options.wasSetFromLiteral(UnicodeString("select")); + } + FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; @@ -368,24 +432,45 @@ PrioritizedVariant::~PrioritizedVariant() {} fallback = arg.getFallback(); } + // Very special case for :number select=foo and :integer select=foo + // This check can't be done inside the function implementation because + // it doesn't have a way to both signal an error and return usable output, + // and the spec stipulates that fallback output shouldn't be used in the + // case of a bad `select` option to a formatting call. 
+ bool badSelect = !checkSelectOption(); + // Call the function with the argument FormattedPlaceholder result = formatter->format(std::move(arg), std::move(options), errorCode); + if (U_SUCCESS(errorCode) && errorCode == U_USING_DEFAULT_WARNING) { + // Ignore this warning + errorCode = U_ZERO_ERROR; + } if (U_FAILURE(errorCode)) { if (errorCode == U_MF_OPERAND_MISMATCH_ERROR) { errorCode = U_ZERO_ERROR; errs.setOperandMismatchError(name, errorCode); + } else if (errorCode == U_MF_BAD_OPTION) { + errorCode = U_ZERO_ERROR; + errs.setBadOption(name, errorCode); } else { errorCode = U_ZERO_ERROR; - // Convey any error generated by the formatter - // as a formatting error, except for operand mismatch errors + // Convey any other error generated by the formatter + // as a formatting error errs.setFormattingError(name, errorCode); } } // Ignore the output if any error occurred - if (errs.hasFormattingError()) { + // A regular Bad Option Error also discards the output. Only the + // recoverable kind (bad `select` option), which does not set the + // bad-option flag, yields both an error and non-fallback output. 
+ if (errs.hasFormattingError() || errs.hasBadOptionError()) { return FormattedPlaceholder(fallback); } - + if (badSelect) { + // In this case, we want to set an error but not replace + // the output with a fallback + errs.setRecoverableBadOption(name, errorCode); + } return result; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 9ef2d798b05..f73b444229f 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -31,6 +31,18 @@ U_NAMESPACE_BEGIN namespace message2 { + namespace functions { + static constexpr std::u16string_view DATETIME = u"datetime"; + static constexpr std::u16string_view DATE = u"date"; + static constexpr std::u16string_view TIME = u"time"; + static constexpr std::u16string_view NUMBER = u"number"; + static constexpr std::u16string_view INTEGER = u"integer"; + static constexpr std::u16string_view TEST_FUNCTION = u"test:function"; + static constexpr std::u16string_view TEST_FORMAT = u"test:format"; + static constexpr std::u16string_view TEST_SELECT = u"test:select"; + static constexpr std::u16string_view STRING = u"string"; + } + using namespace data_model; // PrioritizedVariant @@ -201,6 +213,7 @@ namespace message2 { FunctionName name; const Selector* selector; // May be null const Formatter* formatter; // May be null, but one or the other should be non-null unless argument is a FormattedPlaceholder + bool checkSelectOption() const; }; // class InternalValue } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index a1c5196a42b..6c555ecbc0e 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -24,18 +24,6 @@ U_NAMESPACE_BEGIN namespace message2 { - namespace functions { - static constexpr std::u16string_view DATETIME = u"datetime"; - static constexpr std::u16string_view DATE = u"date"; 
- static constexpr std::u16string_view TIME = u"time"; - static constexpr std::u16string_view NUMBER = u"number"; - static constexpr std::u16string_view INTEGER = u"integer"; - static constexpr std::u16string_view TEST_FUNCTION = u"test:function"; - static constexpr std::u16string_view TEST_FORMAT = u"test:format"; - static constexpr std::u16string_view TEST_SELECT = u"test:select"; - static constexpr std::u16string_view STRING = u"string"; - } - // MessageFormatter::Builder // ------------------------------------- diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 668f0c4251d..d0e6bf62235 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -41,54 +41,6 @@ U_NAMESPACE_BEGIN namespace message2 { -// Constants for option names -namespace options { -static constexpr std::u16string_view ALWAYS = u"always"; -static constexpr std::u16string_view COMPACT = u"compact"; -static constexpr std::u16string_view COMPACT_DISPLAY = u"compactDisplay"; -static constexpr std::u16string_view DATE_STYLE = u"dateStyle"; -static constexpr std::u16string_view DAY = u"day"; -static constexpr std::u16string_view DECIMAL_PLACES = u"decimalPlaces"; -static constexpr std::u16string_view DEFAULT_UPPER = u"DEFAULT"; -static constexpr std::u16string_view ENGINEERING = u"engineering"; -static constexpr std::u16string_view EXACT = u"exact"; -static constexpr std::u16string_view EXCEPT_ZERO = u"exceptZero"; -static constexpr std::u16string_view FAILS = u"fails"; -static constexpr std::u16string_view FULL_UPPER = u"FULL"; -static constexpr std::u16string_view HOUR = u"hour"; -static constexpr std::u16string_view LONG = u"long"; -static constexpr std::u16string_view LONG_UPPER = u"LONG"; -static constexpr std::u16string_view MAXIMUM_FRACTION_DIGITS = u"maximumFractionDigits"; -static constexpr std::u16string_view MAXIMUM_SIGNIFICANT_DIGITS = 
u"maximumSignificantDigits"; -static constexpr std::u16string_view MEDIUM_UPPER = u"MEDIUM"; -static constexpr std::u16string_view MIN2 = u"min2"; -static constexpr std::u16string_view MINIMUM_FRACTION_DIGITS = u"minimumFractionDigits"; -static constexpr std::u16string_view MINIMUM_INTEGER_DIGITS = u"minimumIntegerDigits"; -static constexpr std::u16string_view MINIMUM_SIGNIFICANT_DIGITS = u"minimumSignificantDigits"; -static constexpr std::u16string_view MINUTE = u"minute"; -static constexpr std::u16string_view MONTH = u"month"; -static constexpr std::u16string_view NARROW = u"narrow"; -static constexpr std::u16string_view NEGATIVE = u"negative"; -static constexpr std::u16string_view NEVER = u"never"; -static constexpr std::u16string_view NOTATION = u"notation"; -static constexpr std::u16string_view NUMBERING_SYSTEM = u"numberingSystem"; -static constexpr std::u16string_view NUMERIC = u"numeric"; -static constexpr std::u16string_view ORDINAL = u"ordinal"; -static constexpr std::u16string_view PERCENT_STRING = u"percent"; -static constexpr std::u16string_view SCIENTIFIC = u"scientific"; -static constexpr std::u16string_view SECOND = u"second"; -static constexpr std::u16string_view SELECT = u"select"; -static constexpr std::u16string_view SHORT = u"short"; -static constexpr std::u16string_view SHORT_UPPER = u"SHORT"; -static constexpr std::u16string_view SIGN_DISPLAY = u"signDisplay"; -static constexpr std::u16string_view STYLE = u"style"; -static constexpr std::u16string_view TIME_STYLE = u"timeStyle"; -static constexpr std::u16string_view TWO_DIGIT = u"2-digit"; -static constexpr std::u16string_view USE_GROUPING = u"useGrouping"; -static constexpr std::u16string_view WEEKDAY = u"weekday"; -static constexpr std::u16string_view YEAR = u"year"; -} // namespace options - // Function registry implementation Formatter::~Formatter() {} @@ -334,6 +286,133 @@ MFFunctionRegistry::~MFFunctionRegistry() { // --------- Number +bool inBounds(const UnicodeString& s, int32_t i) { 
+    return i < s.length();
+}
+
+bool isDigit(UChar32 c) {
+    return c >= '0' && c <= '9';
+}
+
+bool parseDigits(const UnicodeString& s, int32_t& i) {
+    if (!isDigit(s[i])) {
+        return false;
+    }
+    while (inBounds(s, i) && isDigit(s[i])) {
+        i++;
+    }
+    return true;
+}
+
+// number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT]
+bool validateNumberLiteral(const UnicodeString& s) {
+    int32_t i = 0;
+
+    if (s.isEmpty()) {
+        return false;
+    }
+
+    // Parse optional sign
+    // ["-"]
+    if (s[0] == HYPHEN) {
+        i++;
+    }
+
+    if (!inBounds(s, i)) {
+        return false;
+    }
+
+    // Parse integer digits: (%x30 / (%x31-39 *DIGIT))
+    // A lone "0" is valid ("0", "-0", "0.5", "0e1"); it just can't be followed by more digits.
+    if (s[i] == '0') {
+        i++;
+        if (inBounds(s, i) && isDigit(s[i])) {
+            return false;
+        }
+    } else {
+        if (!parseDigits(s, i)) {
+            return false;
+        }
+    }
+    // The rest is optional
+    if (!inBounds(s, i)) {
+        return true;
+    }
+
+    // Parse optional decimal digits
+    // ["." 1*DIGIT]
+    if (s[i] == PERIOD) {
+        i++;
+        if (!parseDigits(s, i)) {
+            return false;
+        }
+    }
+
+    if (!inBounds(s, i)) {
+        return true;
+    }
+
+    // Parse optional exponent
+    // [%i"e" ["-" / "+"] 1*DIGIT]
+    if (s[i] == 'e' || s[i] == 'E') {
+        i++;
+        if (!inBounds(s, i)) {
+            return false;
+        }
+        // Parse optional sign
+        if (s[i] == HYPHEN || s[i] == PLUS) {
+            i++;
+        }
+        if (!inBounds(s, i)) {
+            return false;
+        }
+        if (!parseDigits(s, i)) {
+            return false;
+        }
+    }
+    if (i != s.length()) {
+        return false;
+    }
+    return true;
+}
+
+bool isInteger(const Formattable& s) {
+    switch (s.getType()) {
+        case UFMT_DOUBLE:
+        case UFMT_LONG:
+        case UFMT_INT64:
+            return true;
+        case UFMT_STRING: {
+            UErrorCode ignore = U_ZERO_ERROR;
+            const UnicodeString& str = s.getString(ignore);
+            return validateNumberLiteral(str);
+        }
+        default:
+            return false;
+    }
+}
+
+bool isDigitSizeOption(const UnicodeString& s) {
+    return s == UnicodeString("minimumIntegerDigits")
+        || s == UnicodeString("minimumFractionDigits")
+        || s == UnicodeString("maximumFractionDigits")
+        || s == UnicodeString("minimumSignificantDigits")
+        || s ==
UnicodeString("minimumSignificantDigits") + || s == UnicodeString("maximumSignificantDigits"); +} + +/* static */ void StandardFunctions::validateDigitSizeOptions(const FunctionOptions& opts, + UErrorCode& status) { + CHECK_ERROR(status); + + for (int32_t i = 0; i < opts.optionsCount(); i++) { + const ResolvedFunctionOption& opt = opts.options[i]; + if (isDigitSizeOption(opt.getName()) && !isInteger(opt.getValue())) { + status = U_MF_BAD_OPTION; + return; + } + } +} + /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, const FunctionOptions& opts, UErrorCode& status) { @@ -341,6 +420,11 @@ MFFunctionRegistry::~MFFunctionRegistry() { using namespace number; + validateDigitSizeOptions(opts, status); + if (U_FAILURE(status)) { + return {}; + } + if (U_SUCCESS(status)) { Formattable opt; nf = NumberFormatter::with(); @@ -517,22 +601,19 @@ static double parseNumberLiteral(const Formattable& input, UErrorCode& errorCode return {}; } - // Hack: Check for cases that are forbidden by the MF2 grammar - // but allowed by StringToDouble - int32_t len = inputStr.length(); - - if (len > 0 && ((inputStr[0] == '+') - || (inputStr[0] == '0' && len > 1 && inputStr[1] != '.') - || (inputStr[len - 1] == '.') - || (inputStr[0] == '.'))) { + // Validate string according to `number-literal` production + // in the spec for `:number`. This is because some cases are + // forbidden by this grammar, but allowed by StringToDouble. 
+ if (!validateNumberLiteral(inputStr)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; return 0; } - // Otherwise, convert to double using double_conversion::StringToDoubleConverter + // Convert to double using double_conversion::StringToDoubleConverter using namespace double_conversion; int processedCharactersCount = 0; StringToDoubleConverter converter(0, 0, 0, "", ""); + int32_t len = inputStr.length(); double result = converter.StringToDouble(reinterpret_cast(inputStr.getBuffer()), len, @@ -625,7 +706,7 @@ int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& o return static_cast(val); } } - return 0; + return 1; } int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const { @@ -741,9 +822,10 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar // Need to return the integer value if invoked as :integer if (isInteger) { return FormattedPlaceholder(FormattedPlaceholder(Formattable(integerValue), arg.getFallback()), + std::move(opts), FormattedValue(std::move(numberResult))); } - return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult))); + return FormattedPlaceholder(arg, std::move(opts), FormattedValue(std::move(numberResult))); } StandardFunctions::Number::~Number() {} @@ -751,7 +833,6 @@ StandardFunctions::NumberFactory::~NumberFactory() {} // --------- PluralFactory - StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { Formattable opt; diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 7e474415270..aef41ce383c 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -23,6 +23,54 @@ U_NAMESPACE_BEGIN namespace message2 { +// Constants for option names +namespace options { +static constexpr std::u16string_view ALWAYS = 
u"always"; +static constexpr std::u16string_view COMPACT = u"compact"; +static constexpr std::u16string_view COMPACT_DISPLAY = u"compactDisplay"; +static constexpr std::u16string_view DATE_STYLE = u"dateStyle"; +static constexpr std::u16string_view DAY = u"day"; +static constexpr std::u16string_view DECIMAL_PLACES = u"decimalPlaces"; +static constexpr std::u16string_view DEFAULT_UPPER = u"DEFAULT"; +static constexpr std::u16string_view ENGINEERING = u"engineering"; +static constexpr std::u16string_view EXACT = u"exact"; +static constexpr std::u16string_view EXCEPT_ZERO = u"exceptZero"; +static constexpr std::u16string_view FAILS = u"fails"; +static constexpr std::u16string_view FULL_UPPER = u"FULL"; +static constexpr std::u16string_view HOUR = u"hour"; +static constexpr std::u16string_view LONG = u"long"; +static constexpr std::u16string_view LONG_UPPER = u"LONG"; +static constexpr std::u16string_view MAXIMUM_FRACTION_DIGITS = u"maximumFractionDigits"; +static constexpr std::u16string_view MAXIMUM_SIGNIFICANT_DIGITS = u"maximumSignificantDigits"; +static constexpr std::u16string_view MEDIUM_UPPER = u"MEDIUM"; +static constexpr std::u16string_view MIN2 = u"min2"; +static constexpr std::u16string_view MINIMUM_FRACTION_DIGITS = u"minimumFractionDigits"; +static constexpr std::u16string_view MINIMUM_INTEGER_DIGITS = u"minimumIntegerDigits"; +static constexpr std::u16string_view MINIMUM_SIGNIFICANT_DIGITS = u"minimumSignificantDigits"; +static constexpr std::u16string_view MINUTE = u"minute"; +static constexpr std::u16string_view MONTH = u"month"; +static constexpr std::u16string_view NARROW = u"narrow"; +static constexpr std::u16string_view NEGATIVE = u"negative"; +static constexpr std::u16string_view NEVER = u"never"; +static constexpr std::u16string_view NOTATION = u"notation"; +static constexpr std::u16string_view NUMBERING_SYSTEM = u"numberingSystem"; +static constexpr std::u16string_view NUMERIC = u"numeric"; +static constexpr std::u16string_view ORDINAL = 
u"ordinal"; +static constexpr std::u16string_view PERCENT_STRING = u"percent"; +static constexpr std::u16string_view SCIENTIFIC = u"scientific"; +static constexpr std::u16string_view SECOND = u"second"; +static constexpr std::u16string_view SELECT = u"select"; +static constexpr std::u16string_view SHORT = u"short"; +static constexpr std::u16string_view SHORT_UPPER = u"SHORT"; +static constexpr std::u16string_view SIGN_DISPLAY = u"signDisplay"; +static constexpr std::u16string_view STYLE = u"style"; +static constexpr std::u16string_view TIME_STYLE = u"timeStyle"; +static constexpr std::u16string_view TWO_DIGIT = u"2-digit"; +static constexpr std::u16string_view USE_GROUPING = u"useGrouping"; +static constexpr std::u16string_view WEEKDAY = u"weekday"; +static constexpr std::u16string_view YEAR = u"year"; +} // namespace options + // Built-in functions /* The standard functions are :datetime, :date, :time, @@ -38,6 +86,8 @@ namespace message2 { static UnicodeString normalizeNFC(const UnicodeString&); private: + static void validateDigitSizeOptions(const FunctionOptions&, UErrorCode&); + static void checkSelectOption(const FunctionOptions&, UErrorCode&); static UnicodeString getStringOption(const FunctionOptions& opts, std::u16string_view optionName, UErrorCode& errorCode); diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp index 9a9f8e78df0..879c7024fc9 100644 --- a/icu4c/source/i18n/messageformat2_parser.cpp +++ b/icu4c/source/i18n/messageformat2_parser.cpp @@ -188,25 +188,42 @@ UnicodeSet* initNameStartChars(UErrorCode& status) { if (U_FAILURE(status)) { return nullptr; } - UnicodeSet* result = new UnicodeSet(*isAlpha); + UnicodeSet* result = new UnicodeSet(); if (result == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; }; - result->add(UNDERSCORE); - result->add(0x00C0, 0x00D6); - result->add(0x00D8, 0x00F6); - result->add(0x00F8, 0x02FF); - result->add(0x0370, 0x037D); - result->add(0x037F, 
0x061B); - result->add(0x061D, 0x1FFF); - result->add(0x200C, 0x200D); - result->add(0x2070, 0x218F); - result->add(0x2C00, 0x2FEF); + + result->addAll(*isAlpha); + result->add(0x002B); + result->add(0x005F); + result->add(0x00A1, 0x061B); + result->add(0x061D, 0x167F); + result->add(0x1681, 0x1FFF); + result->add(0x200B, 0x200D); + result->add(0x2010, 0x2027); + result->add(0x2030, 0x205E); + result->add(0x2060, 0x2065); + result->add(0x206A, 0x2FFF); result->add(0x3001, 0xD7FF); - result->add(0xF900, 0xFDCF); + result->add(0xE000, 0xFDCF); result->add(0xFDF0, 0xFFFD); - result->add(0x100000, 0xEFFFF); + result->add(0x10000, 0x1FFFD); + result->add(0x20000, 0x2FFFD); + result->add(0x30000, 0x3FFFD); + result->add(0x40000, 0x4FFFD); + result->add(0x50000, 0x5FFFD); + result->add(0x60000, 0x6FFFD); + result->add(0x70000, 0x7FFFD); + result->add(0x80000, 0x8FFFD); + result->add(0x90000, 0x9FFFD); + result->add(0xA0000, 0xAFFFD); + result->add(0xB0000, 0xBFFFD); + result->add(0xC0000, 0xCFFFD); + result->add(0xD0000, 0xDFFFD); + result->add(0xE0000, 0xEFFFD); + result->add(0xF0000, 0xFFFFD); + result->add(0x100000, 0x10FFFD); result->freeze(); return result; } @@ -230,9 +247,6 @@ UnicodeSet* initNameChars(UErrorCode& status) { result->addAll(*digit); result->add(HYPHEN); result->add(PERIOD); - result->add(0x00B7); - result->add(0x0300, 0x036F); - result->add(0x203F, 0x2040); result->freeze(); return result; } @@ -742,6 +756,29 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) { CHECK_BOUNDS(errorCode); } +/* + Consumes a possibly-empty sequence of name-chars. Appends to `str` + and returns `str`. 
+*/ +UnicodeString Parser::parseNameChars(UnicodeString& str, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return {}; + } + + while (isNameChar(peek())) { + UChar32 c = peek(); + str += c; + normalizedInput += c; + next(); + if (!inBounds()) { + ERROR(errorCode); + break; + } + } + + return str; +} + /* Consumes a non-empty sequence of `name-char`s, the first of which is also a `name-start`. @@ -767,16 +804,7 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) { parseOptionalBidi(); // name-start *name-char - while (isNameChar(peek())) { - UChar32 c = peek(); - name += c; - normalizedInput += c; - next(); - if (!inBounds()) { - ERROR(errorCode); - break; - } - } + parseNameChars(name, errorCode); // [bidi] parseOptionalBidi(); @@ -999,91 +1027,15 @@ Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } + // unquoted-literal = 1*name-char - // unquoted -> name - if (isNameStart(peek())) { - return Literal(false, parseName(errorCode)); + if (!(isNameChar(peek()))) { + ERROR(errorCode); + return {}; } - // unquoted -> number - // Parse the contents UnicodeString contents; - - // Parse the sign - if (peek() == HYPHEN) { - contents += peek(); - normalizedInput += peek(); - next(); - } - if (!inBounds()) { - ERROR(errorCode); - return {}; - } - - // Parse the integer part - if (peek() == ((UChar32)0x0030) /* 0 */) { - contents += peek(); - normalizedInput += peek(); - next(); - } else if (isDigit(peek())) { - contents += parseDigits(errorCode); - } else { - // Error -- nothing else can start a number literal - ERROR(errorCode); - return {}; - } - - // Parse the decimal point if present - if (peek() == PERIOD) { - contents += peek(); - normalizedInput += peek(); - next(); - if (!inBounds()) { - ERROR(errorCode); - return {}; - } - // Parse the fraction part - if (isDigit(peek())) { - contents += parseDigits(errorCode); - } else { - // '.' 
not followed by digit is a parse error - ERROR(errorCode); - return {}; - } - } - - if (!inBounds()) { - ERROR(errorCode); - return {}; - } - - // Parse the exponent part if present - if (peek() == UPPERCASE_E || peek() == LOWERCASE_E) { - contents += peek(); - normalizedInput += peek(); - next(); - if (!inBounds()) { - ERROR(errorCode); - return {}; - } - // Parse sign if present - if (peek() == PLUS || peek() == HYPHEN) { - contents += peek(); - normalizedInput += peek(); - next(); - if (!inBounds()) { - ERROR(errorCode); - return {}; - } - } - // Parse exponent digits - if (!isDigit(peek())) { - ERROR(errorCode); - return {}; - } - contents += parseDigits(errorCode); - } - + parseNameChars(contents, errorCode); return Literal(false, contents); } diff --git a/icu4c/source/i18n/messageformat2_parser.h b/icu4c/source/i18n/messageformat2_parser.h index 62a52d8f680..e3af5349fd4 100644 --- a/icu4c/source/i18n/messageformat2_parser.h +++ b/icu4c/source/i18n/messageformat2_parser.h @@ -164,6 +164,7 @@ namespace message2 { void parseToken(const std::u16string_view&, UErrorCode&); void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&); bool nextIs(const std::u16string_view&) const; + UnicodeString parseNameChars(UnicodeString&, UErrorCode&); UnicodeString parseName(UErrorCode&); UnicodeString parseIdentifier(UErrorCode&); UnicodeString parseDigits(UErrorCode&); diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index cd72bbd4a62..90648bf1d3a 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -456,16 +456,23 @@ class U_I18N_API ResolvedFunctionOption : public UObject { /* const */ UnicodeString name; /* const */ Formattable value; + // True iff this option was represented in the syntax by a literal value. 
+ // This is necessary in order to implement the spec for the `select` option + // of `:number` and `:integer`. + /* const */ bool sourceIsLiteral; public: const UnicodeString& getName() const { return name; } const Formattable& getValue() const { return value; } - ResolvedFunctionOption(const UnicodeString& n, const Formattable& f) : name(n), value(f) {} + bool isLiteral() const { return sourceIsLiteral; } + ResolvedFunctionOption(const UnicodeString& n, const Formattable& f, bool s) + : name(n), value(f), sourceIsLiteral(s) {} ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { name = std::move(other.name); value = std::move(other.value); + sourceIsLiteral = other.sourceIsLiteral; return *this; } virtual ~ResolvedFunctionOption(); @@ -559,6 +566,7 @@ class U_I18N_API FunctionOptions : public UObject { const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; UBool getFunctionOption(std::u16string_view, Formattable&) const; + UBool wasSetFromLiteral(const UnicodeString&) const; // Returns empty string if option doesn't exist UnicodeString getStringFunctionOption(std::u16string_view) const; int32_t optionsCount() const { return functionOptionsLen; } diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index c4bdc4cc2f5..8384c1ade28 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -40,7 +40,7 @@ static UErrorCode getExpectedRuntimeErrorFromString(const std::string& errorName return U_MF_OPERAND_MISMATCH_ERROR; } if (errorName == "bad-option") { - return U_MF_FORMATTING_ERROR; + return U_MF_BAD_OPTION; } if (errorName == "unknown-function") { return U_MF_UNKNOWN_FUNCTION_ERROR; @@ -198,10 +198,10 @@ static void runValidTest(TestMessageFormat2& icuTest, if (errorType.length() 
<= 0) { errorType = errors[0]["name"]; } - // See TODO(options); ignore these tests for now - if (errorType == "bad-option") { - return; - } +// // See TODO(options); ignore these tests for now +// if (errorType == "bad-option") { +// return; +// } test.setExpectedError(getExpectedRuntimeErrorFromString(errorType)); expectedError = true; } else if (defaultError.length() > 0) { @@ -279,8 +279,8 @@ static void runTestsFromJsonFile(TestMessageFormat2& t, for (auto iter = tests.begin(); iter != tests.end(); ++iter) { makeTestName(testName, sizeof(testName), fileName, ++testNum); t.logln(testName); - - t.logln(u_str(iter->dump())); + // Use error_handler_t::ignore because of the patch to allow lone surrogates + t.logln(u_str(iter->dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore))); runValidTest(t, testName, defaultError, anyError, *iter, errorCode); } @@ -357,15 +357,10 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { // (This applies to the expected output for all the U_DUPLICATE_DECLARATION_ERROR tests) runTestsFromJsonFile(*this, "duplicate-declarations.json", errorCode); - // TODO(options): - // Bad options. The spec is unclear about this - // -- see https://github.com/unicode-org/message-format-wg/issues/738 - // The current behavior is to set a U_MF_FORMATTING_ERROR for any invalid options. 
runTestsFromJsonFile(*this, "invalid-options.json", errorCode); runTestsFromJsonFile(*this, "syntax-errors-end-of-input.json", errorCode); runTestsFromJsonFile(*this, "syntax-errors-diagnostics.json", errorCode); - runTestsFromJsonFile(*this, "invalid-number-literals-diagnostics.json", errorCode); runTestsFromJsonFile(*this, "syntax-errors-diagnostics-multiline.json", errorCode); // ICU4J tests diff --git a/icu4c/source/tools/toolutil/json-json.hpp b/icu4c/source/tools/toolutil/json-json.hpp index bc16fcef6d4..e502b671ec4 100644 --- a/icu4c/source/tools/toolutil/json-json.hpp +++ b/icu4c/source/tools/toolutil/json-json.hpp @@ -7757,22 +7757,22 @@ class lexer : public lexer_base } else { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; + // ICU PATCH - See ICU-23090 + codepoint = codepoint1; } } else { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; + // ICU PATCH - See ICU-23090 + codepoint = codepoint1; } } else { if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; + // ICU PATCH - See ICU-23090 + codepoint = codepoint1; } } diff --git a/testdata/message2/invalid-number-literals-diagnostics.json b/testdata/message2/invalid-number-literals-diagnostics.json deleted file mode 100644 index d35c16b2338..00000000000 --- a/testdata/message2/invalid-number-literals-diagnostics.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "scenario": "Number literal syntax errors", - "description": "Syntax errors with number literals; for ICU4C, the character offset in the parse error is checked", - "defaultTestProperties": { - "locale": "en-US", - "expErrors": [ - { - "type": "syntax-error" - } - ] - }, - "tests": [ - { "src": "{00}", "char": 2}, - { "src": "{042}", "char": 2}, - { "src": 
"{1.}", "char": 3}, - { "src": "{1e}", "char": 3}, - { "src": "{1E}", "char": 3}, - { "src": "{1.e}", "char": 3}, - { "src": "{1.2e}", "char": 5}, - { "src": "{1.e3}", "char": 3}, - { "src": "{1e+}", "char": 4}, - { "src": "{1e-}", "char": 4}, - { "src": "{1.0e2.0}", "char": 6} - ] -} diff --git a/testdata/message2/spec/functions/datetime.json b/testdata/message2/spec/functions/datetime.json index a5f3bd00bc9..d8e8b6dad9d 100644 --- a/testdata/message2/spec/functions/datetime.json +++ b/testdata/message2/spec/functions/datetime.json @@ -45,7 +45,7 @@ "src": "{|2006-01-02T15:04:06| :datetime}" }, { - "src": "{|2006-01-02T15:04:06| :datetime year=numeric month=|2-digit|}" + "src": "{|2006-01-02T15:04:06| :datetime year=numeric month=2-digit}" }, { "src": "{|2006-01-02T15:04:06| :datetime dateStyle=long}" diff --git a/testdata/message2/spec/functions/integer.json b/testdata/message2/spec/functions/integer.json index f249cb27e50..f2d344c951f 100644 --- a/testdata/message2/spec/functions/integer.json +++ b/testdata/message2/spec/functions/integer.json @@ -16,7 +16,11 @@ "exp": "hello -4" }, { - "src": "hello {0.42e+1 :integer}", + "src": "hello {0.42 :integer}", + "exp": "hello 0" + }, + { + "src": "hello {|0.42e+1| :integer}", "exp": "hello 4" }, { @@ -32,6 +36,36 @@ { "src": ".local $x = {1.25 :integer} .local $y = {$x :number} {{{$y}}}", "exp": "1" + }, + { + "src": "literal select {1 :integer select=exact}", + "exp": "literal select 1" + }, + { + "src": ".local $bad = {exact} {{variable select {1 :integer select=$bad}}}", + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }] + }, + { + "src": "variable select {1 :integer select=$bad}", + "params": [{ "name": "bad", "value": "exact" }], + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }] + }, + { + "src": ".local $sel = {1 :integer select=exact} .match $sel 1 {{literal select {$sel}}} * {{OTHER}}", + "exp": "literal select 1" + }, + { + "src": ".local $sel = {1 :integer 
select=exact} .local $bad = {$sel :integer} .match $bad 1 {{ONE}} * {{operand select {$bad}}}", + "exp": "operand select 1", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-selector" }] + }, + { + "src": ".local $sel = {1 :integer select=$bad} .match $sel 1 {{ONE}} * {{variable select {$sel}}}", + "params": [{ "name": "bad", "value": "exact" }], + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-selector" }] } ] } diff --git a/testdata/message2/spec/functions/number.json b/testdata/message2/spec/functions/number.json index dee7b9c0137..89f859164ec 100644 --- a/testdata/message2/spec/functions/number.json +++ b/testdata/message2/spec/functions/number.json @@ -16,9 +16,112 @@ "exp": "hello -4.2" }, { - "src": "hello {0.42e+1 :number}", + "src": "hello {0.42 :number}", + "exp": "hello 0.42" + }, + { + "src": "hello {|0.42e+1| :number}", "exp": "hello 4.2" }, + { + "src": "hello {00 :number}", + "exp": "hello {|00|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {042 :number}", + "exp": "hello {|042|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1. 
:number}", + "exp": "hello {|1.|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1e :number}", + "exp": "hello {|1e|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1E :number}", + "exp": "hello {|1E|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1.e :number}", + "exp": "hello {|1.e|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1.2e :number}", + "exp": "hello {|1.2e|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1.e3 :number}", + "exp": "hello {|1.e3|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1e+ :number}", + "exp": "hello {|1e+|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1e- :number}", + "exp": "hello {|1e-|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, + { + "src": "hello {1.0e2.0 :number}", + "exp": "hello {|1.0e2.0|}", + "expErrors": [ + { + "type": "bad-operand" + } + ] + }, { "src": "hello {foo :number}", "exp": "hello {|foo|}", @@ -181,6 +284,42 @@ } ] }, + { + "description": "formatting with select=literal has no effect", + "src": "literal select {1 :number select=exact}", + "exp": "literal select 1" + }, + { + "description": "select=$var with local literal value causes error but no fallback", + "src": ".local $bad = {exact} {{variable select {1 :number select=$bad}}}", + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }] + }, + { + "description": "select=$var with external string value is not allowed", + "src": "variable select {1 :number select=$bad}", + "params": [{ "name": "bad", "value": "exact" }], + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }] + }, + { + "description": "select=literal works", + "src": ".local $sel = {1 :number select=exact} .match $sel 1 {{literal select {$sel}}} * {{OTHER}}", + "exp": "literal select 1" + }, + { + 
"description": "having select=literal as a selector operand is not allowed", + "src": ".local $sel = {1 :number select=exact} .local $bad = {$sel :number} .match $bad 1 {{ONE}} * {{operand select {$bad}}}", + "exp": "operand select 1", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-selector" }] + }, + { + "description": "with select=$var, * is always selected but its formatting is unaffected", + "src": ".local $sel = {1 :number select=$bad} .match $sel 1 {{ONE}} * {{variable select {$sel}}}", + "params": [{ "name": "bad", "value": "exact" }], + "exp": "variable select 1", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-selector" }] + }, { "src": "{42 :number @foo @bar=13}", "exp": "42", diff --git a/testdata/message2/spec/functions/string.json b/testdata/message2/spec/functions/string.json index 82f17380e58..06d0255ce53 100644 --- a/testdata/message2/spec/functions/string.json +++ b/testdata/message2/spec/functions/string.json @@ -22,7 +22,7 @@ "params": [ { "name": "foo", - "value": 1 + "value": "1" } ], "exp": "one" @@ -32,7 +32,7 @@ "params": [ { "name": "foo", - "value": null + "value": "2" } ], "exp": "other" diff --git a/testdata/message2/spec/syntax-errors.json b/testdata/message2/spec/syntax-errors.json index 00d0420f46f..b2e5ffc6d42 100644 --- a/testdata/message2/spec/syntax-errors.json +++ b/testdata/message2/spec/syntax-errors.json @@ -185,7 +185,6 @@ { "src": "{! .}" }, { "src": "{%}" }, { "src": "{*}" }, - { "src": "{+}" }, { "src": "{<}" }, { "src": "{>}" }, { "src": "{?}" }, @@ -193,10 +192,12 @@ { "src": "{^.}" }, { "src": "{^ .}" }, { "src": "{&}" }, + { "src": "{\ud800}" }, + { "src": "{\ufdd0}" }, + { "src": "{\ufffe}" }, { "src": "{!.\\{}" }, { "src": "{!. 
\\{}" }, { "src": "{!|a|}" }, - { "src": "foo {+reserved}" }, { "src": "foo {&private}" }, { "src": "foo {?reserved @a @b=c}" }, { "src": ".foo {42} {{bar}}" }, @@ -207,7 +208,6 @@ { "src": ".l $x.y = {|bar|} {{}}" }, { "src": "hello {|4.2| %number}" }, { "src": "hello {|4.2| %n|um|ber}" }, - { "src": "{+42}" }, { "src": "hello {|4.2| &num|be|r}" }, { "src": "hello {|4.2| ^num|be|r}" }, { "src": "hello {|4.2| +num|be|r}" }, diff --git a/testdata/message2/spec/syntax.json b/testdata/message2/spec/syntax.json index d03024bc81a..b334c8f734d 100644 --- a/testdata/message2/spec/syntax.json +++ b/testdata/message2/spec/syntax.json @@ -421,75 +421,110 @@ ] }, { - "description": "... literal -> quoted-literal -> \"|\" \"|\" ...", + "description": "... quoted-literal", "src": "{||}", "exp": "" }, { - "description": "... quoted-literal -> \"|\" quoted-char \"|\"", + "description": "... quoted-literal", "src": "{|a|}", "exp": "a" }, { - "description": "... quoted-literal -> \"|\" escaped-char \"|\"", + "description": "... quoted-literal", "src": "{|\\\\|}", "exp": "\\" }, { - "description": "... quoted-literal -> \"|\" quoted-char 1*escaped-char \"|\"", + "description": "... quoted-literal", "src": "{|a\\\\\\{\\|\\}|}", "exp": "a\\{|}" }, { - "description": "... unquoted-literal -> number-literal -> %x30", + "description": "... unquoted-literal", "src": "{0}", "exp": "0" }, { - "description": "... unquoted-literal -> number-literal -> \"-\" %x30", + "description": "... unquoted-literal", "src": "{-0}", "exp": "-0" }, { - "description": "... unquoted-literal -> number-literal -> (%x31-39 *DIGIT) -> %x31", + "description": "... unquoted-literal", "src": "{1}", "exp": "1" }, { - "description": "... unquoted-literal -> number-literal -> (%x31-39 *DIGIT) -> %x31 DIGIT -> 11", + "description": "... unquoted-literal", "src": "{11}", "exp": "11" }, { - "description": "... unquoted-literal -> number-literal -> %x30 \".\" 1*DIGIT -> 0 \".\" 1", + "description": "... 
unquoted-literal", "src": "{0.1}", "exp": "0.1" }, { - "description": "... unquoted-literal -> number-literal -> %x30 \".\" 1*DIGIT -> %x30 \".\" DIGIT DIGIT -> 0 \".\" 1 2", + "description": "... unquoted-literal", "src": "{0.12}", "exp": "0.12" }, { - "description": "... unquoted-literal -> number-literal -> %x30 %i\"e\" 1*DIGIT -> %x30 \"e\" DIGIT", + "description": "... unquoted-literal", "src": "{0e1}", "exp": "0e1" }, { - "description": "... unquoted-literal -> number-literal -> %x30 %i\"e\" 1*DIGIT -> %x30 \"E\" DIGIT", + "description": "... unquoted-literal", "src": "{0E1}", "exp": "0E1" }, { - "description": "... unquoted-literal -> number-literal -> %x30 %i\"e\" \"-\" 1*DIGIT ...", + "description": "... unquoted-literal", "src": "{0E-1}", "exp": "0E-1" }, { - "description": "... unquoted-literal -> number-literal -> %x30 %i\"e\" \"+\" 1*DIGIT ...", + "description": "... unquoted-literal", "src": "{0E-1}", "exp": "0E-1" }, + { + "description": "+ as unquoted-literal", + "src": "{+}", + "exp": "+" + }, + { + "description": "- as unquoted-literal", + "src": "{-}", + "exp": "-" + }, + { + "description": ". 
as unquoted-literal", + "src": "{·}", + "exp": "·" + }, + { + "description": "emoji as unquoted-literal", + "src": "{🥔}", + "exp": "🥔" + }, + { + "description": "emoji above U+FFFF as unquoted-literal, ", + "src": "{🀄️}", + "exp": "🀄️" + }, + { + "description": "multi-code-point emoji as unquoted-literal", + "src": "{🏳️‍🌈}", + "exp": "🏳️‍🌈" + }, + { + "description": "various characters as unquoted-literal", + "src": "{\u00a1\u061d\u1681\u200b\u2010\u2030\u2060\u206a\u3001\ue000\ufdf0}", + "exp": "\u00a1\u061d\u1681\u200b\u2010\u2030\u2060\u206a\u3001\ue000\ufdf0" + }, { "src": "hello { world\t\n}", "exp": "hello world" diff --git a/testdata/message2/syntax-errors-diagnostics.json b/testdata/message2/syntax-errors-diagnostics.json index 79a29a027f9..9e6e3bc28f7 100644 --- a/testdata/message2/syntax-errors-diagnostics.json +++ b/testdata/message2/syntax-errors-diagnostics.json @@ -236,11 +236,6 @@ "char": 2, "comment": "Variable names can't start with a : or -" }, - { - "src": "{$bar+foo}", - "char": 5, - "comment": "Missing space before annotation. 
Note that {{$bar:foo}} and {{$bar-foo}} are valid, because variable names can contain a ':' or a '-'" - }, { "src": "{|3.14|:foo}", "char": 7, @@ -338,10 +333,6 @@ "src": ".match {1} {{_}}", "char": 12, "comment": "Disambiguating a wrong .match from an unsupported statement" - }, - { - "src": "{{{/p o4.๔…ฒ = 1}}}", - "char": 9 } ] } diff --git a/testdata/message2/unsupported-expressions.json b/testdata/message2/unsupported-expressions.json index 7e9a64943bd..6b5f90bb430 100644 --- a/testdata/message2/unsupported-expressions.json +++ b/testdata/message2/unsupported-expressions.json @@ -12,7 +12,6 @@ "tests": [ { "src": "hello {|4.2| %number}" }, { "src": "hello {|4.2| %n|um|ber}" }, - { "src": "{+42}" }, { "src": "hello {|4.2| &num|be|r}" }, { "src": "hello {|4.2| ^num|be|r}" }, { "src": "hello {|4.2| +num|be|r}" }, @@ -48,14 +47,12 @@ { "src": "hello {$foo >num x \\\\ abcde |aaa||3.14||42| r }" }, { "src": "hello {$foo >num x \\\\ abcde |aaa||3.14| |42| r }" }, { "src" : ".input{ $n ~ }{{{$n}}}" }, - { "src": "foo {+reserved}"}, { "src": "foo {&private}" }, { "src": "foo {?reserved @a @b=$c}" }, { "src": "{!.}" }, { "src": "{! .}" }, { "src": "{%}" }, { "src": "{*}" }, - { "src": "{+}" }, { "src": "{<}" }, { "src": "{>}" }, { "src": "{?}" },