ICU-23059 ICU4C MF2: Update spec tests

This updates the MF2 spec tests to 943479b602 with the following exceptions:

- functions/currency.json and functions/math.json are omitted because the functions they test are not yet implemented

- bidi.json will be handled in a future PR

- u-options.json will be handled in a future PR

Changes include:
* `:integer` now returns a value encapsulating the rounded numeric value of the argument, rather than
  the value itself.
* Fallbacks are handled according to the current spec.
* Fallback values are not passed into functions.
* Characters inside literal fallbacks are properly escaped.
* The test runner skips null values properly.
* The test runner handles boolean `expErrors` in defaultTestProperties.
* `:string` normalizes its input, and normalizeNFC() has been refactored into a static method of StandardFunctions so it can be called there.
This commit is contained in:
Tim Chevalier 2025-02-24 16:17:54 -08:00
parent 60a0d9fcd8
commit 0748442ed6
23 changed files with 471 additions and 220 deletions

View file

@ -18,6 +18,7 @@
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
@ -36,29 +37,62 @@ static Formattable evalLiteral(const Literal& lit) {
}
// Assumes that `var` is a message argument; returns the argument's value.
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const UnicodeString& fallback,
const VariableName& var,
MessageContext& context,
UErrorCode& errorCode) const {
if (U_SUCCESS(errorCode)) {
// The fallback for a variable name is itself.
UnicodeString str(DOLLAR);
str += var;
const Formattable* val = context.getGlobal(*this, var, errorCode);
const Formattable* val = context.getGlobal(var, errorCode);
if (U_SUCCESS(errorCode)) {
return (FormattedPlaceholder(*val, str));
// Note: the fallback string has to be passed in because in a declaration like:
// .local $foo = {$bar :number}
// the fallback for $bar is "$foo".
UnicodeString fallbackToUse = fallback;
if (fallbackToUse.isEmpty()) {
fallbackToUse += DOLLAR;
fallbackToUse += var;
}
return (FormattedPlaceholder(*val, fallbackToUse));
}
}
return {};
}
// Returns the contents of the literal
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
// The fallback for a literal is itself.
return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
// Produces the quoted form of `s` used as a literal's fallback string:
// the text is wrapped in PIPE delimiters, and every backslash, pipe and
// curly brace inside it is preceded by a backslash so that the
// escaped-char sequences of the original literal are restored.
static UnicodeString reserialize(const UnicodeString& s) {
    UnicodeString quoted(PIPE);
    const int32_t len = s.length();
    for (int32_t pos = 0; pos < len; ++pos) {
        const char16_t unit = s[pos];
        // Only these four characters have an escaped form.
        const bool mustEscape = unit == BACKSLASH
            || unit == PIPE
            || unit == LEFT_CURLY_BRACE
            || unit == RIGHT_CURLY_BRACE;
        if (mustEscape) {
            quoted += BACKSLASH;
        }
        quoted += unit;
    }
    quoted += PIPE;
    return quoted;
}
[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const Environment& env,
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
// Returns the contents of the literal
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const UnicodeString& fallback,
const Literal& lit) const {
// The fallback for a literal is itself, unless another fallback is passed in
// (same reasoning as evalArgument())
UnicodeString fallbackToUse = fallback.isEmpty() ? reserialize(lit.unquoted()) : fallback;
return FormattedPlaceholder(evalLiteral(lit), fallbackToUse);
}
[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const UnicodeString& fallback,
const Environment& env,
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}
@ -77,17 +111,20 @@ static Formattable evalLiteral(const Literal& lit) {
// NFC-normalize the variable name. See
// https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
const VariableName normalized = normalizeNFC(var);
const VariableName normalized = StandardFunctions::normalizeNFC(var);
// Look up the variable in the environment
if (env.has(normalized)) {
// `var` is a local -- look it up
const Closure& rhs = env.lookup(normalized);
// Format the expression using the environment from the closure
return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
// The name of this local variable is the fallback for its RHS.
UnicodeString newFallback(DOLLAR);
newFallback += var;
return formatExpression(newFallback, rhs.getEnv(), rhs.getExpr(), context, status);
}
// Variable wasn't found in locals -- check if it's global
FormattedPlaceholder result = evalArgument(normalized, context, status);
FormattedPlaceholder result = evalArgument(fallback, normalized, context, status);
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
status = U_ZERO_ERROR;
// Unbound variable -- set a resolution error
@ -101,7 +138,7 @@ static Formattable evalLiteral(const Literal& lit) {
return create<InternalValue>(InternalValue(std::move(result)), status);
} else {
U_ASSERT(rand.isLiteral());
return create<InternalValue>(InternalValue(formatLiteral(rand.asLiteral())), status);
return create<InternalValue>(InternalValue(formatLiteral(fallback, rand.asLiteral())), status);
}
}
@ -122,7 +159,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
// Options are fully evaluated before calling the function
// Format the operand
LocalPointer<InternalValue> rhsVal(formatOperand(env, v, context, status));
LocalPointer<InternalValue> rhsVal(formatOperand({}, env, v, context, status));
if (U_FAILURE(status)) {
return {};
}
@ -227,17 +264,18 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
}
// Formats an expression using `globalEnv` for the values of variables
[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const Environment& globalEnv,
const Expression& expr,
MessageContext& context,
UErrorCode &status) const {
[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const UnicodeString& fallback,
const Environment& globalEnv,
const Expression& expr,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}
const Operand& rand = expr.getOperand();
// Format the operand (formatOperand handles the case of a null operand)
LocalPointer<InternalValue> randVal(formatOperand(globalEnv, rand, context, status));
LocalPointer<InternalValue> randVal(formatOperand(fallback, globalEnv, rand, context, status));
FormattedPlaceholder maybeRand = randVal->takeArgument(status);
@ -281,7 +319,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment&
} else {
// Format the expression
LocalPointer<InternalValue> partVal(
formatExpression(globalEnv, part.contents(), context, status));
formatExpression({}, globalEnv, part.contents(), context, status));
FormattedPlaceholder partResult = partVal->forceFormatting(context.getErrors(),
status);
// Force full evaluation, e.g. applying default formatters to
@ -315,7 +353,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
// 2. For each expression exp of the message's selectors
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// 2i. Let rv be the resolved value of exp.
LocalPointer<InternalValue> rv(formatOperand(env, Operand(selectors[i]), context, status));
LocalPointer<InternalValue> rv(formatOperand({}, env, Operand(selectors[i]), context, status));
if (rv->canSelect()) {
// 2ii. If selection is supported for rv:
// (True if this code has been reached)
@ -444,7 +482,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res,
// 2ii(b)(a) Assert that key is a literal.
// (Not needed)
// 2ii(b)(b) Let `ks` be the resolved value of `key` in Unicode Normalization Form C.
ks = normalizeNFC(key.asLiteral().unquoted());
ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
// 2ii(b)(c) Append `ks` as the last element of the list `keys`.
ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
CHECK_ERROR(status);
@ -505,7 +543,7 @@ void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UError
// 2i(c). Assert that `key` is a literal.
// (Not needed)
// 2i(d). Let `ks` be the resolved value of `key`.
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
// 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
// 2i(f). If `matches` includes `ks`
@ -567,7 +605,7 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo
// 5iii(c)(a). Assert that `key` is a literal.
// (Not needed)
// 5iii(c)(b). Let `ks` be the resolved value of `key`.
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
// 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
matchpref = vectorFind(matches, ks);
U_ASSERT(matchpref >= 0);
@ -692,14 +730,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn
// Check that variable is in scope
const VariableName& var = rand.asVariable();
UnicodeString normalized = normalizeNFC(var);
UnicodeString normalized = StandardFunctions::normalizeNFC(var);
// Check local scope
if (localEnv.has(normalized)) {
return;
}
// Check global scope
context.getGlobal(*this, normalized, status);
context.getGlobal(normalized, status);
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
status = U_ZERO_ERROR;
context.getErrors().setUnresolvedVariable(var, status);
@ -736,7 +774,7 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
// memoizing the value of localEnv up to this point
// Add the LHS to the environment for checking the next declaration
env = Environment::create(normalizeNFC(decl.getVariable()),
env = Environment::create(StandardFunctions::normalizeNFC(decl.getVariable()),
Closure(rhs, *env),
env,
status);

View file

@ -13,6 +13,7 @@
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model_names.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_function_registry_internal.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
@ -26,13 +27,12 @@ namespace message2 {
using Arguments = MessageArguments;
const Formattable* Arguments::getArgument(const MessageFormatter& context,
const VariableName& arg,
const Formattable* Arguments::getArgument(const VariableName& arg,
UErrorCode& errorCode) const {
if (U_SUCCESS(errorCode)) {
U_ASSERT(argsLen == 0 || arguments.isValid());
for (int32_t i = 0; i < argsLen; i++) {
UnicodeString normalized = context.normalizeNFC(argumentNames[i]);
UnicodeString normalized = StandardFunctions::normalizeNFC(argumentNames[i]);
// arg already assumed to be normalized
if (normalized == arg) {
return &arguments[i];

View file

@ -13,6 +13,7 @@
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
@ -113,7 +114,7 @@ Key Checker::normalizeNFC(const Key& k) const {
return k;
}
return Key(Literal(k.asLiteral().isQuoted(),
context.normalizeNFC(k.asLiteral().unquoted())));
StandardFunctions::normalizeNFC(k.asLiteral().unquoted())));
}
static bool areDefaultKeys(const Key* keys, int32_t len) {

View file

@ -211,10 +211,9 @@ PrioritizedVariant::~PrioritizedVariant() {}
errors.checkErrors(status);
}
const Formattable* MessageContext::getGlobal(const MessageFormatter& context,
const VariableName& v,
const Formattable* MessageContext::getGlobal(const VariableName& v,
UErrorCode& errorCode) const {
return arguments.getArgument(context, v, errorCode);
return arguments.getArgument(v, errorCode);
}
MessageContext::MessageContext(const MessageArguments& args,
@ -356,6 +355,10 @@ PrioritizedVariant::~PrioritizedVariant() {}
return {};
}
if (arg.isFallback()) {
return arg;
}
// The fallback for a nullary function call is the function name
UnicodeString fallback;
if (arg.isNullOperand()) {

View file

@ -149,9 +149,7 @@ namespace message2 {
public:
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
const Formattable* getGlobal(const MessageFormatter&,
const VariableName&,
UErrorCode&) const;
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
// If any errors were set, update `status` accordingly
void checkErrors(UErrorCode& status) const;

View file

@ -119,24 +119,6 @@ namespace message2 {
// MessageFormatter
// Returns the NFC-normalized version of s, returning s itself
// if it's already normalized.
UnicodeString MessageFormatter::normalizeNFC(const UnicodeString& s) const {
UErrorCode status = U_ZERO_ERROR;
// Check if string is already normalized
UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status);
// If so, return it
if (U_SUCCESS(status) && result == UNORM_YES) {
return s;
}
// Otherwise, normalize it
UnicodeString normalized = nfcNormalizer->normalize(s, status);
if (U_FAILURE(status)) {
return {};
}
return normalized;
}
MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
CHECK_ERROR(success);
@ -188,8 +170,6 @@ namespace message2 {
errors = errorsNew.orphan();
}
nfcNormalizer = Normalizer2::getNFCInstance(success);
// Note: we currently evaluate variables lazily,
// without memoization. This call is still necessary
// to check out-of-scope uses of local variables in
@ -218,7 +198,6 @@ namespace message2 {
signalErrors = other.signalErrors;
errors = other.errors;
other.errors = nullptr;
nfcNormalizer = other.nfcNormalizer;
return *this;
}

View file

@ -10,10 +10,13 @@
#if !UCONFIG_NO_MF2
#include <math.h>
#include <cmath>
#include "unicode/dtptngen.h"
#include "unicode/messageformat2.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/normalizer2.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "double-conversion.h"
@ -172,6 +175,28 @@ void MFFunctionRegistry::checkStandard() const {
// Formatter/selector helpers
// Returns `s` in Unicode Normalization Form C.
//
// `s` itself is returned unchanged when the quick check reports that it is
// already normalized, and also when the NFC normalizer instance cannot be
// obtained. If the normalization step itself fails, an empty string is
// returned. Errors are tracked in a local status and never reported to the
// caller.
/* static */ UnicodeString StandardFunctions::normalizeNFC(const UnicodeString& s) {
    UErrorCode localStatus = U_ZERO_ERROR;
    const Normalizer2* nfc = Normalizer2::getNFCInstance(localStatus);
    if (U_FAILURE(localStatus)) {
        return s;
    }

    // Fast path: nothing to do if the input is definitely normalized.
    const UNormalizationCheckResult check = nfc->quickCheck(s, localStatus);
    const bool alreadyNormalized = U_SUCCESS(localStatus) && check == UNORM_YES;
    if (alreadyNormalized) {
        return s;
    }

    // Slow path: normalize a copy of the input.
    UnicodeString normalized = nfc->normalize(s, localStatus);
    if (U_SUCCESS(localStatus)) {
        return normalized;
    }
    return {};
}
// Converts `s` to a double, indicating failure via `errorCode`
static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) {
CHECK_ERROR(errorCode);
@ -504,21 +529,6 @@ static UChar32 digitToChar(int32_t val, UErrorCode errorCode) {
}
}
static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
double numberValue = parseNumberLiteral(input.asFormattable(), errorCode);
if (U_FAILURE(errorCode)) {
return notANumber(input);
}
UErrorCode savedStatus = errorCode;
number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
// Ignore U_USING_DEFAULT_WARNING
if (errorCode == U_USING_DEFAULT_WARNING) {
errorCode = savedStatus;
}
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const {
Formattable opt;
@ -636,6 +646,8 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
realFormatter = formatterForOptions(*this, opts, errorCode);
number::FormattedNumber numberResult;
int64_t integerValue = 0;
if (U_SUCCESS(errorCode)) {
// Already checked that contents can be formatted
const Formattable& toFormat = arg.asFormattable();
@ -644,23 +656,31 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
double d = toFormat.getDouble(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
numberResult = realFormatter.formatDouble(d, errorCode);
integerValue = static_cast<int64_t>(std::round(d));
break;
}
case UFMT_LONG: {
int32_t l = toFormat.getLong(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
numberResult = realFormatter.formatInt(l, errorCode);
integerValue = l;
break;
}
case UFMT_INT64: {
int64_t i = toFormat.getInt64(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
numberResult = realFormatter.formatInt(i, errorCode);
integerValue = i;
break;
}
case UFMT_STRING: {
// Try to parse the string as a number
return tryParsingNumberLiteral(realFormatter, arg, errorCode);
double d = parseNumberLiteral(toFormat, errorCode);
if (U_FAILURE(errorCode))
return {};
numberResult = realFormatter.formatDouble(d, errorCode);
integerValue = static_cast<int64_t>(std::round(d));
break;
}
default: {
// Other types can't be parsed as a number
@ -670,6 +690,11 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
}
}
// Need to return the integer value if invoked as :integer
if (isInteger) {
return FormattedPlaceholder(FormattedPlaceholder(Formattable(integerValue), arg.getFallback()),
FormattedValue(std::move(numberResult)));
}
return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult)));
}
@ -1263,9 +1288,11 @@ void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat,
if (U_FAILURE(errorCode)) {
return;
}
// Normalize result
UnicodeString normalized = normalizeNFC(formattedValue);
for (int32_t i = 0; i < keysLen; i++) {
if (keys[i] == formattedValue) {
if (keys[i] == normalized) {
prefs[0] = keys[i];
prefsLen = 1;
break;

View file

@ -33,6 +33,11 @@ namespace message2 {
class StandardFunctions {
friend class MessageFormatter;
public:
// Used for normalizing variable names and keys for comparison
static UnicodeString normalizeNFC(const UnicodeString&);
private:
static UnicodeString getStringOption(const FunctionOptions& opts,
const UnicodeString& optionName,
UErrorCode& errorCode);

View file

@ -355,9 +355,7 @@ namespace message2 {
// Formatting methods
// Used for normalizing variable names and keys for comparison
UnicodeString normalizeNFC(const UnicodeString&) const;
[[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
[[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const;
void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
// Evaluates a function call
// Dispatches on argument type
@ -371,13 +369,21 @@ namespace message2 {
MessageContext& context,
UErrorCode& status) const;
// Formats an expression that appears in a pattern or as the definition of a local variable
[[nodiscard]] InternalValue* formatExpression(const Environment&,
const data_model::Expression&,
MessageContext&,
UErrorCode&) const;
[[nodiscard]] InternalValue* formatExpression(const UnicodeString&,
const Environment&,
const data_model::Expression&,
MessageContext&,
UErrorCode&) const;
[[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
[[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
[[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
[[nodiscard]] InternalValue* formatOperand(const UnicodeString&,
const Environment&,
const data_model::Operand&,
MessageContext&,
UErrorCode&) const;
[[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&,
const data_model::VariableName&,
MessageContext&,
UErrorCode&) const;
void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
// Function registry methods
@ -452,9 +458,6 @@ namespace message2 {
// The default is false.
bool signalErrors = false;
// Used for implementing normalizeNFC()
const Normalizer2* nfcNormalizer = nullptr;
}; // class MessageFormatter
} // namespace message2

View file

@ -114,8 +114,7 @@ namespace message2 {
private:
friend class MessageContext;
const Formattable* getArgument(const MessageFormatter&,
const data_model::VariableName&,
const Formattable* getArgument(const data_model::VariableName&,
UErrorCode&) const;
// Avoids using Hashtable so that code constructing a Hashtable

View file

@ -112,10 +112,11 @@ static bool setArguments(TestMessageFormat2& t,
return false; // For now, boolean and null arguments are unsupported
}
} else {
schemaError = true;
break;
// Null argument -- not supported
return false;
}
} else {
t.logln("name is null");
schemaError = true;
break;
}
@ -138,6 +139,7 @@ static bool setArguments(TestMessageFormat2& t,
static void runValidTest(TestMessageFormat2& icuTest,
const std::string& testName,
const std::string& defaultError,
bool anyError,
const json& j,
IcuTestErrorCode& errorCode) {
auto j_object = j.template get<json::object_t>();
@ -205,6 +207,9 @@ static void runValidTest(TestMessageFormat2& icuTest,
} else if (defaultError.length() > 0) {
test.setExpectedError(getExpectedRuntimeErrorFromString(defaultError));
expectedError = true;
} else if (anyError) {
test.setExpectedAnyError();
expectedError = true;
}
// If no expected result and no error, then set the test builder to expect success
@ -250,9 +255,13 @@ static void runTestsFromJsonFile(TestMessageFormat2& t,
// Some files have an expected error
std::string defaultError;
bool anyError = false;
if (!j_object["defaultTestProperties"].is_null()
&& !j_object["defaultTestProperties"]["expErrors"].is_null()) {
auto expErrors = j_object["defaultTestProperties"]["expErrors"];
// If expErrors is a boolean "true", that means we expect all tests
// to emit errors but we don't care which ones.
anyError = expErrors.is_boolean() && expErrors.template get<bool>();
// expErrors might also be a boolean, in which case we ignore it --
// so we have to check if it's an array
if (expErrors.is_array()) {
@ -273,7 +282,7 @@ static void runTestsFromJsonFile(TestMessageFormat2& t,
t.logln(u_str(iter->dump()));
runValidTest(t, testName, defaultError, *iter, errorCode);
runValidTest(t, testName, defaultError, anyError, *iter, errorCode);
}
} else {
// Test doesn't follow schema -- probably an error
@ -301,6 +310,7 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
// Do valid spec tests
runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
runTestsFromJsonFile(*this, "spec/fallback.json", errorCode);
// Uncomment when test functions are implemented in the registry
// See https://unicode-org.atlassian.net/browse/ICU-22907
@ -315,9 +325,6 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
// Other tests (non-spec)
// TODO: Delete this file after https://github.com/unicode-org/message-format-wg/pull/904
// lands and the tests here are updated from the spec repo
runTestsFromJsonFile(*this, "normalization.json", errorCode);
// TODO: https://github.com/unicode-org/message-format-wg/pull/902 will
// move the bidi tests into the spec
runTestsFromJsonFile(*this, "bidi.json", errorCode);

View file

@ -28,6 +28,7 @@ class TestCase : public UMemory {
/* const */ Locale locale;
/* const */ std::map<UnicodeString, Formattable> arguments;
/* const */ UErrorCode expectedError;
/* const */ bool arbitraryError = false;
/* const */ bool expectedNoSyntaxError;
/* const */ bool hasExpectedOutput;
/* const */ UnicodeString expected;
@ -45,11 +46,14 @@ class TestCase : public UMemory {
std::map<UnicodeString, Formattable> getArguments() const { return std::move(arguments); }
const UnicodeString& getTestName() const { return testName; }
bool expectSuccess() const {
return (!ignoreError && U_SUCCESS(expectedError));
return (!ignoreError && U_SUCCESS(expectedError) && !arbitraryError);
}
bool expectFailure() const {
return (!ignoreError && U_FAILURE(expectedError));
}
// Returns true if this test case expects some error to be reported,
// without specifying which error code it should be.
bool expectArbitraryError() const {
return arbitraryError;
}
bool expectNoSyntaxError() const {
return expectedNoSyntaxError;
}
@ -139,6 +143,10 @@ class TestCase : public UMemory {
expectedError = U_SUCCESS(errorCode) ? U_ZERO_ERROR : errorCode;
return *this;
}
// Marks the test case being built as expecting some error, without naming
// a specific error code. Returns this builder so calls can be chained.
Builder& setExpectedAnyError() {
arbitraryError = true;
return *this;
}
Builder& setNoSyntaxError() {
expectNoSyntaxError = true;
return *this;
@ -182,6 +190,7 @@ class TestCase : public UMemory {
bool hasExpectedOutput;
UnicodeString expected;
UErrorCode expectedError;
bool arbitraryError;
bool expectNoSyntaxError;
bool hasLineNumberAndOffset;
uint32_t lineNumber;
@ -190,7 +199,7 @@ class TestCase : public UMemory {
const MFFunctionRegistry* functionRegistry = nullptr; // Not owned
public:
Builder() : pattern(""), locale(Locale::getDefault()), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {}
Builder() : pattern(""), locale(Locale::getDefault()), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), arbitraryError(false), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {}
};
private:
@ -200,6 +209,7 @@ class TestCase : public UMemory {
locale(builder.locale),
arguments(builder.arguments),
expectedError(builder.expectedError),
arbitraryError(builder.arbitraryError),
expectedNoSyntaxError(builder.expectNoSyntaxError),
hasExpectedOutput(builder.hasExpectedOutput),
expected(builder.expected),
@ -270,6 +280,9 @@ class TestUtils {
failExpectedSuccess(tmsg, testCase, errorCode, parseError.line, parseError.offset);
return;
}
if (testCase.expectArbitraryError() && U_SUCCESS(errorCode)) {
failExpectedArbitraryError(tmsg, testCase);
}
if (testCase.expectFailure() && errorCode != testCase.expectedErrorCode()) {
failExpectedFailure(tmsg, testCase, errorCode);
return;
@ -323,6 +336,10 @@ class TestUtils {
tmsg.errln(testCase.getTestName() + " failed test with wrong error code; pattern: " + testCase.getPattern() + " and error code " + UnicodeString(u_errorName(errorCode)) + " and expected error code: " + UnicodeString(u_errorName(testCase.expectedErrorCode())));
errorCode.reset();
}
// Reports a failure for a test case that was expected to produce some
// (unspecified) error but completed successfully. The test name is logged
// via dataerrln() first, followed by an errln() message with the pattern.
static void failExpectedArbitraryError(IntlTest& tmsg, const TestCase& testCase) {
    const UnicodeString& name = testCase.getTestName();
    tmsg.dataerrln(name);

    const UnicodeString message =
        name + " succeeded although any error was expected; pattern: " + testCase.getPattern();
    tmsg.errln(message);
}
static void failWrongOutput(IntlTest& tmsg, const TestCase& testCase, const UnicodeString& result) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with wrong output; pattern: " + testCase.getPattern() + " and expected output = " + testCase.expectedOutput() + " and actual output = " + result);

View file

@ -63,7 +63,7 @@
{
"src": [".local $num = {|42| :number}\n",
"{{Testing date formatting: {$num :datetime}}}"],
"exp": "Testing date formatting: {|42|}",
"exp": "Testing date formatting: {$num}",
"expErrors": [{"type": "bad-operand"}]
},
{

View file

@ -1,67 +0,0 @@
{
"$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json",
"scenario": "Syntax",
"description": "Test cases that do not depend on any registry definitions.",
"defaultTestProperties": {
"locale": "en-US"
},
"tests": [
{
"description": "NFC: text is not normalized",
"src": "\u1E0A\u0323",
"exp": "\u1E0A\u0323"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".local $\u0044\u0323\u0307 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".local $\u1E0c\u0307 = {foo} {{{$\u0044\u0323\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".input {$\u1E0c\u0307} {{{$\u0044\u0323\u0307}}}",
"params": [{"name": "\u1E0c\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".input {$\u0044\u0323\u0307} {{{$\u1E0c\u0307}}}",
"params": [{"name": "\u0044\u0323\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; reordering",
"src": ".local $\u0044\u0307\u0323 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; special case mapping",
"src": ".local $\u0041\u030A\u0301 = {foo} {{{$\u01FA}}}",
"exp": "foo"
},
{
"description": "NFC: keys are normalized",
"src": ".local $x = {\u1E0C\u0307 :string} .match $x \u1E0A\u0323 {{Right}} * {{Wrong}}",
"exp": "Right"
},
{
"description": "NFC: keys are normalized (unquoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x \u1E0A\u0323 {{Not normalized}} \u1E0C\u0307 {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (quoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x |\u1E0A\u0323| {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (mixed)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x \u1E0A\u0323 {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
}
]
}

146
testdata/message2/spec/bidi.json vendored Normal file
View file

@ -0,0 +1,146 @@
{
"scenario": "Bidi support",
"description": "Tests for correct parsing of messages with bidirectional marks and isolates",
"defaultTestProperties": {
"bidiIsolation": "default",
"locale": "en-US"
},
"tests": [
{
"description": "simple-message = o [simple-start pattern]",
"src": " \u061C Hello world!",
"exp": " \u061C Hello world!"
},
{
"description": "complex-message = o *(declaration o) complex-body o",
"src": "\u200E .local $x = {1} {{ {$x}}}",
"exp": " \u20681\u2069"
},
{
"description": "complex-message = o *(declaration o) complex-body o",
"src": ".local $x = {1} \u200F {{ {$x}}}",
"exp": " \u20681\u2069"
},
{
"description": "complex-message = o *(declaration o) complex-body o",
"src": ".local $x = {1} {{ {$x}}} \u2066",
"exp": " \u20681\u2069"
},
{
"description": "input-declaration = input o variable-expression",
"src": ".input \u2067 {$x :number} {{hello}}",
"params": [{"name": "x", "value": "1"}],
"exp": "hello"
},
{
"description": "local s variable o \"=\" o expression",
"src": ".local $x \u2068 = \u2069 {1} {{hello}}",
"exp": "hello"
},
{
"description": "local s variable o \"=\" o expression",
"src": ".local \u2067 $x = {1} {{hello}}",
"exp": "hello"
},
{
"description": "local s variable o \"=\" o expression",
"src": ".local\u2067 $x = {1} {{hello}}",
"exp": "hello"
},
{
"description": "o \"{{\" pattern \"}}\"",
"src": "\u2067 {{hello}}",
"exp": "hello"
},
{
"description": "match-statement s variant *(o variant)",
"src": ".local $x = {1 :number}\n.match $x\n1 {{one}}\n\u061C * {{other}}",
"exp": "one"
},
{
"description": "match-statement s variant *(o variant)",
"src": ".local $x = {1 :number}.match $x \u061c1 {{one}}* {{other}}",
"exp": "one"
},
{
"description": "match-statement s variant *(o variant)",
"src": ".local $x = {1 :number}.match $x\u061c1 {{one}}* {{other}}",
"expErrors": [{"type": "syntax-error"}]
},
{
"description": "variant = key *(s key) quoted-pattern",
"src": ".local $x = {1 :number} .local $y = {$x :number}.match $x $y\n1 \u200E 1 {{one}}* * {{other}}",
"exp": "one"
},
{
"description": "variant = key *(s key) quoted-pattern",
"src": ".local $x = {1 :number} .local $y = {$x :number}.match $x $y\n1\u200E 1 {{one}}* * {{other}}",
"exp": "one"
},
{
"description": "literal-expression = \"{\" o literal [s function] *(s attribute) o \"}\"",
"src": "{\u200E hello \u200F}",
"exp": "\u2068hello\u2069"
},
{
"description": "variable-expression = \"{\" o variable [s function] *(s attribute) o \"}\"",
"src": ".local $x = {1} {{ {\u200E $x \u200F} }}",
"exp": " \u20681\u2069 "
},
{
"description": "function-expression = \"{\" o function *(s attribute) o \"}\"",
"src": "{1 \u200E :number \u200F}",
"exp": "1"
},
{
"description": "markup = \"{\" o \"#\" identifier *(s option) *(s attribute) o [\"/\"] \"}\"",
"src": "{\u200F #b \u200E }",
"exp": ""
},
{
"description": "markup = \"{\" o \"/\" identifier *(s option) *(s attribute) o \"}\"",
"src": "{\u200F /b \u200E }",
"exp": ""
},
{
"description": "option = identifier o \"=\" o (literal / variable)",
"src": "{1 :number minimumFractionDigits\u200F=\u200E1 }",
"exp": "1.0"
},
{
"description": "attribute = \"@\" identifier [o \"=\" o (literal / variable)]",
"src": "{1 :number @locale\u200F=\u200Een }",
"exp": "1"
},
{
"description": " name... excludes U+FFFD and U+061C -- this pases as name -> [bidi] name-start *name-char",
"src": ".local $\u061Cfoo = {1} {{ {$\u061Cfoo} }}",
"exp": " \u20681\u2069 "
},
{
"description": " name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C",
"src": ".local $foo\u061Cbar = {2} {{ }}",
"expErrors": [{"type": "syntax-error"}]
},
{
"description": "name = [bidi] name-start *name-char [bidi]",
"src": ".local $\u200Efoo\u200F = {3} {{{$\u200Efoo\u200F}}}",
"exp": "\u20683\u2069"
},
{
"description": "name = [bidi] name-start *name-char [bidi]",
"src": ".local $foo = {4} {{{$\u200Efoo\u200F}}}",
"exp": "\u20684\u2069"
},
{
"description": "name = [bidi] name-start *name-char [bidi]",
"src": ".local $\u200Efoo\u200F = {5} {{{$foo}}}",
"exp": "\u20685\u2069"
},
{
"description": "name = [bidi] name-start *name-char [bidi]",
"src": ".local $foo\u200Ebar = {5} {{{$foo\u200Ebar}}}",
"expErrors": [{"type": "syntax-error"}]
}
]
}

52
testdata/message2/spec/fallback.json vendored Normal file
View file

@ -0,0 +1,52 @@
{
"$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json",
"scenario": "Fallback",
"description": "Test cases for fallback behaviour.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US",
"expErrors": true
},
"tests": [
{
"description": "function with unquoted literal operand",
"src": "{42 :test:function fails=format}",
"exp": "{|42|}"
},
{
"description": "function with quoted literal operand",
"src": "{|C:\\\\| :test:function fails=format}",
"exp": "{|C:\\\\|}"
},
{
"description": "unannotated implicit input variable",
"src": "{$var}",
"exp": "{$var}"
},
{
"description": "annotated implicit input variable",
"src": "{$var :number}",
"exp": "{$var}"
},
{
"description": "local variable with unknown function in declaration",
"src": ".local $var = {|val| :test:undefined} {{{$var}}}",
"exp": "{$var}"
},
{
"description": "function with local variable operand with unknown function in declaration",
"src": ".local $var = {|val| :test:undefined} {{{$var :test:function}}}",
"exp": "{$var}"
},
{
"description": "local variable with unknown function in placeholder",
"src": ".local $var = {|val|} {{{$var :test:undefined}}}",
"exp": "{$var}"
},
{
"description": "function with no operand",
"src": "{:test:undefined}",
"exp": "{:test:undefined}"
}
]
}

View file

@ -3,6 +3,7 @@
"scenario": "Date function",
"description": "The built-in formatter for dates.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US",
"expErrors": false
},
@ -35,10 +36,10 @@
"src": "{|2006-01-02| :date style=long}"
},
{
"src": ".local $d = {|2006-01-02| :date style=long} {{{$d :date}}}"
"src": ".local $d = {|2006-01-02| :date style=long} {{{$d}}}"
},
{
"src": ".local $t = {|2006-01-02T15:04:06| :time} {{{$t :date}}}"
"src": ".local $d = {|2006-01-02| :datetime dateStyle=long timeStyle=long} {{{$d :date}}}"
}
]
}

View file

@ -3,6 +3,7 @@
"scenario": "Datetime function",
"description": "The built-in formatter for datetimes.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US",
"expErrors": false
},

View file

@ -3,6 +3,7 @@
"scenario": "Integer function",
"description": "The built-in formatter for integers.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US"
},
"tests": [
@ -19,14 +20,18 @@
"exp": "hello 4"
},
{
"src": ".input {$foo :integer} .match $foo 1 {{one}} * {{other}}",
"params": [
{
"name": "foo",
"value": 1.2
}
],
"exp": "one"
"src": ".input {$foo :integer} .match $foo 1 {{=1}} * {{other}}",
"params": [{ "name": "foo", "value": 1.2 }],
"exp": "=1"
},
{
"src": ".input {$foo :integer} .match $foo 1 {{=1}} one {{one}} * {{other}}",
"params": [{ "name": "foo", "value": 1.2 }],
"exp": "=1"
},
{
"src": ".local $x = {1.25 :integer} .local $y = {$x :number} {{{$y}}}",
"exp": "1"
}
]
}

View file

@ -3,6 +3,7 @@
"scenario": "Number function",
"description": "The built-in formatter for numbers.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US"
},
"tests": [
@ -131,33 +132,14 @@
},
{
"src": ".local $foo = {$bar :number minimumFractionDigits=foo} {{bar {$foo}}}",
"params": [
{
"name": "bar",
"value": 4.2
}
],
"exp": "bar {$bar}",
"expErrors": [
{
"type": "bad-option"
}
]
"params": [{ "name": "bar", "value": 4.2 }],
"expErrors": [{ "type": "bad-option" }]
},
{
"src": ".local $foo = {$bar :number} {{bar {$foo}}}",
"params": [
{
"name": "bar",
"value": "foo"
}
],
"exp": "bar {$bar}",
"expErrors": [
{
"type": "bad-operand"
}
]
"params": [{ "name": "bar", "value": "foo" }],
"exp": "bar {$foo}",
"expErrors": [{ "type": "bad-operand" }]
},
{
"src": ".input {$foo :number} {{bar {$foo}}}",
@ -181,18 +163,8 @@
},
{
"src": ".input {$foo :number minimumFractionDigits=foo} {{bar {$foo}}}",
"params": [
{
"name": "foo",
"value": 4.2
}
],
"exp": "bar {$foo}",
"expErrors": [
{
"type": "bad-option"
}
]
"params": [{ "name": "foo", "value": 4.2 }],
"expErrors": [{ "type": "bad-option" }]
},
{
"src": ".input {$foo :number} {{bar {$foo}}}",

View file

@ -3,6 +3,7 @@
"scenario": "String function",
"description": "The built-in formatter for strings.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US"
},
"tests": [
@ -34,8 +35,7 @@
"value": null
}
],
"exp": "other",
"ignoreCpp": "Explicit null doesn't work"
"exp": "other"
},
{
"src": ".input {$foo :string} .match $foo 1 {{one}} * {{other}}",
@ -45,6 +45,31 @@
"type": "unresolved-variable"
}
]
},
{
"description": "NFC: keys are normalized (unquoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x \u1E0A\u0323 {{Not normalized}} \u1E0C\u0307 {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (quoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x |\u1E0A\u0323| {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (mixed)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x \u1E0A\u0323 {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: :string normalizes the comparison value (un-normalized selector, normalized key)",
"src": ".local $x = {\u1E0A\u0323 :string} .match $x \u1E0C\u0307 {{Right}} * {{Wrong}}",
"exp": "Right"
},
{
"description": "NFC: keys are normalized (normalized selector, un-normalized key)",
"src": ".local $x = {\u1E0C\u0307 :string} .match $x \u1E0A\u0323 {{Right}} * {{Wrong}}",
"exp": "Right"
}
]
}

View file

@ -3,6 +3,7 @@
"scenario": "Time function",
"description": "The built-in formatter for times.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US",
"expErrors": false
},
@ -32,10 +33,10 @@
"src": "{|2006-01-02T15:04:06| :time style=medium}"
},
{
"src": ".local $t = {|2006-01-02T15:04:06| :time style=medium} {{{$t :time}}}"
"src": ".local $t = {|2006-01-02T15:04:06| :time style=medium} {{{$t}}}"
},
{
"src": ".local $d = {|2006-01-02T15:04:06| :date} {{{$d :time}}}"
"src": ".local $t = {|2006-01-02T15:04:06| :datetime dateStyle=long timeStyle=long} {{{$t :time}}}"
}
]
}

View file

@ -3,6 +3,7 @@
"scenario": "Syntax",
"description": "Test cases that do not depend on any registry definitions.",
"defaultTestProperties": {
"bidiIsolation": "none",
"locale": "en-US"
},
"tests": [
@ -697,6 +698,43 @@
{
"src": "{{trailing whitespace}} \n",
"exp": "trailing whitespace"
},
{
"description": "NFC: text is not normalized",
"src": "\u1E0A\u0323",
"exp": "\u1E0A\u0323"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".local $\u0044\u0323\u0307 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".local $\u1E0c\u0307 = {foo} {{{$\u0044\u0323\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".input {$\u1E0c\u0307} {{{$\u0044\u0323\u0307}}}",
"params": [{"name": "\u1E0c\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".input {$\u0044\u0323\u0307} {{{$\u1E0c\u0307}}}",
"params": [{"name": "\u0044\u0323\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; reordering",
"src": ".local $\u0044\u0307\u0323 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; special case mapping",
"src": ".local $\u0041\u030A\u0301 = {foo} {{{$\u01FA}}}",
"exp": "foo"
}
]
}