ICU-22261 Add tech preview implementation for MessageFormat 2.0 to icu4c

This commit is contained in:
Tim Chevalier 2023-05-12 22:23:28 -07:00 committed by Elango Cheran
parent 4b8f8f3c48
commit f7d641d5ad
46 changed files with 18258 additions and 2 deletions

View file

@ -438,6 +438,7 @@ typedef enum UErrorCode {
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UErrorCode warning value.
@ -568,12 +569,27 @@ typedef enum UErrorCode {
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
/* MessageFormat 2.0 errors */
U_MF_UNRESOLVED_VARIABLE_ERROR, /** A variable is referred to but not bound by any definition */
U_MF_SYNTAX_ERROR, /** Includes all syntax errors */
U_MF_UNKNOWN_FUNCTION_ERROR, /** An annotation refers to a function not defined by the standard or custom function registry */
U_MF_VARIANT_KEY_MISMATCH_ERROR, /** In a match-construct, one or more variants had a different number of keys from the number of selectors */
U_MF_FORMATTING_ERROR, /** Covers all runtime errors: for example, an internally inconsistent set of options. */
U_MF_NONEXHAUSTIVE_PATTERN_ERROR, /** In a match-construct, the variants do not cover all possible values */
U_MF_DUPLICATE_OPTION_NAME_ERROR, /** In an annotation, the same option name appears more than once */
U_MF_SELECTOR_ERROR, /** A selector function is applied to an operand of the wrong type */
U_MF_MISSING_SELECTOR_ANNOTATION_ERROR, /** A selector expression evaluates to an unannotated operand */
U_MF_DUPLICATE_DECLARATION_ERROR, /** The same variable is declared in more than one .local or .input declaration */
U_MF_OPERAND_MISMATCH_ERROR, /** An operand provided to a function does not have the required form for that function */
U_MF_UNSUPPORTED_STATEMENT_ERROR, /** A message includes a reserved statement */
U_MF_UNSUPPORTED_EXPRESSION_ERROR, /** A message includes syntax reserved for future standardization or private implementation use */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
#endif // U_HIDE_DEPRECATED_API
/*

View file

@ -129,6 +129,19 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_FORMAT_INEXACT_ERROR",
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR",
"U_NUMBER_SKELETON_SYNTAX_ERROR",
"U_MF_UNRESOLVED_VARIABLE_ERROR",
"U_MF_SYNTAX_ERROR",
"U_MF_UNKNOWN_FUNCTION_ERROR",
"U_MF_VARIANT_KEY_MISMATCH_ERROR",
"U_MF_FORMATTING_ERROR",
"U_MF_NONEXHAUSTIVE_PATTERN_ERROR",
"U_MF_DUPLICATE_OPTION_NAME_ERROR",
"U_MF_SELECTOR_ERROR",
"U_MF_MISSING_SELECTOR_ANNOTATION_ERROR",
"U_MF_DUPLICATE_DECLARATION_ERROR",
"U_MF_OPERAND_MISMATCH_ERROR",
"U_MF_UNSUPPORTED_STATEMENT_ERROR",
"U_MF_UNSUPPORTED_EXPRESSION_ERROR"
};
static const char * const

View file

@ -173,6 +173,17 @@
<ClCompile Include="measunit.cpp" />
<ClCompile Include="measunit_extra.cpp" />
<ClCompile Include="measure.cpp" />
<ClCompile Include="messageformat2.cpp" />
<ClCompile Include="messageformat2_arguments.cpp" />
<ClCompile Include="messageformat2_checker.cpp" />
<ClCompile Include="messageformat2_data_model.cpp" />
<ClCompile Include="messageformat2_errors.cpp" />
<ClCompile Include="messageformat2_evaluation.cpp" />
<ClCompile Include="messageformat2_formattable.cpp" />
<ClCompile Include="messageformat2_formatter.cpp" />
<ClCompile Include="messageformat2_function_registry.cpp" />
<ClCompile Include="messageformat2_parser.cpp" />
<ClCompile Include="messageformat2_serializer.cpp" />
<ClCompile Include="msgfmt.cpp" />
<ClCompile Include="nfrs.cpp" />
<ClCompile Include="nfrule.cpp" />

View file

@ -213,6 +213,42 @@
<ClCompile Include="measure.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_arguments.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_checker.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_context.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_data_model.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_errors.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_evaluation.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_formattable.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_formatter.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_function_registry.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_parser.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messageformat2_serializer.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="msgfmt.cpp">
<Filter>formatting</Filter>
</ClCompile>

View file

@ -406,6 +406,17 @@
<ClCompile Include="measunit.cpp" />
<ClCompile Include="measunit_extra.cpp" />
<ClCompile Include="measure.cpp" />
<ClCompile Include="messageformat2.cpp" />
<ClCompile Include="messageformat2_arguments.cpp" />
<ClCompile Include="messageformat2_checker.cpp" />
<ClCompile Include="messageformat2_data_model.cpp" />
<ClCompile Include="messageformat2_errors.cpp" />
<ClCompile Include="messageformat2_evaluation.cpp" />
<ClCompile Include="messageformat2_formattable.cpp" />
<ClCompile Include="messageformat2_formatter.cpp" />
<ClCompile Include="messageformat2_function_registry.cpp" />
<ClCompile Include="messageformat2_parser.cpp" />
<ClCompile Include="messageformat2_serializer.cpp" />
<ClCompile Include="msgfmt.cpp" />
<ClCompile Include="nfrs.cpp" />
<ClCompile Include="nfrule.cpp" />

View file

@ -0,0 +1,874 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_formattable.h"
#include "unicode/messageformat2.h"
#include "unicode/unistr.h"
#include "messageformat2_allocation.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_macros.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// ------------------------------------------------------
// Formatting
// Evaluates a literal: the result is a Formattable built from the
// literal's `unquoted()` contents (a literal simply denotes itself).
static Formattable evalLiteral(const Literal& lit) {
    Formattable literalValue(lit.unquoted());
    return literalValue;
}
// Assumes that `var` names a message argument and returns that argument's
// value, looked up through `context.getGlobal()`.
// The fallback string of the returned placeholder is DOLLAR followed by the
// variable name.
// Returns a default-constructed placeholder if `errorCode` is a failure on
// entry, or if the lookup leaves `errorCode` in a failure state (the caller
// is expected to inspect `errorCode`).
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    const Formattable* argValue = context.getGlobal(var, errorCode);
    if (U_FAILURE(errorCode)) {
        return {};
    }

    // The fallback for a variable name is itself.
    UnicodeString fallbackStr(DOLLAR);
    fallbackStr += var;
    return FormattedPlaceholder(*argValue, fallbackStr);
}
// Wraps a literal in a FormattedPlaceholder. The value comes from
// `evalLiteral()`; the fallback string is the literal's `quoted()` form
// (the fallback for a literal is itself).
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
    UnicodeString fallbackStr = lit.quoted();
    return FormattedPlaceholder(evalLiteral(lit), fallbackStr);
}
// Formats a single operand, which is either absent (null), a literal, or a
// variable reference.
//   - null operand: a default-constructed placeholder is returned
//   - literal:      the literal formatted by `formatLiteral()`
//   - variable:     looked up first in `env` (locals), then among the
//                   message arguments via `evalArgument()`
// An unbound variable is not a hard failure: it is recorded in the context's
// error list and a fallback placeholder (DOLLAR + name) is returned with
// `status` reset to U_ZERO_ERROR.
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
                                                                   const Operand& rand,
                                                                   MessageContext& context,
                                                                   UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    if (rand.isNull()) {
        return FormattedPlaceholder();
    }

    if (!rand.isVariable()) {
        // Neither null nor a variable, so it has to be a literal
        U_ASSERT(rand.isLiteral());
        return formatLiteral(rand.asLiteral());
    }

    // Decide whether the variable is local or global.
    // Note: there is no name shadowing; this is enforced by the parser
    const VariableName& name = rand.asVariable();

    // TODO: Currently, this code implements lazy evaluation of locals.
    // That is, the environment binds names to a closure, not a resolved value.
    // Eager vs. lazy evaluation is an open issue:
    // see https://github.com/unicode-org/message-format-wg/issues/299
    if (env.has(name)) {
        // A local: evaluate the bound expression in the environment that
        // was captured by its closure
        const Closure& closure = env.lookup(name);
        return formatExpression(closure.getEnv(), closure.getExpr(), context, status);
    }

    // Not a local -- try the message arguments
    FormattedPlaceholder argResult = evalArgument(name, context, status);
    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
        // Either success or some other error, which is passed through
        return argResult;
    }

    // Unbound variable -- clear the status and record a resolution error
    status = U_ZERO_ERROR;
    context.getErrors().setUnresolvedVariable(name, status);
    // Use fallback per
    // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
    UnicodeString fallbackStr(DOLLAR);
    fallbackStr += name;
    return FormattedPlaceholder(fallbackStr);
}
// Resolves a function's options: each option's operand is formatted in
// `env`, and every option whose value is not a fallback is collected into
// the returned FunctionOptions. Options whose operand resolves to a fallback
// value are silently omitted.
// Returns a default-constructed FunctionOptions as soon as `status` becomes
// a failure.
FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
    LocalPointer<UVector> optionsVector(createUVector(status));
    if (U_FAILURE(status)) {
        return {};
    }
    LocalPointer<ResolvedFunctionOption> resolvedOpt;
    for (int32_t i = 0; i < options.size(); i++) {
        const Option& opt = options.getOption(i, status);
        if (U_FAILURE(status)) {
            return {};
        }
        const UnicodeString& k = opt.getName();
        const Operand& v = opt.getValue();

        // Options are fully evaluated before calling the function
        // Format the operand
        FormattedPlaceholder rhsVal = formatOperand(env, v, context, status);
        if (U_FAILURE(status)) {
            return {};
        }
        if (!rhsVal.isFallback()) {
            resolvedOpt.adoptInstead(create<ResolvedFunctionOption>(ResolvedFunctionOption(k, rhsVal.asFormattable()), status));
            if (U_FAILURE(status)) {
                return {};
            }
            optionsVector->adoptElement(resolvedOpt.orphan(), status);
            // Stop right away if the vector could not take the element,
            // like every other fallible step in this loop
            if (U_FAILURE(status)) {
                return {};
            }
        }
    }

    return FunctionOptions(std::move(*optionsVector), status);
}
// Overload that picks a formatter from the type of the argument. The syntax
// provides no way to pass options in this case, so an empty FunctionOptions
// is used.
// Only object-typed values are dispatched: if a default formatter name is
// registered for the object's tag, that formatter is invoked. Every other
// value is handed back unchanged (primitive types are formatted later).
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument,
                                                                       MessageContext& context,
                                                                       UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    // Callers are expected to have filtered out these cases already
    U_ASSERT(!argument.isFallback() && !argument.isNullOperand());

    const Formattable& value = argument.asFormattable();
    if (value.getType() == UFMT_OBJECT) {
        const FormattableObject* object = value.getObject(status);
        U_ASSERT(U_SUCCESS(status));
        U_ASSERT(object != nullptr);
        const UnicodeString& typeTag = object->tag();
        FunctionName defaultFormatter;
        if (getDefaultFormatterNameByType(typeTag, defaultFormatter)) {
            return evalFormatterCall(defaultFormatter,
                                     std::move(argument),
                                     FunctionOptions(),
                                     context,
                                     status);
        }
        // No formatter registered for this tag -- fall through to the
        // default behavior below
    }
    // TODO: The array case isn't handled yet; not sure whether it's desirable
    // to have a default list formatter

    // No formatter for this type, or it's a primitive type (which will be
    // formatted later) -- just return the argument itself
    return std::move(argument);
}
// Overload that dispatches on function name.
// Invokes the formatter named `functionName` on `argument` with `options`.
// Failures reported by the formatter are not propagated through `status`:
// they are recorded in the context's error list (as an operand-mismatch
// error or a generic formatting error), `status` is reset, and a fallback
// placeholder is returned instead.
// If `functionName` does not name a formatter, an error is recorded (a
// formatting error if it names a selector, an unknown-function error
// otherwise) and the fallback placeholder is returned.
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName,
                                                                       FormattedPlaceholder&& argument,
                                                                       FunctionOptions&& options,
                                                                       MessageContext& context,
                                                                       UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    DynamicErrors& errs = context.getErrors();

    // The fallback is the operand's own fallback string if there is an
    // operand; for a nullary call it is COLON followed by the function name
    UnicodeString fallback(COLON);
    fallback += functionName;
    if (!argument.isNullOperand()) {
        fallback = argument.fallback;
    }

    if (isFormatter(functionName)) {
        const Formatter& formatterImpl = getFormatter(context, functionName, status);
        // Don't call through the formatter if obtaining it failed
        // (mirrors the check made after getSelector() in resolveVariables())
        if (U_FAILURE(status)) {
            return {};
        }

        UErrorCode savedStatus = status;
        FormattedPlaceholder result = formatterImpl.format(std::move(argument), std::move(options), status);
        // Update errors
        if (savedStatus != status) {
            if (U_FAILURE(status)) {
                if (status == U_MF_OPERAND_MISMATCH_ERROR) {
                    status = U_ZERO_ERROR;
                    errs.setOperandMismatchError(functionName, status);
                } else {
                    status = U_ZERO_ERROR;
                    // Convey any error generated by the formatter
                    // as a formatting error, except for operand mismatch errors
                    errs.setFormattingError(functionName, status);
                }
                return FormattedPlaceholder(fallback);
            } else {
                // Ignore warnings
                status = savedStatus;
            }
        }
        // Ignore the output if any errors occurred
        if (errs.hasFormattingError()) {
            return FormattedPlaceholder(fallback);
        }
        return result;
    }

    // No formatter with this name -- set error
    if (isSelector(functionName)) {
        errs.setFormattingError(functionName, status);
    } else {
        errs.setUnknownFunction(functionName, status);
    }
    return FormattedPlaceholder(fallback);
}
// Computes the fallback string for a reserved expression: the first code
// unit of the unquoted contents of the reserved body's first part.
// Per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
// Precondition (asserted in debug builds): `e` has an operator, and its
// reserved body has at least one non-empty part.
static UnicodeString reservedFallback (const Expression& e) {
    UErrorCode localStatus = U_ZERO_ERROR;
    const Operator* op = e.getOperator(localStatus);
    U_ASSERT(U_SUCCESS(localStatus));

    const Reserved& reservedBody = op->asReserved();
    // The syntax cannot express an empty Reserved
    U_ASSERT(reservedBody.numParts() > 0);

    const UnicodeString& firstPart = reservedBody.getPart(0).unquoted();
    // A part is never empty
    U_ASSERT(firstPart.length() > 0);

    // Keep only the leading character of the first part
    UnicodeString head(firstPart, 0, 1);
    return head;
}
// Formats an expression using `globalEnv` for the values of variables.
//   - A reserved expression records a reserved error in the context and
//     yields the fallback computed by `reservedFallback()`.
//   - Otherwise the operand is formatted first; a fallback operand is
//     returned as-is without calling any function.
//   - Without an annotation, a formatter is chosen from the operand's type;
//     with one, options are resolved and the named formatter is called.
// Returns a default-constructed placeholder if `status` is (or becomes) a
// failure.
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv,
                                                                      const Expression& expr,
                                                                      MessageContext& context,
                                                                      UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    // Formatting error
    if (expr.isReserved()) {
        context.getErrors().setReservedError(status);
        return FormattedPlaceholder(reservedFallback(expr));
    }

    const Operand& rand = expr.getOperand();
    // Format the operand (formatOperand handles the case of a null operand)
    FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status);
    // Stop here if formatting the operand failed: the code below asserts
    // that `status` is a success after fetching the operator
    if (U_FAILURE(status)) {
        return {};
    }

    // Don't call the function on error values
    if (randVal.isFallback()) {
        return randVal;
    }

    if (!expr.isFunctionCall()) {
        // Dispatch based on type of `randVal`
        return evalFormatterCall(std::move(randVal),
                                 context,
                                 status);
    }

    const Operator* rator = expr.getOperator(status);
    U_ASSERT(U_SUCCESS(status));
    const FunctionName& functionName = rator->getFunctionName();
    const OptionMap& options = rator->getOptionsInternal();
    // Resolve the options
    FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);

    // Call the formatter function. The fallback value (operand fallback, or
    // the function name for a nullary call) is computed by the callee.
    return evalFormatterCall(functionName,
                             std::move(randVal),
                             std::move(resolvedOptions),
                             context,
                             status);
}
// Walks the parts of `pat` in order and appends their output to `result`:
// text parts are appended verbatim, markup parts contribute nothing, and
// expression parts are formatted and converted to a string.
// A U_MF_FORMATTING_ERROR raised while converting an expression to a string
// is moved from `status` into the context's error list.
void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
    CHECK_ERROR(status);

    for (int32_t idx = 0; idx < pat.numParts(); idx++) {
        const PatternPart& piece = pat.getPart(idx);

        if (piece.isText()) {
            result += piece.asText();
            continue;
        }
        if (piece.isMarkup()) {
            // Markup is ignored
            continue;
        }

        // Remaining case: an expression
        FormattedPlaceholder formatted = formatExpression(globalEnv, piece.contents(), context, status);
        // Force full evaluation, e.g. applying default formatters to
        // unformatted input (or formatting numbers as strings)
        UnicodeString text = formatted.formatToString(locale, status);
        result += text;

        // `formatToString()` has no access to the context, so it cannot
        // register an error itself; do it here on its behalf
        if (status == U_MF_FORMATTING_ERROR) {
            status = U_ZERO_ERROR;
            // TODO: The name of the formatter that failed is unavailable.
            // Not ideal, but it's hard for `formatToString()`
            // to pass along more detailed diagnostics
            context.getErrors().setFormattingError(status);
        }
    }
}
// ------------------------------------------------------
// Selection
// Implements "Resolve Selectors"; see
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
// `res` is an out-parameter: one heap-allocated ResolvedSelector is appended
// to it for each selector expression of the message, in order.
void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
    CHECK_ERROR(status);
    U_ASSERT(!dataModel.hasPattern());

    const Expression* selectorExprs = dataModel.getSelectorsInternal();
    // Step 1 (create an empty list `res`) is implicit: `res` is supplied by
    // the caller.
    // Step 2: for each expression exp of the message's selectors
    for (int32_t idx = 0; idx < dataModel.numSelectors(); idx++) {
        // 2i. Let rv be the resolved value of exp.
        ResolvedSelector resolved = formatSelectorExpression(env, selectorExprs[idx], context, status);

        if (!resolved.hasSelector()) {
            // 2iii. Selection is not supported for rv:
            //   the spec asks for a `nomatch` value for which selection
            //   always fails, plus a Selection Error. Here `resolved`,
            //   being a fallback, plays the role of `nomatch`; an error is
            //   expected to have been recorded already.
#if U_DEBUG
            const DynamicErrors& err = context.getErrors();
            U_ASSERT(err.hasError());
            U_ASSERT(resolved.argument().isFallback());
#endif
        }
        // (2ii: otherwise selection is supported and nothing extra is needed)

        // 2ii(a) / 2iii. Append rv as the last element of the list res.
        LocalPointer<ResolvedSelector> entry(create<ResolvedSelector>(std::move(resolved), status));
        CHECK_ERROR(status);
        res.adoptElement(entry.orphan(), status);
    }
}
// Implements MatchSelectorKeys(rv, keys); see
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
// `keys` is a vector of strings (the candidate keys); `keysOut` receives the
// matching keys, as heap-allocated strings, in the order of preference
// reported by the selector.
// If `rv` has no selector, `keysOut` is left untouched (the caller passes an
// empty vector, so this means "no matches").
// If the selector itself fails, the failure is recorded in the context as a
// selector error, `status` is reset, and `keysOut` is left empty: a failed
// selector matches nothing.
void MessageFormatter::matchSelectorKeys(const UVector& keys,
                                         MessageContext& context,
                                         ResolvedSelector&& rv,
                                         UVector& keysOut,
                                         UErrorCode& status) const {
    CHECK_ERROR(status);

    if (!rv.hasSelector()) {
        // Return an empty list of matches
        return;
    }

    auto selectorImpl = rv.getSelector();
    U_ASSERT(selectorImpl != nullptr);
    UErrorCode savedStatus = status;

    // Convert `keys` to an array
    int32_t keysLen = keys.size();
    UnicodeString* keysArr = new UnicodeString[keysLen];
    if (keysArr == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Hand ownership to a LocalArray right away so that every exit path
    // below releases the array
    LocalArray<UnicodeString> adoptedKeys(keysArr);
    for (int32_t i = 0; i < keysLen; i++) {
        const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
        U_ASSERT(k != nullptr);
        keysArr[i] = *k;
    }

    // Create an array to hold the output
    UnicodeString* prefsArr = new UnicodeString[keysLen];
    if (prefsArr == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    LocalArray<UnicodeString> adoptedPrefs(prefsArr);
    int32_t prefsLen = 0;

    // Call the selector
    selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(),
                            adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen,
                            status);

    // Update errors
    if (savedStatus != status) {
        if (U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            context.getErrors().setSelectorError(rv.getSelectorName(), status);
            // The selector failed, so whatever it may have written to the
            // output array is not trustworthy: report no matches
            keysOut.removeAllElements();
            return;
        }
        // Ignore warnings
        status = savedStatus;
    }

    // Copy the resulting keys (reached only if the selector succeeded)
    keysOut.removeAllElements();
    for (int32_t i = 0; i < prefsLen; i++) {
        UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
        if (k == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        keysOut.adoptElement(k, status);
        CHECK_ERROR(status);
    }
}
// Implements "Resolve Preferences"; see
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
// `res` is the vector of ResolvedSelectors produced by resolveSelectors();
// its elements are moved from while this function runs.
// `pref` is an out-parameter: a vector of vectors of strings, receiving one
// list of matching keys per selector.
// Returns early, leaving `pref` partially filled, as soon as `status`
// becomes a failure.
void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
    CHECK_ERROR(status);

    // 1. Let pref be a new empty list of lists of strings.
    // (Implicit, since `pref` is an out-parameter)
    UnicodeString ks;
    LocalPointer<UnicodeString> ksP;
    int32_t numVariants = dataModel.numVariants();
    const Variant* variants = dataModel.getVariantsInternal();
    // 2. For each index i in res
    for (int32_t i = 0; i < (int32_t) res.size(); i++) {
        // 2i. Let keys be a new empty list of strings.
        LocalPointer<UVector> keys(createUVector(status));
        CHECK_ERROR(status);
        // 2ii. For each variant `var` of the message
        for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
            const SelectorKeys& selectorKeys = variants[variantNum].getKeys();

            // Note: Here, `var` names the key list of `var`,
            // not a Variant itself
            const Key* var = selectorKeys.getKeysInternal();
            // 2ii(a). Let `key` be the `var` key at position i.
            U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
            const Key& key = var[i];
            // 2ii(b). If `key` is not the catch-all key '*'
            if (!key.isWildcard()) {
                // 2ii(b)(a) Assert that key is a literal.
                // (Not needed)
                // 2ii(b)(b) Let `ks` be the resolved value of `key`.
                ks = key.asLiteral().unquoted();
                // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
                ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
                CHECK_ERROR(status);
                keys->adoptElement(ksP.orphan(), status);
                CHECK_ERROR(status);
            }
        }
        // 2iii. Let `rv` be the resolved value at index `i` of `res`.
        U_ASSERT(i < res.size());
        ResolvedSelector rv = std::move(*(static_cast<ResolvedSelector*>(res[i])));
        // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
        LocalPointer<UVector> matches(createUVector(status));
        // `matches` must be valid before it is dereferenced below
        CHECK_ERROR(status);
        matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
        // 2v. Append `matches` as the last element of the list `pref`
        pref.adoptElement(matches.orphan(), status);
        CHECK_ERROR(status);
    }
}
// Linear search for `k` in `v`, which is assumed to hold UnicodeString
// pointers. Returns the index of the first element equal to `k`, or -1 if
// there is none.
static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
    int32_t idx = 0;
    while (idx < v.size()) {
        const UnicodeString* candidate = static_cast<UnicodeString*>(v[idx]);
        if (*candidate == k) {
            return idx;
        }
        idx++;
    }
    return -1;
}
// True if some element of `v` (a vector of strings) equals `k`.
static UBool vectorContains(const UVector& v, const UnicodeString& k) {
    // vectorFind() yields -1 for "absent" and a non-negative index otherwise
    const int32_t position = vectorFind(v, k);
    return position >= 0;
}
// Implements "Filter Variants"; see
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
// `pref` is a vector of vectors of strings (one list of matching keys per
// selector). `vars` is an out-parameter: a PrioritizedVariant with priority
// -1 is appended for every variant whose keys are all either the catch-all
// key or contained in the corresponding list in `pref`.
// Does nothing if `status` is a failure on entry.
void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
    // Same entry guard as the other pattern-selection steps
    CHECK_ERROR(status);

    const Variant* variants = dataModel.getVariantsInternal();

    // 1. Let `vars` be a new empty list of variants.
    // (Not needed since `vars` is an out-parameter)
    // 2. For each variant `var` of the message:
    for (int32_t j = 0; j < dataModel.numVariants(); j++) {
        const SelectorKeys& selectorKeys = variants[j].getKeys();
        const Pattern& p = variants[j].getPattern();

        // Note: Here, `var` names the key list of `var`,
        // not a Variant itself
        const Key* var = selectorKeys.getKeysInternal();
        // 2i. For each index `i` in `pref`:
        bool noMatch = false;
        for (int32_t i = 0; i < (int32_t) pref.size(); i++) {
            // 2i(a). Let `key` be the `var` key at position `i`.
            U_ASSERT(i < selectorKeys.len);
            const Key& key = var[i];
            // 2i(b). If key is the catch-all key '*':
            if (key.isWildcard()) {
                // 2i(b)(a). Continue the inner loop on pref.
                continue;
            }
            // 2i(c). Assert that `key` is a literal.
            // (Not needed)
            // 2i(d). Let `ks` be the resolved value of `key`.
            UnicodeString ks = key.asLiteral().unquoted();
            // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
            const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
            // 2i(f). If `matches` includes `ks`
            if (vectorContains(matches, ks)) {
                // 2i(f)(a). Continue the inner loop on `pref`.
                continue;
            }
            // 2i(g). Else:
            // 2i(g)(a). Continue the outer loop on message variants.
            noMatch = true;
            break;
        }
        if (!noMatch) {
            // Append `var` as the last element of the list `vars`.
            PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
            CHECK_ERROR(status);
            vars.adoptElement(tuple, status);
        }
    }
}
// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
// Leaves the preferred variant as element 0 in `sortable`
// Note: this sorts in-place, so `sortable` is just `vars`
// `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
CHECK_ERROR(status);
// Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
// 1. Let `sortable` be a new empty list of (integer, variant) tuples.
// (Not needed since `sortable` is an out-parameter)
// 2. For each variant `var` of `vars`
// 2i. Let tuple be a new tuple (-1, var).
// 2ii. Append `tuple` as the last element of the list `sortable`.
// 3. Let `len` be the integer count of items in `pref`.
int32_t len = pref.size();
// 4. Let `i` be `len` - 1.
int32_t i = len - 1;
// 5. While i >= 0:
while (i >= 0) {
// 5i. Let `matches` be the list of strings at index `i` of `pref`.
U_ASSERT(pref[i] != nullptr);
const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
// 5ii. Let `minpref` be the integer count of items in `matches`.
int32_t minpref = matches.size();
// 5iii. For each tuple `tuple` of `sortable`:
for (int32_t j = 0; j < vars.size(); j++) {
U_ASSERT(vars[j] != nullptr);
PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
// 5iii(a). Let matchpref be an integer with the value minpref.
int32_t matchpref = minpref;
// 5iii(b). Let `key` be the tuple variant key at position `i`.
const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
const Key& key = tupleVariantKeys[i];
// 5iii(c) If `key` is not the catch-all key '*':
if (!key.isWildcard()) {
// 5iii(c)(a). Assert that `key` is a literal.
// (Not needed)
// 5iii(c)(b). Let `ks` be the resolved value of `key`.
UnicodeString ks = key.asLiteral().unquoted();
// 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
matchpref = vectorFind(matches, ks);
U_ASSERT(matchpref >= 0);
}
// 5iii(d) Set the `tuple` integer value as matchpref.
tuple.priority = matchpref;
}
// 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
vars.sort(comparePrioritizedVariants, status);
CHECK_ERROR(status);
// 5v. Set `i` to be `i` - 1.
i--;
}
// The caller is responsible for steps 6 and 7
// 6. Let `var` be the `variant` element of the first element of `sortable`.
// 7. Select the pattern of `var`
}
// Resolves an operand for use in selection.
//   - null operand: a ResolvedSelector around a default placeholder
//   - literal:      a ResolvedSelector around the formatted literal
//   - local var:    the bound expression is resolved in its closure's
//                   environment
//   - otherwise:    the variable is looked up among the message arguments;
//                   if that lookup reports U_ILLEGAL_ARGUMENT_ERROR the
//                   status is cleared and a placeholder built from the
//                   variable name is returned
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    if (rand.isNull()) {
        return ResolvedSelector(FormattedPlaceholder());
    }

    if (rand.isLiteral()) {
        return ResolvedSelector(formatLiteral(rand.asLiteral()));
    }

    // Neither null nor a literal: must be a variable
    const VariableName& name = rand.asVariable();

    if (env.has(name)) {
        // Follow the local binding
        const Closure& closure = env.lookup(name);
        return resolveVariables(closure.getEnv(), closure.getExpr(), context, status);
    }

    // Either this is a global var or an unbound var --
    // either way, it can't be bound to a function call.
    FormattedPlaceholder argValue = evalArgument(name, context, status);
    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
        // Success, or some other error that is passed through
        return ResolvedSelector(std::move(argValue));
    }

    // Unresolved variable -- could be a previous warning. Nothing to resolve
    status = U_ZERO_ERROR;
    U_ASSERT(context.getErrors().hasUnresolvedVariableError());
    return ResolvedSelector(FormattedPlaceholder(name));
}
// Evaluates an expression up to, but not including, its top-level function
// call (which is expected to be a selector, but may not be in error cases).
//   - reserved expression: records a reserved error, returns its fallback
//   - no function call:    resolves the operand (one more level of variable
//                          expansion)
//   - selector call:       resolves options and operand and packages them
//                          with the selector
//   - any other function:  records a selector error (for a formatter) or an
//                          unknown-function error, and returns a fallback
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
                                                    const Expression& expr,
                                                    MessageContext& context,
                                                    UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    // A `reserved` is an error
    if (expr.isReserved()) {
        context.getErrors().setReservedError(status);
        return ResolvedSelector(FormattedPlaceholder(reservedFallback(expr)));
    }

    if (!expr.isFunctionCall()) {
        // Might be a variable reference, so expand one more level of variable
        return resolveVariables(env, expr.getOperand(), context, status);
    }

    // Function call -- resolve the operand and options
    const Operator* rator = expr.getOperator(status);
    U_ASSERT(U_SUCCESS(status));
    // Already checked that rator is non-reserved
    const FunctionName& calleeName = rator->getFunctionName();

    if (isSelector(calleeName)) {
        auto selector = getSelector(context, calleeName, status);
        if (U_SUCCESS(status)) {
            FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status);
            // Operand may be the null argument, but formatOperand() handles that
            FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status);
            return ResolvedSelector(calleeName, selector, std::move(resolvedOptions), std::move(argument));
        }
        // Obtaining the selector failed: continue to the fallback below
    } else if (isFormatter(calleeName)) {
        context.getErrors().setSelectorError(calleeName, status);
    } else {
        context.getErrors().setUnknownFunction(calleeName, status);
    }

    // No usable selector. The fallback is the operand's fallback string if
    // there is an operand, and COLON + function name otherwise
    UnicodeString fallback(COLON);
    fallback += calleeName;
    if (!expr.getOperand().isNull()) {
        fallback = formatOperand(env, expr.getOperand(), context, status).fallback;
    }
    return ResolvedSelector(FormattedPlaceholder(fallback));
}
// Resolves one selector expression of the message.
// If the expression resolves to a selector call with a usable argument, the
// ResolvedSelector is returned unchanged. If the argument is a fallback, the
// result carries no selector: a null operand when the context has a syntax
// or data model error, the argument itself otherwise. If no selector was
// found at all, the result wraps the argument's fallback string.
ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return {};
    }

    // Resolve expression to determine if it's a function call
    ResolvedSelector resolved = resolveVariables(globalEnv, expr, context, status);

    DynamicErrors& err = context.getErrors();

    if (!resolved.hasSelector()) {
        // No selector was found; error should already have been set
        U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError());
        return ResolvedSelector(FormattedPlaceholder(resolved.argument().fallback));
    }

    // There is a selector; see whether its argument is usable
    if (!resolved.argument().isFallback()) {
        return resolved;
    }

    // The argument is a fallback value
    if (err.hasSyntaxError() || err.hasDataModelError()) {
        // Syntax or data model warning: use a null operand
        return ResolvedSelector(FormattedPlaceholder());
    }
    // Otherwise pass the argument along, without a selector
    return ResolvedSelector(resolved.takeArgument());
}
// Performs pattern selection for a message with selectors and formats the
// selected pattern, appending the output to `result`. See
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
    CHECK_ERROR(status);

    // Resolve Selectors
    // `selectors` is a vector of ResolvedSelectors
    LocalPointer<UVector> selectors(createUVector(status));
    CHECK_ERROR(status);
    resolveSelectors(context, env, status, *selectors);

    // Resolve Preferences
    // `preferences` is a vector of vectors of strings
    LocalPointer<UVector> preferences(createUVector(status));
    CHECK_ERROR(status);
    resolvePreferences(context, *selectors, *preferences, status);

    // Filter Variants
    // `candidates` is a vector of PrioritizedVariants
    LocalPointer<UVector> candidates(createUVector(status));
    CHECK_ERROR(status);
    filterVariants(*preferences, *candidates, status);

    // Sort Variants and select the final pattern
    // Note: `sortable` in the spec is just `candidates` here,
    // which is sorted in-place
    sortVariants(*preferences, *candidates, status);
    CHECK_ERROR(status);

    // 6. Let `var` be the `variant` element of the first element of `sortable`.
    U_ASSERT(candidates->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
    const PrioritizedVariant& best = *(static_cast<PrioritizedVariant*>(candidates->elementAt(0)));

    // 7. Select the pattern of `var`, and format it
    formatPattern(context, env, best.pat, status, result);
}
// Formats the message with the given arguments and returns the resulting string.
// On return, `status` reflects the errors seen while formatting (see
// `context.checkErrors()`); if `status` is already a failure on entry, or a hard
// failure occurs while setting up the environment, an empty string is returned.
//
// Note: this is non-const due to the function registry being non-const, which is in turn
// due to the values (`FormatterFactory` objects in the map) having mutable state.
// In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
// state within the factory objects that represent custom formatters.
UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
    EMPTY_ON_ERROR(status);

    // Create a new environment that will store closures for all local variables
    Environment* env = Environment::create(status);
    // Create a new context with the given arguments and the `errors` structure
    MessageContext context(arguments, *errors, status);

    // Check for unresolved variable errors.
    // `checkDeclarations()` may replace `env`, so ownership is only taken afterwards.
    checkDeclarations(context, env, status);
    LocalPointer<Environment> globalEnv(env);

    // A hard failure above (for example, an allocation failure) can leave the
    // environment pointer null; stop before `*globalEnv` is dereferenced.
    EMPTY_ON_ERROR(status);

    UnicodeString result;
    if (dataModel.hasPattern()) {
        formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
    } else {
        // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
        // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
        const DynamicErrors& err = context.getErrors();
        if (err.hasSyntaxError() || err.hasDataModelError()) {
            result += REPLACEMENT;
        } else {
            formatSelectors(context, *globalEnv, status, result);
        }
    }

    // Update status according to all errors seen while formatting
    context.checkErrors(status);
    return result;
}
// ----------------------------------------
// Checking for resolution errors
// Checks the right-hand side of every option in `options` for unresolved variables.
// Stops at the first option that cannot be retrieved.
void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
    int32_t idx = 0;
    while (idx < options.size()) {
        const Option& current = options.getOption(idx, status);
        CHECK_ERROR(status);
        // Only the option's value (its RHS) can mention a variable
        check(context, localEnv, current.getValue(), status);
        ++idx;
    }
}
// Checks that a variable operand is in scope, either locally or as a global
// (message argument). If it is in neither scope, an "unresolved variable" error
// is recorded in the context's error list and `status` is reset to success.
// Literal and null operands need no checking.
void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
    // Bail out on a pre-existing failure. Without this guard, an incoming
    // U_ILLEGAL_ARGUMENT_ERROR would be mistaken for a failed global lookup
    // below, cleared, and reported as an unresolved variable.
    CHECK_ERROR(status);

    // Nothing to check for literals
    if (rand.isLiteral() || rand.isNull()) {
        return;
    }

    // Check that variable is in scope
    const VariableName& var = rand.asVariable();

    // Check local scope
    if (localEnv.has(var)) {
        return;
    }

    // Check global scope
    context.getGlobal(var, status);
    if (status == U_ILLEGAL_ARGUMENT_ERROR) {
        // The lookup failed: turn the hard error into a recorded message error
        status = U_ZERO_ERROR;
        context.getErrors().setUnresolvedVariable(var, status);
    }
    // Either `var` is a global, or some other error occurred.
    // Nothing more to do either way
}
// Checks an expression for unresolved variables. Only function calls are
// inspected: first the operand, then every option of the operator.
void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
    if (!expr.isFunctionCall()) {
        return;
    }

    const Operator* rator = expr.getOperator(status);
    U_ASSERT(U_SUCCESS(status));

    check(context, localEnv, expr.getOperand(), status);
    check(context, localEnv, rator->getOptionsInternal(), status);
}
// Check for resolution errors.
// Walks the data model's declarations in order. Each right-hand side is checked
// against the environment built so far, then `env` is replaced by a new
// environment that binds the declared variable to a closure over the old one.
// NOTE(review): if `Environment::create()` fails, `env` is overwritten with its
// return value; whether the previous environment is released in that case is
// not visible here -- confirm.
void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
    CHECK_ERROR(status);

    const Binding* bindings = getDataModel().getLocalVariablesInternal();
    U_ASSERT(env != nullptr && bindings != nullptr);

    int32_t idx = 0;
    while (idx < getDataModel().bindingsLen) {
        const Binding& binding = bindings[idx];
        const Expression& value = binding.getValue();

        check(context, *env, value, status);

        // Add a closure to the global environment, memoizing the value of the
        // environment up to this point; the LHS is then visible when checking
        // the next declaration
        env = Environment::create(binding.getVariable(), Closure(value, *env), env, status);
        CHECK_ERROR(status);
        ++idx;
    }
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,139 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_UTILS_H
#define MESSAGEFORMAT2_UTILS_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
namespace message2 {
// Helpers
template<typename T>
static T* copyArray(const T* source, int32_t& len) { // `len` is an in/out param
if (source == nullptr) {
len = 0;
return nullptr;
}
T* dest = new T[len];
if (dest == nullptr) {
// Set length to 0 to prevent the
// array from being accessed
len = 0;
} else {
for (int32_t i = 0; i < len; i++) {
dest[i] = source[i];
}
}
return dest;
}
template<typename T>
static T* copyVectorToArray(const UVector& source, int32_t& len) {
len = source.size();
T* dest = new T[len];
if (dest == nullptr) {
// Set length to 0 to prevent the
// array from being accessed
len = 0;
} else {
for (int32_t i = 0; i < len; i++) {
dest[i] = *(static_cast<T*>(source.elementAt(i)));
}
}
return dest;
}
// Moves the elements of `source` (each treated as a `T*`) into a newly
// allocated array, then removes all elements from `source`.
// `len` receives the vector's size, or 0 if allocation fails; the vector is
// emptied in either case.
template<typename T>
static T* moveVectorToArray(UVector& source, int32_t& len) {
    len = source.size();

    T* moved = new T[len];
    if (moved != nullptr) {
        for (int32_t idx = 0; idx < len; ++idx) {
            T* element = static_cast<T*>(source.elementAt(idx));
            moved[idx] = std::move(*element);
        }
    } else {
        // Allocation failed: report an empty array so nothing indexes it
        len = 0;
    }

    source.removeAllElements();
    return moved;
}
// Allocates a new UVector with no deleter installed.
// Returns nullptr and leaves `status` as a failure if `status` is already a
// failure, if the allocation fails, or if the UVector constructor reports an
// error. The caller owns the returned vector.
inline UVector* createUVectorNoAdopt(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }
    // The two-argument LocalPointer constructor sets U_MEMORY_ALLOCATION_ERROR
    // when `new` yields nullptr. The one-argument form would leave `status`
    // successful in that case, and callers that only test `status` would go on
    // to dereference a null vector.
    LocalPointer<UVector> result(new UVector(status), status);
    if (U_FAILURE(status)) {
        return nullptr;
    }
    return result.orphan();
}
// Allocates a new UVector whose deleter is `uprv_deleteUObject`.
// Returns nullptr if `status` is a failure after the allocation step.
inline UVector* createUVector(UErrorCode& status) {
    UVector* vec = createUVectorNoAdopt(status);
    if (U_SUCCESS(status)) {
        vec->setDeleter(uprv_deleteUObject);
        return vec;
    }
    return nullptr;
}
// UVector comparer: treats both elements as pointers to UnicodeString and
// compares the pointed-to strings for equality.
static UBool stringsEqual(const UElement s1, const UElement s2) {
    const UnicodeString& lhs = *static_cast<UnicodeString*>(s1.pointer);
    const UnicodeString& rhs = *static_cast<UnicodeString*>(s2.pointer);
    return lhs == rhs;
}
// Allocates a UVector from `createUVector()` and installs `stringsEqual` as
// its comparer. Returns nullptr if `status` is a failure.
inline UVector* createStringUVector(UErrorCode& status) {
    UVector* strings = createUVector(status);
    if (U_SUCCESS(status)) {
        strings->setComparer(stringsEqual);
        return strings;
    }
    return nullptr;
}
// Allocates a UVector from `createUVectorNoAdopt()` (no deleter) and installs
// `stringsEqual` as its comparer. Returns nullptr if `status` is a failure.
inline UVector* createStringVectorNoAdopt(UErrorCode& status) {
    UVector* strings = createUVectorNoAdopt(status);
    if (U_SUCCESS(status)) {
        strings->setComparer(stringsEqual);
        return strings;
    }
    return nullptr;
}
// Heap-allocates a `T` move-constructed from `node`.
// Returns nullptr without allocating if `status` is already a failure; sets
// `status` to U_MEMORY_ALLOCATION_ERROR and returns nullptr if allocation fails.
template<typename T>
inline T* create(T&& node, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    T* allocated = new T(std::move(node));
    if (allocated != nullptr) {
        return allocated;
    }

    status = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_UTILS_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,55 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model_names.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// ------------------------------------------------------
// MessageArguments
using Arguments = MessageArguments;
// Looks up the argument named `arg` by a linear scan over the argument names.
// Returns a pointer to the matching value. When no name matches, sets
// `errorCode` to U_ILLEGAL_ARGUMENT_ERROR and returns nullptr. Returns nullptr
// immediately if `errorCode` is already a failure.
const Formattable* Arguments::getArgument(const VariableName& arg, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    U_ASSERT(argsLen == 0 || arguments.isValid());
    for (int32_t idx = 0; idx < argsLen; ++idx) {
        if (argumentNames[idx] == arg) {
            return &arguments[idx];
        }
    }

    // No argument with that name
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return nullptr;
}
// Out-of-line destructor with an empty body.
MessageArguments::~MessageArguments() {}
// Message arguments
// -----------------
// Move assignment: takes over `other`'s argument names and values.
// Afterwards `other` is empty (length 0, no arrays), so it can still be
// queried or destroyed safely. Any arrays previously held by `*this` are
// released, including when `other` is empty. Self-assignment is a no-op.
MessageArguments& MessageArguments::operator=(MessageArguments&& other) noexcept {
    U_ASSERT(other.arguments.isValid() || other.argsLen == 0);
    if (this == &other) {
        return *this;
    }

    argsLen = other.argsLen;
    // Adopt unconditionally: when `other` is empty this drops the stale arrays
    // instead of keeping them alive behind a zero length
    argumentNames.adoptInstead(other.argumentNames.orphan());
    arguments.adoptInstead(other.arguments.orphan());

    // Keep the moved-from object consistent: its arrays are gone, so its
    // length must not claim otherwise
    other.argsLen = 0;
    return *this;
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,62 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_CACHED_FORMATTERS_H
#define MESSAGEFORMAT2_CACHED_FORMATTERS_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "hash.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Formatter cache
// --------------
class MessageFormatter;
// Map from function names to Formatters.
// Thin wrapper around a Hashtable; the constructor installs
// `uprv_deleteUObject` as the table's value deleter.
class CachedFormatters : public UObject {
private:
friend class MessageFormatter;
// Maps stringified FunctionNames onto Formatter*
// Adopts its values
Hashtable cache;
// Private constructor: within this header, only the friend class
// MessageFormatter can create an instance.
CachedFormatters() { cache.setValueDeleter(uprv_deleteUObject); }
public:
// Looks up `f` in the table and returns the stored value as a Formatter pointer.
// Returns a pointer because Formatter is an abstract class
// NOTE(review): presumably nullptr when `f` has no entry -- confirm against Hashtable::get.
const Formatter* getFormatter(const FunctionName& f) {
return static_cast<const Formatter*>(cache.get(f));
}
// Stores `val` under the key `f`.
// Adopts its argument
void adoptFormatter(const FunctionName& f, Formatter* val, UErrorCode& status) {
cache.put(f, val, status);
}
// Copy assignment is disabled.
CachedFormatters& operator=(const CachedFormatters&) = delete;
virtual ~CachedFormatters();
};
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_CACHED_FORMATTERS_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,295 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
/*
Checks data model errors
(see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling )
The following are checked here:
Variant Key Mismatch
Missing Fallback Variant (called NonexhaustivePattern here)
Missing Selector Annotation
Duplicate Declaration
- Most duplicate declaration errors are checked by the parser,
but the checker checks for declarations of input variables
that were previously implicitly declared
(Duplicate option names and all other duplicate declarations are checked by the parser)
*/
// Type environments
// -----------------
// Creates the three string vectors (annotated, unannotated, free variables).
// None of them adopts its elements. Construction stops at the first failure,
// leaving the remaining vectors unset.
TypeEnvironment::TypeEnvironment(UErrorCode& status) {
    CHECK_ERROR(status);

    // createStringVectorNoAdopt() yields nullptr on failure, so adopting its
    // result before checking `status` leaves the member unset in that case
    annotated.adoptInstead(createStringVectorNoAdopt(status));
    CHECK_ERROR(status);

    unannotated.adoptInstead(createStringVectorNoAdopt(status));
    CHECK_ERROR(status);

    freeVars.adoptInstead(createStringVectorNoAdopt(status));
    CHECK_ERROR(status);
}
// Returns true if `v` contains `var`, using the vector's `contains()`.
static bool has(const UVector& v, const VariableName& var) {
    // UVector::contains() takes a non-const void*
    const void* key = static_cast<const void*>(&var);
    return v.contains(const_cast<void*>(key));
}
// Returns true if `var` was either previously used (implicit declaration),
// or is in scope by an explicit declaration; that is, if it appears in any of
// the three vectors.
bool TypeEnvironment::known(const VariableName& var) const {
    if (has(*annotated, var)) {
        return true;
    }
    if (has(*unannotated, var)) {
        return true;
    }
    return has(*freeVars, var);
}
// Returns the recorded type of `var`. The vectors are consulted in the order
// annotated, unannotated, free; a variable found in none of them is reported
// as Unannotated.
TypeEnvironment::Type TypeEnvironment::get(const VariableName& var) const {
    U_ASSERT(annotated.isValid());
    const bool isAnnotated = has(*annotated, var);
    if (isAnnotated) {
        return Annotated;
    }

    U_ASSERT(unannotated.isValid());
    const bool isUnannotated = has(*unannotated, var);
    if (isUnannotated) {
        return Unannotated;
    }

    U_ASSERT(freeVars.isValid());
    // If not found here either, this is a "free variable without an implicit
    // declaration", i.e. one used only in a selector expression and not in a
    // declaration RHS; it counts as Unannotated
    return has(*freeVars, var) ? FreeVariable : Unannotated;
}
// Records `var` with type `t` by appending its address to the matching vector.
// Any type other than Unannotated or FreeVariable goes into `annotated`.
void TypeEnvironment::extend(const VariableName& var, TypeEnvironment::Type t, UErrorCode& status) {
    // Casting away const is safe because elements of these vectors are never
    // written, and the lifetime of `var` is guaranteed to include the lifetime
    // of the vector it is stored in
    void* element = const_cast<void*>(static_cast<const void*>(&var));

    switch (t) {
    case Unannotated:
        U_ASSERT(unannotated.isValid());
        unannotated->addElement(element, status);
        break;
    case FreeVariable:
        U_ASSERT(freeVars.isValid());
        freeVars->addElement(element, status);
        break;
    default:
        U_ASSERT(annotated.isValid());
        annotated->addElement(element, status);
        break;
    }
}
// Out-of-line destructor with an empty body.
TypeEnvironment::~TypeEnvironment() {}
// ---------------------
static bool areDefaultKeys(const Key* keys, int32_t len) {
U_ASSERT(len > 0);
for (int32_t i = 0; i < len; i++) {
if (!keys[i].isWildcard()) {
return false;
}
}
return true;
}
// If `rand` is a variable that the type environment does not know yet,
// records it as a free variable.
void Checker::addFreeVars(TypeEnvironment& t, const Operand& rand, UErrorCode& status) {
    CHECK_ERROR(status);

    if (!rand.isVariable()) {
        return;
    }

    const VariableName& name = rand.asVariable();
    if (t.known(name)) {
        return;
    }
    t.extend(name, TypeEnvironment::Type::FreeVariable, status);
}
// Adds free variables from the value of every option in `opts`.
// Stops at the first option that cannot be retrieved.
void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& status) {
    int32_t idx = 0;
    while (idx < opts.size()) {
        const Option& current = opts.getOption(idx, status);
        CHECK_ERROR(status);
        addFreeVars(t, current.getValue(), status);
        ++idx;
    }
}
// Adds free variables from the options of `rator`; reserved operators are skipped.
void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) {
    CHECK_ERROR(status);

    if (rator.isReserved()) {
        return;
    }
    addFreeVars(t, rator.getOptionsInternal(), status);
}
// Adds free variables from an expression: first from the operator (when the
// expression is a function call), then from the operand.
void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) {
    CHECK_ERROR(status);

    const bool isCall = rhs.isFunctionCall();
    if (isCall) {
        const Operator* op = rhs.getOperator(status);
        U_ASSERT(U_SUCCESS(status));
        addFreeVars(t, *op, status);
    }

    addFreeVars(t, rhs.getOperand(), status);
}
// Checks the variants of a selectors message:
//  - each variant's key list must have as many keys as there are selectors
//    (otherwise: Variant Key Mismatch, and checking stops);
//  - at least one variant must consist only of wildcard keys
//    (otherwise: NonexhaustivePattern).
void Checker::checkVariants(UErrorCode& status) {
    CHECK_ERROR(status);
    U_ASSERT(!dataModel.hasPattern());

    const Variant* variants = dataModel.getVariantsInternal();
    bool sawAllWildcardVariant = false;

    for (int32_t idx = 0; idx < dataModel.numVariants(); ++idx) {
        const SelectorKeys& selectorKeys = variants[idx].getKeys();
        const Key* keyArray = selectorKeys.getKeysInternal();
        const int32_t keyCount = selectorKeys.len;

        if (keyCount != dataModel.numSelectors()) {
            // Variant key mismatch
            errors.addError(StaticErrorType::VariantKeyMismatchError, status);
            return;
        }

        if (areDefaultKeys(keyArray, keyCount)) {
            sawAllWildcardVariant = true;
        }
    }

    if (!sawAllWildcardVariant) {
        errors.addError(StaticErrorType::NonexhaustivePattern, status);
    }
}
void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
CHECK_ERROR(status);
if (selectorExpr.isFunctionCall()) {
return; // No error
}
if (!selectorExpr.isReserved()) {
const Operand& rand = selectorExpr.getOperand();
if (rand.isVariable()) {
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
return; // No error
}
}
}
// If this code is reached, an error was detected
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
}
void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
U_ASSERT(!dataModel.hasPattern());
// Check each selector; if it's not annotated, emit a
// "missing selector annotation" error
const Expression* selectors = dataModel.getSelectorsInternal();
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
requireAnnotated(t, selectors[i], status);
}
}
// Computes the type of an expression: function calls are Annotated; reserved
// expressions and literals are Unannotated; a bare variable has whatever type
// the environment reports for it.
TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) {
    using Type = TypeEnvironment::Type;

    if (expr.isFunctionCall()) {
        return Type::Annotated;
    }
    if (expr.isReserved()) {
        return Type::Unannotated;
    }

    const Operand& operand = expr.getOperand();
    U_ASSERT(!operand.isNull());
    if (!operand.isLiteral()) {
        U_ASSERT(operand.isVariable());
        return t.get(operand.asVariable());
    }
    return Type::Unannotated;
}
// Walks the declarations in order, extending the type environment with the
// type of each one and reporting duplicate declarations of variables that
// were already implicitly declared by an earlier use. Also reports an
// "unsupported statement" error if the data model contains any such statements.
//
// Only a very simple type system is necessary: variables have the type
// "annotated", "unannotated", or "free". For "missing selector annotation"
// checking, free variables (message arguments) are treated as unannotated.
// Free variables are also used for checking duplicate declarations.
void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) {
    CHECK_ERROR(status);

    const Binding* bindings = dataModel.getLocalVariablesInternal();
    for (int32_t idx = 0; idx < dataModel.bindingsLen; ++idx) {
        const Binding& binding = bindings[idx];
        const VariableName& declared = binding.getVariable();
        const Expression& value = binding.getValue();

        // First, add free variables used by the declaration. This must be
        // done first so we can catch:
        //   .local $foo = {$foo}
        // (where the RHS is the first use of $foo)
        if (binding.isLocal()) {
            addFreeVars(t, value, status);
        } else if (binding.hasAnnotation()) {
            // For .input declarations, only variables referenced in the
            // options need adding; without an annotation there's nothing to check
            addFreeVars(t, binding.getOptionsInternal(), status);
        }

        // Next, check if the LHS equals any free variables
        // whose implicit declarations are in scope
        if (t.known(declared) && t.get(declared) == TypeEnvironment::Type::FreeVariable) {
            errors.addError(StaticErrorType::DuplicateDeclarationError, status);
        }

        // Finally, extend the type environment with a binding from the LHS to its type
        t.extend(declared, typeOf(t, value), status);
    }

    // Check for unsupported statements
    if (dataModel.unsupportedStatementsLen > 0) {
        errors.addError(StaticErrorType::UnsupportedStatementError, status);
    }
}
// Entry point: checks the declarations and, for a selectors message, the
// selectors and variants. A pattern message needs no checks beyond its
// declarations.
void Checker::check(UErrorCode& status) {
    CHECK_ERROR(status);

    TypeEnvironment typeEnv(status);
    checkDeclarations(typeEnv, status);

    if (!dataModel.hasPattern()) {
        // Selectors message
        checkSelectors(typeEnv, status);
        checkVariants(status);
    }
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,91 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT_CHECKER_H
#define MESSAGEFORMAT_CHECKER_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model.h"
#include "messageformat2_errors.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Used for checking missing selector annotation errors
// and duplicate declaration errors (specifically for
// implicit declarations)
class TypeEnvironment : public UMemory {
public:
// MessageFormat has a simple type system;
// variables are in-scope and annotated; in-scope and unannotated;
// or free (a free variable has no explicit declaration in the scope
// of its use.)
enum Type {
Annotated,
Unannotated,
FreeVariable
};
// Records the given variable with the given type.
void extend(const VariableName&, Type, UErrorCode& status);
// Returns the recorded type of the given variable.
Type get(const VariableName&) const;
// Returns true if the given variable has been recorded with any type.
bool known(const VariableName&) const;
TypeEnvironment(UErrorCode& status);
virtual ~TypeEnvironment();
private:
// Stores variables known to be annotated.
LocalPointer<UVector> annotated; // Vector of `VariableName`s
// Stores variables that are in-scope but unannotated.
LocalPointer<UVector> unannotated; // Vector of `VariableName`s
// Stores free variables that are used in the RHS of a declaration
LocalPointer<UVector> freeVars; // Vector of `VariableNames`; tracks free variables
// This can't just be "variables that don't appear in
// `annotated` or `unannotated`", as a use introduces
// an implicit declaration
}; // class TypeEnvironment
// Checks a data model for semantic errors
// (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md )
class Checker {
public:
// Runs all checks, recording any errors found in the StaticErrors
// object passed to the constructor.
void check(UErrorCode&);
// Stores references to the data model and the error list; neither is copied.
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
private:
// Records a "missing selector annotation" error if the expression is not annotated.
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
// The addFreeVars overloads record variables not yet known to the type
// environment as free variables.
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Expression&, UErrorCode&);
void checkDeclarations(TypeEnvironment&, UErrorCode&);
void checkSelectors(const TypeEnvironment&, UErrorCode&);
void checkVariants(UErrorCode&);
// NOTE(review): the four overloads below are declared here, but no matching
// definitions were found alongside the other Checker methods -- confirm
// whether they are still needed.
void check(const OptionMap&);
void check(const Operand&);
void check(const Expression&);
void check(const Pattern&);
// The data model being checked
const MFDataModel& dataModel;
// Destination for the errors that the checks detect
StaticErrors& errors;
}; // class Checker
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_CHECKER_H
#endif // U_HIDE_DEPRECATED_API
// eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,286 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "messageformat2_allocation.h"
#include "messageformat2_errors.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
// Errors
// -----------
void DynamicErrors::setReservedError(UErrorCode& status) {
addError(DynamicError(DynamicErrorType::ReservedError), status);
}
void DynamicErrors::setFormattingError(const FunctionName& formatterName, UErrorCode& status) {
addError(DynamicError(DynamicErrorType::FormattingError, formatterName), status);
}
void DynamicErrors::setFormattingError(UErrorCode& status) {
addError(DynamicError(DynamicErrorType::FormattingError, UnicodeString("unknown formatter")), status);
}
void DynamicErrors::setOperandMismatchError(const FunctionName& formatterName, UErrorCode& status) {
addError(DynamicError(DynamicErrorType::OperandMismatchError, formatterName), status);
}
void StaticErrors::setDuplicateOptionName(UErrorCode& status) {
addError(StaticError(StaticErrorType::DuplicateOptionName), status);
}
void StaticErrors::setMissingSelectorAnnotation(UErrorCode& status) {
addError(StaticError(StaticErrorType::MissingSelectorAnnotation), status);
}
void DynamicErrors::setSelectorError(const FunctionName& selectorName, UErrorCode& status) {
addError(DynamicError(DynamicErrorType::SelectorError, selectorName), status);
}
void DynamicErrors::setUnknownFunction(const FunctionName& functionName, UErrorCode& status) {
addError(DynamicError(DynamicErrorType::UnknownFunction, functionName), status);
}
void DynamicErrors::setUnresolvedVariable(const VariableName& v, UErrorCode& status) {
addError(DynamicError(DynamicErrorType::UnresolvedVariable, v), status);
}
// Keeps a reference to the static errors `e` and creates an empty vector for
// resolution and formatting errors.
DynamicErrors::DynamicErrors(const StaticErrors& e, UErrorCode& status) : staticErrors(e) {
    UVector* errorList = createUVector(status);
    resolutionAndFormattingErrors.adoptInstead(errorList);
}
// Creates an empty vector for syntax and data model errors.
StaticErrors::StaticErrors(UErrorCode& status) {
    UVector* errorList = createUVector(status);
    syntaxAndDataModelErrors.adoptInstead(errorList);
}
// Move constructor: copies the summary flags and takes over `other`'s error vector.
StaticErrors::StaticErrors(StaticErrors&& other) noexcept {
    U_ASSERT(other.syntaxAndDataModelErrors.isValid());

    syntaxError = other.syntaxError;
    dataModelError = other.dataModelError;
    missingSelectorAnnotationError = other.missingSelectorAnnotationError;

    UVector* adopted = other.syntaxAndDataModelErrors.orphan();
    syntaxAndDataModelErrors.adoptInstead(adopted);
}
// Copying constructor: duplicates every error in `other` into a fresh vector
// and copies the summary flags. On allocation failure, sets `errorCode` to
// U_MEMORY_ALLOCATION_ERROR and stops.
StaticErrors::StaticErrors(const StaticErrors& other, UErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    U_ASSERT(other.syntaxAndDataModelErrors.isValid());
    syntaxAndDataModelErrors.adoptInstead(createUVector(errorCode));
    CHECK_ERROR(errorCode);

    const UVector& sourceErrors = *other.syntaxAndDataModelErrors;
    for (int32_t idx = 0; idx < sourceErrors.size(); ++idx) {
        const StaticError* original = static_cast<const StaticError*>(sourceErrors.elementAt(idx));
        U_ASSERT(original != nullptr);

        StaticError* duplicate = new StaticError(*original);
        if (duplicate == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        syntaxAndDataModelErrors->adoptElement(duplicate, errorCode);
    }

    syntaxError = other.syntaxError;
    dataModelError = other.dataModelError;
    missingSelectorAnnotationError = other.missingSelectorAnnotationError;
}
int32_t DynamicErrors::count() const {
U_ASSERT(resolutionAndFormattingErrors.isValid() && staticErrors.syntaxAndDataModelErrors.isValid());
return resolutionAndFormattingErrors->size() + staticErrors.syntaxAndDataModelErrors->size();
}
bool DynamicErrors::hasError() const {
return count() > 0;
}
bool DynamicErrors::hasStaticError() const {
U_ASSERT(staticErrors.syntaxAndDataModelErrors.isValid());
return staticErrors.syntaxAndDataModelErrors->size() > 0;
}
const DynamicError& DynamicErrors::first() const {
U_ASSERT(resolutionAndFormattingErrors->size() > 0);
return *static_cast<DynamicError*>(resolutionAndFormattingErrors->elementAt(0));
}
// Translates the first recorded error into a UErrorCode.
// Does nothing unless `status` is exactly U_ZERO_ERROR and at least one error
// has been recorded. Static errors take precedence over dynamic ones.
// TODO: Eventually want to return all errors to caller
void DynamicErrors::checkErrors(UErrorCode& status) const {
    if (status != U_ZERO_ERROR || count() == 0) {
        return;
    }

    // Just handle the first error
    if (staticErrors.syntaxAndDataModelErrors->size() > 0) {
        switch (staticErrors.first().type) {
        case StaticErrorType::DuplicateDeclarationError:
            status = U_MF_DUPLICATE_DECLARATION_ERROR;
            break;
        case StaticErrorType::DuplicateOptionName:
            status = U_MF_DUPLICATE_OPTION_NAME_ERROR;
            break;
        case StaticErrorType::VariantKeyMismatchError:
            status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
            break;
        case StaticErrorType::NonexhaustivePattern:
            status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
            break;
        case StaticErrorType::MissingSelectorAnnotation:
            status = U_MF_MISSING_SELECTOR_ANNOTATION_ERROR;
            break;
        case StaticErrorType::SyntaxError:
            status = U_MF_SYNTAX_ERROR;
            break;
        case StaticErrorType::UnsupportedStatementError:
            status = U_MF_UNSUPPORTED_STATEMENT_ERROR;
            break;
        }
        return;
    }

    // No static errors, so the first error is a dynamic one
    U_ASSERT(resolutionAndFormattingErrors->size() > 0);
    switch (first().type) {
    case DynamicErrorType::UnknownFunction:
        status = U_MF_UNKNOWN_FUNCTION_ERROR;
        break;
    case DynamicErrorType::UnresolvedVariable:
        status = U_MF_UNRESOLVED_VARIABLE_ERROR;
        break;
    case DynamicErrorType::FormattingError:
        status = U_MF_FORMATTING_ERROR;
        break;
    case DynamicErrorType::OperandMismatchError:
        status = U_MF_OPERAND_MISMATCH_ERROR;
        break;
    case DynamicErrorType::ReservedError:
        status = U_MF_UNSUPPORTED_EXPRESSION_ERROR;
        break;
    case DynamicErrorType::SelectorError:
        status = U_MF_SELECTOR_ERROR;
        break;
    }
}
// Records a SyntaxError.
void StaticErrors::addSyntaxError(UErrorCode& status) {
    StaticError err(StaticErrorType::SyntaxError);
    addError(std::move(err), status);
}
// Records a static error: updates the summary flag(s) for its type and appends
// a heap-allocated copy to the error vector, which adopts it.
// Does nothing if `status` is already a failure.
void StaticErrors::addError(StaticError&& e, UErrorCode& status) {
    CHECK_ERROR(status);

    // Read the type before `e` is handed to std::move(), so that nothing is
    // read from a moved-from object
    const StaticErrorType type = e.type;
    void* errorP = static_cast<void*>(create<StaticError>(std::move(e), status));
    U_ASSERT(syntaxAndDataModelErrors.isValid());

    switch (type) {
    case StaticErrorType::SyntaxError:
        syntaxError = true;
        break;
    case StaticErrorType::MissingSelectorAnnotation:
        // Tracked separately, and also counts as a data model error
        missingSelectorAnnotationError = true;
        dataModelError = true;
        break;
    case StaticErrorType::DuplicateDeclarationError:
    case StaticErrorType::DuplicateOptionName:
    case StaticErrorType::VariantKeyMismatchError:
    case StaticErrorType::NonexhaustivePattern:
    case StaticErrorType::UnsupportedStatementError:
        dataModelError = true;
        break;
    }

    syntaxAndDataModelErrors->adoptElement(errorP, status);
}
// Records a dynamic error: updates the summary flag for its type (ReservedError
// has none) and appends a heap-allocated copy to the error vector, which
// adopts it. Does nothing if `status` is already a failure.
void DynamicErrors::addError(DynamicError&& e, UErrorCode& status) {
    CHECK_ERROR(status);

    // Read the type before `e` is handed to std::move(), so that nothing is
    // read from a moved-from object
    const DynamicErrorType type = e.type;
    void* errorP = static_cast<void*>(create<DynamicError>(std::move(e), status));
    U_ASSERT(resolutionAndFormattingErrors.isValid());

    switch (type) {
    case DynamicErrorType::UnresolvedVariable:
        unresolvedVariableError = true;
        break;
    case DynamicErrorType::FormattingError:
    case DynamicErrorType::OperandMismatchError:
        // Operand mismatches are reported through the formatting-error flag
        formattingError = true;
        break;
    case DynamicErrorType::ReservedError:
        // No summary flag for reserved errors
        break;
    case DynamicErrorType::SelectorError:
        selectorError = true;
        break;
    case DynamicErrorType::UnknownFunction:
        unknownFunctionError = true;
        break;
    }

    // Adopt exactly once, outside the switch, so the allocation can never be
    // left without an owner
    resolutionAndFormattingErrors->adoptElement(errorP, status);
}
// Returns the first static error; at least one must have been recorded.
const StaticError& StaticErrors::first() const {
    U_ASSERT(syntaxAndDataModelErrors.isValid() && syntaxAndDataModelErrors->size() > 0);
    const void* head = syntaxAndDataModelErrors->elementAt(0);
    return *static_cast<const StaticError*>(head);
}
// Out-of-line destructors; all bodies are empty.
StaticErrors::~StaticErrors() {}
DynamicErrors::~DynamicErrors() {}
// Generic destructor for the Error template
template<typename ErrorType>
Error<ErrorType>::~Error() {}
// Definitions of the destructor specializations for the two error types
// used in this file
template<>
Error<StaticErrorType>::~Error() {}
template<>
Error<DynamicErrorType>::~Error() {}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,155 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_ERRORS_H
#define MESSAGEFORMAT2_ERRORS_H
#if U_SHOW_CPLUSPLUS_API
/**
* \file
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Errors
// ----------
class DynamicErrors;
class StaticErrors;
// Internal class -- used as a private field in MessageFormatter
// A single error: a type tag (`ErrorType` is one of the error enums) plus an
// optional string.
template <typename ErrorType>
class Error : public UObject {
public:
// Creates an error of the given type with empty contents.
Error(ErrorType ty) : type(ty) {}
// Creates an error of the given type, storing a copy of `s` as its contents.
Error(ErrorType ty, const UnicodeString& s) : type(ty), contents(s) {}
virtual ~Error();
private:
// The error lists read `type` directly
friend class DynamicErrors;
friend class StaticErrors;
ErrorType type;
UnicodeString contents;
}; // class Error
// Type tags for the errors stored in StaticErrors
// (its vector is named `syntaxAndDataModelErrors`).
enum StaticErrorType {
DuplicateDeclarationError,
DuplicateOptionName,
MissingSelectorAnnotation,
NonexhaustivePattern,
SyntaxError,
UnsupportedStatementError,
VariantKeyMismatchError
};
// Type tags for the errors stored in DynamicErrors
// (its vector is named `resolutionAndFormattingErrors`).
enum DynamicErrorType {
UnresolvedVariable,
FormattingError,
OperandMismatchError,
ReservedError,
SelectorError,
UnknownFunction,
};
using StaticError = Error<StaticErrorType>;
using DynamicError = Error<DynamicErrorType>;
// These explicit specialization declarations have to come before the
// destructor definitions
template<>
Error<StaticErrorType>::~Error();
template<>
Error<DynamicErrorType>::~Error();
// Holds the static (syntax and data model) errors, plus summary flags
// recording which categories of error are present.
class StaticErrors : public UObject {
private:
// DynamicErrors reads the error vector directly
friend class DynamicErrors;
// Vector of `StaticError`s
LocalPointer<UVector> syntaxAndDataModelErrors;
// Summary flags
bool dataModelError = false;
bool missingSelectorAnnotationError = false;
bool syntaxError = false;
public:
StaticErrors(UErrorCode&);
// Convenience methods that record an error of the named type
void setMissingSelectorAnnotation(UErrorCode&);
void setDuplicateOptionName(UErrorCode&);
void addSyntaxError(UErrorCode&);
bool hasDataModelError() const { return dataModelError; }
bool hasSyntaxError() const { return syntaxError; }
bool hasMissingSelectorAnnotationError() const { return missingSelectorAnnotationError; }
// Records the given error
void addError(StaticError&&, UErrorCode&);
// NOTE(review): no definition of this method was found alongside the other
// StaticErrors methods -- confirm where (or whether) it is defined.
void checkErrors(UErrorCode&);
// Returns the first recorded error
const StaticError& first() const;
// Copying constructor; takes an error code
StaticErrors(const StaticErrors&, UErrorCode&);
StaticErrors(StaticErrors&&) noexcept;
virtual ~StaticErrors();
}; // class StaticErrors
// Holds the dynamic (resolution and formatting) errors, together with a
// reference to the static errors, so that both can be queried through one object.
class DynamicErrors : public UObject {
private:
// Held by reference, not copied
const StaticErrors& staticErrors;
// Vector of `DynamicError`s
LocalPointer<UVector> resolutionAndFormattingErrors;
// Summary flags
bool formattingError = false;
bool selectorError = false;
bool unknownFunctionError = false;
bool unresolvedVariableError = false;
public:
DynamicErrors(const StaticErrors&, UErrorCode&);
// Number of static and dynamic errors combined
int32_t count() const;
// Convenience methods that record an error of the named type
void setSelectorError(const FunctionName&, UErrorCode&);
void setReservedError(UErrorCode&);
void setUnresolvedVariable(const VariableName&, UErrorCode&);
void setUnknownFunction(const FunctionName&, UErrorCode&);
void setFormattingError(const FunctionName&, UErrorCode&);
// Used when the name of the offending formatter is unknown
void setFormattingError(UErrorCode&);
void setOperandMismatchError(const FunctionName&, UErrorCode&);
// Queries; the data model, syntax and missing-selector-annotation queries
// are forwarded to the static errors
bool hasDataModelError() const { return staticErrors.hasDataModelError(); }
bool hasFormattingError() const { return formattingError; }
bool hasSelectorError() const { return selectorError; }
bool hasSyntaxError() const { return staticErrors.hasSyntaxError(); }
bool hasUnknownFunctionError() const { return unknownFunctionError; }
bool hasMissingSelectorAnnotationError() const { return staticErrors.hasMissingSelectorAnnotationError(); }
bool hasUnresolvedVariableError() const { return unresolvedVariableError; }
// Records the given error
void addError(DynamicError&&, UErrorCode&);
// Sets the error code according to the first recorded error, if any
void checkErrors(UErrorCode&) const;
bool hasError() const;
bool hasStaticError() const;
// Returns the first recorded dynamic error
const DynamicError& first() const;
virtual ~DynamicErrors();
}; // class DynamicErrors
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_ERRORS_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,208 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "messageformat2_allocation.h"
#include "messageformat2_cached_formatters.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
// Auxiliary data structures used during formatting a message
namespace message2 {
using namespace data_model;
// Functions
// -------------
// Move constructor: the members are default-constructed first and then
// move-assigned from `other`
ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) {
name = std::move(other.name);
value = std::move(other.value);
}
// Out-of-line destructor; nothing to release explicitly
ResolvedFunctionOption::~ResolvedFunctionOption() {}
// Exposes the resolved options as a raw array: stores the number of options
// in `len` and returns the array (which may be null when `len` is 0).
const ResolvedFunctionOption* FunctionOptions::getResolvedFunctionOptions(int32_t& len) const {
    // A non-empty option list must be backed by an array
    U_ASSERT(functionOptionsLen == 0 || options != nullptr);
    len = functionOptionsLen;
    return options;
}
// Builds the option array by moving the elements out of `optionsVector`.
// On entry with a failing `status`, or if the array cannot be allocated,
// the object is left empty (null array, zero length); allocation failure
// sets `status` to U_MEMORY_ALLOCATION_ERROR.
FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) {
    // Establish the empty state before any early exit: the destructor
    // unconditionally `delete[]`s `options`, so it must never be left unset.
    options = nullptr;
    functionOptionsLen = 0;
    CHECK_ERROR(status);
    options = moveVectorToArray<ResolvedFunctionOption>(optionsVector, functionOptionsLen);
    if (options == nullptr) {
        // Keep the invariant "non-zero length implies non-null array" that
        // the accessors rely on when they index into `options`
        functionOptionsLen = 0;
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
// Searches the options linearly for one named `key`. On a match, copies its
// value into `option` and returns true; otherwise returns false and leaves
// `option` unmodified.
UBool FunctionOptions::getFunctionOption(const UnicodeString& key, Formattable& option) const {
    // A missing array is only legal for an empty option list
    U_ASSERT(options != nullptr || functionOptionsLen == 0);
    int32_t idx = 0;
    while (idx < functionOptionsLen) {
        const ResolvedFunctionOption& candidate = options[idx];
        if (candidate.getName() == key) {
            option = candidate.getValue();
            return true;
        }
        ++idx;
    }
    return false;
}
// Returns the value of the option named `key` if it exists and holds a
// string; returns the empty string in every other case.
UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const {
    Formattable found;
    // For anything else, including non-string values, return "".
    // Alternately, could try to stringify the non-string option.
    // (Currently, no tests require that.)
    if (!getFunctionOption(key, found) || found.getType() != UFMT_STRING) {
        return {};
    }
    UErrorCode localErrorCode = U_ZERO_ERROR;
    UnicodeString text = found.getString(localErrorCode);
    // The type was checked above, so string extraction is expected to succeed
    U_ASSERT(U_SUCCESS(localErrorCode));
    return text;
}
// Move assignment: releases the array currently owned by this object, then
// takes over `other`'s array and length, leaving `other` empty.
FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept {
    if (this != &other) {
        // Free the old array first; overwriting the pointer without this
        // would leak it whenever the target already held options
        delete[] options;
        options = other.options;
        functionOptionsLen = other.functionOptionsLen;
        other.options = nullptr;
        other.functionOptionsLen = 0;
    }
    return *this;
}
// Move constructor: steals `other`'s array and length, leaving `other` empty.
// The members are set directly rather than via move assignment, because
// move assignment frees the current array and there is none yet.
FunctionOptions::FunctionOptions(FunctionOptions&& other) {
    options = other.options;
    functionOptionsLen = other.functionOptionsLen;
    other.options = nullptr;
    other.functionOptionsLen = 0;
}
// Releases the owned option array
FunctionOptions::~FunctionOptions() {
    // delete[] on a null pointer is a no-op, so no guard is required
    delete[] options;
}
// ResolvedSelector
// ----------------
// Full constructor: records the selector's name, wraps `sel` in the
// `selector` LocalPointer member, and moves in the options and the operand value.
// `sel` must be non-null (asserted).
ResolvedSelector::ResolvedSelector(const FunctionName& fn,
Selector* sel,
FunctionOptions&& opts,
FormattedPlaceholder&& val)
: selectorName(fn), selector(sel), options(std::move(opts)), value(std::move(val)) {
U_ASSERT(sel != nullptr);
}
// Value-only constructor: no selector is set, so the other members are
// default-initialized
ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move(val)) {}
// Move assignment: takes over the selector object, name, options and value
// of `other`.
ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept {
    // Detach the selector from `other` before adopting it here, so that it
    // has exactly one owner at every point
    Selector* adopted = other.selector.orphan();
    selector.adoptInstead(adopted);
    selectorName = std::move(other.selectorName);
    options = std::move(other.options);
    value = std::move(other.value);
    return *this;
}

// Move constructor: default-initializes the members, then reuses move assignment
ResolvedSelector::ResolvedSelector(ResolvedSelector&& other) {
    *this = std::move(other);
}

ResolvedSelector::~ResolvedSelector() {}
// PrioritizedVariant
// ------------------
// Orders variants by their `priority` field only
UBool PrioritizedVariant::operator<(const PrioritizedVariant& other) const {
    return priority < other.priority;
}

PrioritizedVariant::~PrioritizedVariant() {}
// ---------------- Environments and closures
// Allocates a non-empty environment binding `var` to the closure `c`, chained
// onto `parent`. Returns nullptr if `errorCode` already indicates failure, or
// (after setting U_MEMORY_ALLOCATION_ERROR) if allocation fails.
Environment* Environment::create(const VariableName& var, Closure&& c, Environment* parent, UErrorCode& errorCode) {
    NULL_ON_ERROR(errorCode);
    Environment* env = new NonEmptyEnvironment(var, std::move(c), parent);
    if (env != nullptr) {
        return env;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
// Allocates an empty environment. Returns nullptr if `errorCode` already
// indicates failure, or (after setting U_MEMORY_ALLOCATION_ERROR) if
// allocation fails.
Environment* Environment::create(UErrorCode& errorCode) {
    NULL_ON_ERROR(errorCode);
    Environment* env = new EmptyEnvironment();
    if (env != nullptr) {
        return env;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
// Looking anything up in the empty environment is a caller error: callers
// are expected to check has() first
const Closure& EmptyEnvironment::lookup(const VariableName& v) const {
    (void) v;
    U_ASSERT(false);
    UPRV_UNREACHABLE_EXIT;
}

// Returns this frame's closure if it binds `v`; otherwise continues the
// search in the parent environment
const Closure& NonEmptyEnvironment::lookup(const VariableName& v) const {
    return (v == var) ? rhs : parent->lookup(v);
}
// The empty environment binds no variables
bool EmptyEnvironment::has(const VariableName& v) const {
    (void) v;
    return false;
}

// True if this frame binds `v`, or if any enclosing frame does
bool NonEmptyEnvironment::has(const VariableName& v) const {
    return v == var || parent->has(v);
}
// Out-of-line definitions of the (empty) virtual destructors declared in the headers
Environment::~Environment() {}
NonEmptyEnvironment::~NonEmptyEnvironment() {}
EmptyEnvironment::~EmptyEnvironment() {}
Closure::~Closure() {}
CachedFormatters::~CachedFormatters() {}
// MessageContext methods
// Delegates to the accumulated DynamicErrors so that `status` reflects any
// recorded error
void MessageContext::checkErrors(UErrorCode& status) const {
CHECK_ERROR(status);
errors.checkErrors(status);
}
// Looks up `v` among the externally supplied message arguments; the result
// is whatever MessageArguments::getArgument() returns
const Formattable* MessageContext::getGlobal(const VariableName& v, UErrorCode& errorCode) const {
return arguments.getArgument(v, errorCode);
}
// Stores a reference to `args` (not a copy) and creates a fresh DynamicErrors
// that refers to the static errors `e`
MessageContext::MessageContext(const MessageArguments& args,
const StaticErrors& e,
UErrorCode& status) : arguments(args), errors(e, status) {}
MessageContext::~MessageContext() {}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,203 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_EVALUATION_H
#define MESSAGEFORMAT2_EVALUATION_H
#if U_SHOW_CPLUSPLUS_API
/**
* \file
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_function_registry.h"
#include "messageformat2_errors.h"
// Auxiliary data structures used during formatting a message
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// PrioritizedVariant
// A variant (keys + pattern) paired with an integer priority.
// For how this class is used, see the references to (integer, variant) tuples
// in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
class PrioritizedVariant : public UObject {
public:
    PrioritizedVariant() = default;
    PrioritizedVariant(PrioritizedVariant&&) = default;
    PrioritizedVariant& operator=(PrioritizedVariant&&) noexcept = default;
    // Compares by `priority` only
    UBool operator<(const PrioritizedVariant&) const;
    // Zero-initialized so that a default-constructed object never exposes an
    // indeterminate value to operator< or comparePrioritizedVariants()
    int32_t priority = 0;
    /* const */ SelectorKeys keys;
    /* const */ Pattern pat;
    // The parameter type is kept for source compatibility; the conversion to
    // the signed member type is made explicit
    PrioritizedVariant(uint32_t p,
                       const SelectorKeys& k,
                       const Pattern& pattern) noexcept : priority(static_cast<int32_t>(p)), keys(k), pat(pattern) {}
    virtual ~PrioritizedVariant();
}; // class PrioritizedVariant
// Three-way comparison of two UElements that point to PrioritizedVariant
// objects: returns -1, 0 or 1 as the left priority is less than, equal to or
// greater than the right priority.
static inline int32_t comparePrioritizedVariants(UElement left, UElement right) {
    const int32_t lhs = static_cast<const PrioritizedVariant*>(left.pointer)->priority;
    const int32_t rhs = static_cast<const PrioritizedVariant*>(right.pointer)->priority;
    if (lhs == rhs) {
        return 0;
    }
    return lhs < rhs ? -1 : 1;
}
// Encapsulates a value to be scrutinized by a `match` with its resolved
// options and the name of the selector.
// Move-only in practice: only move construction/assignment are declared.
class ResolvedSelector : public UObject {
public:
ResolvedSelector() {}
// `selector` is stored in the LocalPointer member below, which deletes it
// when this object is destroyed
ResolvedSelector(const FunctionName& fn,
Selector* selector,
FunctionOptions&& options,
FormattedPlaceholder&& value);
// Used either for errors, or when selector isn't yet known
explicit ResolvedSelector(FormattedPlaceholder&& value);
// True if a selector object is present
bool hasSelector() const { return selector.isValid(); }
const FormattedPlaceholder& argument() const { return value; }
// Returns an rvalue reference to the stored value so the caller can move
// from it
FormattedPlaceholder&& takeArgument() { return std::move(value); }
// Precondition: hasSelector(). Returns a non-owning pointer.
const Selector* getSelector() {
U_ASSERT(selector.isValid());
return selector.getAlias();
}
// Returns an rvalue reference to the stored options so the caller can move
// from them
FunctionOptions&& takeOptions() {
return std::move(options);
}
const FunctionName& getSelectorName() const { return selectorName; }
virtual ~ResolvedSelector();
ResolvedSelector& operator=(ResolvedSelector&&) noexcept;
ResolvedSelector(ResolvedSelector&&);
private:
FunctionName selectorName; // For error reporting
LocalPointer<Selector> selector;
FunctionOptions options;
FormattedPlaceholder value;
}; // class ResolvedSelector
// Closures and environments
// -------------------------
class Environment;
// A closure represents the right-hand side of a variable
// declaration, along with an environment giving values
// to its free variables.
// Both parts are held by reference, so the Expression and the Environment
// passed to the constructor must outlive the Closure.
class Closure : public UMemory {
public:
const Expression& getExpr() const {
return expr;
}
const Environment& getEnv() const {
return env;
}
Closure(const Expression& expression, const Environment& environment) : expr(expression), env(environment) {}
Closure(Closure&&) = default;
virtual ~Closure();
private:
// An unevaluated expression
const Expression& expr;
// The environment mapping names used in this
// expression to other expressions
const Environment& env;
};
// An environment is represented as a linked chain of
// non-empty environments, terminating at an empty environment.
// It's searched using linear search.
class Environment : public UMemory {
public:
// True if the variable is bound somewhere in this chain
virtual bool has(const VariableName&) const = 0;
// Returns the closure bound to the variable; asserts if the search reaches
// the empty environment, so check has() first
virtual const Closure& lookup(const VariableName&) const = 0;
// Creates an empty environment; returns nullptr on failure
static Environment* create(UErrorCode&);
// Creates a new frame binding the variable to the closure, chained onto the
// given parent environment; returns nullptr on failure
static Environment* create(const VariableName&, Closure&&, Environment*, UErrorCode&);
virtual ~Environment();
};
class NonEmptyEnvironment;
// The terminator of an environment chain: binds no variables.
// has()/lookup() are private overrides, so they are reached through the
// Environment interface.
class EmptyEnvironment : public Environment {
public:
EmptyEnvironment() = default;
virtual ~EmptyEnvironment();
private:
friend class Environment;
bool has(const VariableName&) const override;
const Closure& lookup(const VariableName&) const override;
// NOTE(review): no definitions of these two `create` overloads are visible next to the
// Environment::create() definitions; they may be unused leftovers -- confirm
static EmptyEnvironment* create(UErrorCode&);
static NonEmptyEnvironment* create(const VariableName&, Closure&&, Environment*, UErrorCode&);
};
// One frame of an environment chain: binds a single variable to a closure
// and links to the enclosing (parent) environment.
// The constructor is private; instances are created by the friend class Environment.
class NonEmptyEnvironment : public Environment {
private:
friend class Environment;
bool has(const VariableName&) const override;
const Closure& lookup(const VariableName&) const override;
// NOTE(review): no definition of this `create` overload is visible next to the
// Environment::create() definitions; it may be an unused leftover -- confirm
static NonEmptyEnvironment* create(const VariableName&, Closure&&, const Environment*, UErrorCode&);
virtual ~NonEmptyEnvironment();
private:
friend class Environment;
NonEmptyEnvironment(const VariableName& v, Closure&& c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {}
// The variable bound by this frame
VariableName var;
// The closure that `var` is bound to
Closure rhs;
// Next frame in the chain of linked environments; held in a LocalPointer,
// so the parent is deleted together with this frame
const LocalPointer<Environment> parent;
};
// The context holds the information needed to process
// an entire message: the message arguments and the error list.
// NOTE(review): the original comment also mentioned a formatter cache, but no
// such member is declared here.
class MessageContext : public UMemory {
public:
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
// Looks up a variable among the message arguments
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
// If any errors were set, update `status` accordingly
void checkErrors(UErrorCode& status) const;
DynamicErrors& getErrors() { return errors; }
virtual ~MessageContext();
private:
// Held by reference: the MessageArguments must outlive this context
const MessageArguments& arguments; // External message arguments
// Errors accumulated during parsing/formatting
DynamicErrors errors;
}; // class MessageContext
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_EVALUATION_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,334 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_formattable.h"
#include "unicode/smpdtfmt.h"
#include "messageformat2_macros.h"
#include "limits.h"
U_NAMESPACE_BEGIN
namespace message2 {
// Fallback values are enclosed in curly braces;
// see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#formatting-fallback-values
// Returns `s` wrapped as "{" + s + "}".
static UnicodeString fallbackToString(const UnicodeString& s) {
    UnicodeString braced;
    braced.append(LEFT_CURLY_BRACE).append(s).append(RIGHT_CURLY_BRACE);
    return braced;
}
// Copy-and-swap assignment: `other` is taken by value (already a copy of, or
// moved from, the argument) and its state is swapped into *this
Formattable& Formattable::operator=(Formattable other) noexcept {
swap(*this, other);
return *this;
}
// Copy constructor: copies the stored value and the flag that tells whether
// a stored double represents a date
Formattable::Formattable(const Formattable& other) {
contents = other.contents;
holdsDate = other.holdsDate;
}
// Creates a Formattable holding the decimal number written in `number`.
// Sets `status` to U_ILLEGAL_ARGUMENT_ERROR (and returns a default-constructed
// Formattable) if `number` is longer than an int32_t can express; any error
// reported by the icu::Formattable constructor is passed through in `status`.
Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
    Formattable f;
    // The relevant overload of the StringPiece constructor
    // casts the string length to int32_t, so we have to check
    // that the length makes sense. The bound is int32_t's maximum rather
    // than int's, since int32_t is the type the length is cast to.
    if (number.size() > static_cast<size_t>(INT32_MAX)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    } else {
        f.contents = icu::Formattable(StringPiece(number), status);
    }
    return f;
}
// Maps the alternative currently stored in `contents` to a UFormattableType.
// A stored double is reported as a date when `holdsDate` is set; a wrapped
// icu::Formattable (decimal) is reported as long, double or int64 according
// to its own type. Anything not matched by the checks below is an array.
UFormattableType Formattable::getType() const {
    if (std::holds_alternative<double>(contents)) {
        if (holdsDate) {
            return UFMT_DATE;
        }
        return UFMT_DOUBLE;
    }
    if (std::holds_alternative<int64_t>(contents)) {
        return UFMT_INT64;
    }
    if (std::holds_alternative<UnicodeString>(contents)) {
        return UFMT_STRING;
    }
    if (isDecimal()) {
        const icu::Formattable::Type wrapped = std::get_if<icu::Formattable>(&contents)->getType();
        if (wrapped == icu::Formattable::Type::kLong) {
            return UFMT_LONG;
        }
        if (wrapped == icu::Formattable::Type::kDouble) {
            return UFMT_DOUBLE;
        }
        return UFMT_INT64;
    }
    if (std::holds_alternative<const FormattableObject*>(contents)) {
        return UFMT_OBJECT;
    }
    return UFMT_ARRAY;
}
// Returns the stored array and writes its length to `len`.
// If this object does not hold an array, sets `len` to 0, sets `status` to
// U_ILLEGAL_ARGUMENT_ERROR and returns nullptr.
const Formattable* Formattable::getArray(int32_t& len, UErrorCode& status) const {
    NULL_ON_ERROR(status);
    using ArrayAndLength = std::pair<const Formattable*, int32_t>;
    if (getType() == UFMT_ARRAY) {
        const ArrayAndLength& arr = *std::get_if<ArrayAndLength>(&contents);
        U_ASSERT(arr.first != nullptr);
        len = arr.second;
        return arr.first;
    }
    len = 0;
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return nullptr;
}
// Returns the stored value as an int64_t. Numeric decimals and doubles are
// converted by icu::Formattable::getInt64(); for types that have no integer
// interpretation, sets `status` to U_INVALID_FORMAT_ERROR and returns 0.
int64_t Formattable::getInt64(UErrorCode& status) const {
    if (isDecimal() && isNumeric()) {
        return std::get_if<icu::Formattable>(&contents)->getInt64(status);
    }
    const UFormattableType kind = getType();
    if (kind == UFMT_LONG || kind == UFMT_INT64) {
        return *std::get_if<int64_t>(&contents);
    }
    if (kind == UFMT_DOUBLE) {
        // Let icu::Formattable perform the double -> int64 conversion
        const icu::Formattable asDouble(*std::get_if<double>(&contents));
        return asDouble.getInt64(status);
    }
    status = U_INVALID_FORMAT_ERROR;
    return 0;
}
// Converts this value to an icu::Formattable.
// Arrays and objects cannot be converted: for those, `status` is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default icu::Formattable is returned.
// A default icu::Formattable is also returned if `status` indicates failure
// on entry.
icu::Formattable Formattable::asICUFormattable(UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return {};
    }
    // Compute the type tag once; it drives every decision below
    const UFormattableType type = getType();
    // Type must not be UFMT_ARRAY or UFMT_OBJECT
    if (type == UFMT_ARRAY || type == UFMT_OBJECT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }
    // Decimals already wrap an icu::Formattable. This is also the only state
    // in which getType() reports UFMT_LONG, so the switch below needs no
    // UFMT_LONG case. The case removed here read `contents` as a double,
    // which a decimal never holds, and so would have dereferenced null.
    if (isDecimal()) {
        return *std::get_if<icu::Formattable>(&contents);
    }
    switch (type) {
    case UFMT_DATE: {
        return icu::Formattable(*std::get_if<double>(&contents), icu::Formattable::kIsDate);
    }
    case UFMT_DOUBLE: {
        return icu::Formattable(*std::get_if<double>(&contents));
    }
    case UFMT_INT64: {
        return icu::Formattable(*std::get_if<int64_t>(&contents));
    }
    case UFMT_STRING: {
        return icu::Formattable(*std::get_if<UnicodeString>(&contents));
    }
    default: {
        // Already checked for UFMT_ARRAY and UFMT_OBJECT
        return icu::Formattable();
    }
    }
}
// Out-of-line definitions of the (empty) destructors declared in the headers
Formattable::~Formattable() {}
FormattableObject::~FormattableObject() {}
FormattedMessage::~FormattedMessage() {}
// Wraps a string result. `s` is a const reference, so it is copied;
// the previous `std::move(s)` could not move from a const object and
// only disguised the copy.
FormattedValue::FormattedValue(const UnicodeString& s) {
    type = kString;
    stringOutput = s;
}
// Wraps a number result, taking over the contents of `n`
FormattedValue::FormattedValue(number::FormattedNumber&& n) {
    type = kNumber;
    numberOutput = std::move(n);
}
// Move assignment: copies the type tag and moves over only the output that
// the tag says is active (the string for kString, the number otherwise)
FormattedValue& FormattedValue::operator=(FormattedValue&& other) noexcept {
    type = other.type;
    if (type != kString) {
        numberOutput = std::move(other.numberOutput);
    } else {
        stringOutput = std::move(other.stringOutput);
    }
    return *this;
}

FormattedValue::~FormattedValue() {}
// Move assignment. `formatted` and `previousOptions` are only transferred
// when `other` is an evaluated value; otherwise this object keeps whatever it
// held in those members before.
FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept {
type = other.type;
// `source` and `fallback` are copied rather than moved, so `other` still
// holds them afterwards
source = other.source;
if (type == kEvaluated) {
formatted = std::move(other.formatted);
previousOptions = std::move(other.previousOptions);
}
fallback = other.fallback;
return *this;
}
// Returns the source value this placeholder was created from
const Formattable& FormattedPlaceholder::asFormattable() const {
return source;
}
// Default formatters
// ------------------
// Each overload formats `toFormat` with a NumberFormatter that has no
// settings other than the locale.

// double overload
number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode) {
    const number::LocalizedNumberFormatter formatter = number::NumberFormatter::withLocale(locale);
    return formatter.formatDouble(toFormat, errorCode);
}

// int32_t overload
number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode) {
    const number::LocalizedNumberFormatter formatter = number::NumberFormatter::withLocale(locale);
    return formatter.formatInt(toFormat, errorCode);
}

// int64_t overload
number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode) {
    const number::LocalizedNumberFormatter formatter = number::NumberFormatter::withLocale(locale);
    return formatter.formatInt(toFormat, errorCode);
}

// Decimal-string overload
number::FormattedNumber formatNumberWithDefaults(const Locale& locale, StringPiece toFormat, UErrorCode& errorCode) {
    const number::LocalizedNumberFormatter formatter = number::NumberFormatter::withLocale(locale);
    return formatter.formatDecimal(toFormat, errorCode);
}
// Creates a DateFormat using SHORT date and SHORT time styles for `locale`.
// The caller owns the result. Returns nullptr if `errorCode` already
// indicates failure, or (after setting U_MEMORY_ALLOCATION_ERROR) if no
// instance could be created.
DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) {
    NULL_ON_ERROR(errorCode);
    DateFormat* created = DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale);
    if (created == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return created;
}
void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
CHECK_ERROR(errorCode);
LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);
df->format(date, result, 0, errorCode);
}
// Called when output is required and the contents are an unevaluated `Formattable`;
// formats the source `Formattable` to a string with defaults, if it can be
// formatted with a default formatter.
// Returns an evaluated placeholder on success. For types without a default
// formatter, sets `status` to U_MF_FORMATTING_ERROR and returns a placeholder
// built from the input's fallback string.
static FormattedPlaceholder formatWithDefaults(const Locale& locale, const FormattedPlaceholder& input, UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
}
const Formattable& toFormat = input.asFormattable();
// Try as decimal number first
if (toFormat.isNumeric()) {
// Note: the ICU Formattable has to be created here since the StringPiece
// refers to state inside the Formattable; so otherwise we'll have a reference
// to a temporary object
icu::Formattable icuFormattable = toFormat.asICUFormattable(status);
StringPiece asDecimal = icuFormattable.getDecimalNumber(status);
if (U_FAILURE(status)) {
return {};
}
// If no decimal representation is available, fall through to the
// per-type handling below
if (asDecimal != nullptr) {
return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, asDecimal, status)));
}
}
UFormattableType type = toFormat.getType();
switch (type) {
case UFMT_DATE: {
UnicodeString result;
UDate d = toFormat.getDate(status);
// The getter matches the type tag, so it is expected to succeed
U_ASSERT(U_SUCCESS(status));
formatDateWithDefaults(locale, d, result, status);
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
case UFMT_DOUBLE: {
double d = toFormat.getDouble(status);
U_ASSERT(U_SUCCESS(status));
return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status)));
}
case UFMT_LONG: {
int32_t l = toFormat.getLong(status);
U_ASSERT(U_SUCCESS(status));
return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status)));
}
case UFMT_INT64: {
int64_t i = toFormat.getInt64Value(status);
U_ASSERT(U_SUCCESS(status));
return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status)));
}
case UFMT_STRING: {
const UnicodeString& s = toFormat.getString(status);
U_ASSERT(U_SUCCESS(status));
// Strings are passed through unchanged (as a copy)
return FormattedPlaceholder(input, FormattedValue(UnicodeString(s)));
}
default: {
// No default formatters for other types; use fallback
status = U_MF_FORMATTING_ERROR;
// Note: it would be better to set an internal formatting error so that a string
// (e.g. the type tag) can be provided. However, this method is called by the
// public method formatToString() and thus can't take a MessageContext
return FormattedPlaceholder(input.getFallback());
}
}
}
// Called when string output is required; forces output to be produced
// if none is present (including formatting number output as a string).
// Returns the empty string if `status` indicates failure on entry.
UnicodeString FormattedPlaceholder::formatToString(const Locale& locale,
UErrorCode& status) const {
if (U_FAILURE(status)) {
return {};
}
// Fallbacks and null operands are rendered as "{" + fallback + "}"
if (isFallback() || isNullOperand()) {
return fallbackToString(fallback);
}
// Evaluated value: either just return the string, or format the number
// as a string and return it
if (isEvaluated()) {
if (formatted.isString()) {
return formatted.getString();
} else {
return formatted.getNumber().toString(status);
}
}
// Unevaluated value: first evaluate it fully, then format
UErrorCode savedStatus = status;
FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status);
if (status == U_MF_FORMATTING_ERROR) {
U_ASSERT(evaluated.isFallback());
// NOTE(review): unlike the fallback branch above, this returns the fallback
// text without curly braces and leaves `status` set to
// U_MF_FORMATTING_ERROR -- confirm that both are intended
return evaluated.getFallback();
}
// Ignore U_USING_DEFAULT_WARNING
if (status == U_USING_DEFAULT_WARNING) {
status = savedStatus;
}
// `evaluated` now takes one of the branches above
return evaluated.formatToString(locale, status);
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,353 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2.h"
#include "messageformat2_allocation.h"
#include "messageformat2_cached_formatters.h"
#include "messageformat2_checker.h"
#include "messageformat2_errors.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
#include "messageformat2_parser.h"
#include "messageformat2_serializer.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
// MessageFormatter::Builder
// -------------------------------------
// Parses `pat` and stores the resulting data model, the normalized input and
// the pattern itself in this builder, replacing any previously set pattern
// or data model. Parse errors are reported through `parseError` and
// `errorCode` and recorded in the builder's error list.
// If the error list cannot be (re)created, `errorCode` is set and the builder
// is returned without parsing.
MessageFormatter::Builder& MessageFormatter::Builder::setPattern(const UnicodeString& pat, UParseError& parseError, UErrorCode& errorCode) {
    normalizedInput.remove();
    // Start from a fresh error list: setDataModel() leaves `errors` null
    // (it is dereferenced below), and errors recorded for a previously set
    // pattern must not carry over to this one.
    delete errors;
    errors = new StaticErrors(errorCode);
    if (errors == nullptr) {
        if (U_SUCCESS(errorCode)) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
        return *this;
    }
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    // Parse the pattern
    MFDataModel::Builder tree(errorCode);
    Parser(pat, tree, *errors, normalizedInput).parse(parseError, errorCode);
    // Build the data model based on what was parsed
    dataModel = tree.build(errorCode);
    hasDataModel = true;
    hasPattern = true;
    pattern = pat;
    return *this;
}
// Does not adopt `reg`: only its address is stored, so `reg` must remain
// valid for as long as this builder (and formatters built from it) use it
MessageFormatter::Builder& MessageFormatter::Builder::setFunctionRegistry(const MFFunctionRegistry& reg) {
customMFFunctionRegistry = &reg;
return *this;
}
// Sets the locale used by formatters built from this builder (stored by copy)
MessageFormatter::Builder& MessageFormatter::Builder::setLocale(const Locale& loc) {
locale = loc;
return *this;
}
// Installs `newDataModel` (by move) as this builder's data model, replacing
// any previously set pattern: the normalized input and the error list that
// belonged to the pattern are discarded.
MessageFormatter::Builder& MessageFormatter::Builder::setDataModel(MFDataModel&& newDataModel) {
    // Take the new model
    dataModel = std::move(newDataModel);
    hasDataModel = true;
    // Drop everything derived from a previously parsed pattern
    hasPattern = false;
    normalizedInput.remove();
    delete errors;
    errors = nullptr;
    return *this;
}
/*
This build() method is non-destructive: the builder is left unchanged and
can be reused. This entails the risk that its borrowed MFFunctionRegistry
pointer could become invalidated while a built formatter still uses it.
NOTE(review): the original comment also mentioned borrowed MFDataModel
pointers, but setDataModel() moves the data model into the builder --
confirm whether that part still applies.
*/
MessageFormatter MessageFormatter::Builder::build(UErrorCode& errorCode) const {
return MessageFormatter(*this, errorCode);
}
// Creates a builder that uses the default locale, has no custom function
// registry, and starts with an empty error list. Sets `errorCode` to
// U_MEMORY_ALLOCATION_ERROR if the error list cannot be allocated.
MessageFormatter::Builder::Builder(UErrorCode& errorCode) : locale(Locale::getDefault()), customMFFunctionRegistry(nullptr) {
    // Initialize errors
    errors = new StaticErrors(errorCode);
    // Report an allocation failure only when no earlier failure is already
    // recorded in `errorCode`
    if (errors == nullptr && U_SUCCESS(errorCode)) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
}
// Releases the owned error list
MessageFormatter::Builder::~Builder() {
    // `delete` on a null pointer is a no-op, so no guard is required
    delete errors;
}
// MessageFormatter
// Builds a formatter from the builder's current state: sets up the standard
// function registry, copies the normalized input, data model and static
// errors, allocates the formatter cache, and runs the checker over the data
// model. `success` is set to U_INVALID_STATE_ERROR if the builder has no data
// model and to U_MEMORY_ALLOCATION_ERROR if an owned object cannot be
// allocated. On any failure the object is left safe to destroy.
MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
    // cleanup() (run by the destructor and by move assignment) deletes both
    // owned pointers, so give them a defined value before any early return
    cachedFormatters = nullptr;
    errors = nullptr;
    CHECK_ERROR(success);
    // Set up the standard function registry
    MFFunctionRegistry::Builder standardFunctionsBuilder(success);
    FormatterFactory* dateTime = StandardFunctions::DateTimeFactory::dateTime(success);
    FormatterFactory* date = StandardFunctions::DateTimeFactory::date(success);
    FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success);
    FormatterFactory* number = new StandardFunctions::NumberFactory();
    FormatterFactory* integer = new StandardFunctions::IntegerFactory();
    standardFunctionsBuilder.adoptFormatter(FunctionName(UnicodeString("datetime")), dateTime, success)
        .adoptFormatter(FunctionName(UnicodeString("date")), date, success)
        .adoptFormatter(FunctionName(UnicodeString("time")), time, success)
        .adoptFormatter(FunctionName(UnicodeString("number")), number, success)
        .adoptFormatter(FunctionName(UnicodeString("integer")), integer, success)
        .adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success)
        .adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success)
        .adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success);
    CHECK_ERROR(success);
    standardMFFunctionRegistry = standardFunctionsBuilder.build();
    CHECK_ERROR(success);
    standardMFFunctionRegistry.checkStandard();
    normalizedInput = builder.normalizedInput;
    // Build data model
    // First, check that there is a data model
    // (which might have been set by setDataModel(), or to
    // the data model parsed from the pattern by setPattern())
    if (!builder.hasDataModel) {
        success = U_INVALID_STATE_ERROR;
        return;
    }
    dataModel = builder.dataModel;
    if (builder.errors != nullptr) {
        // Copy the builder's errors so the formatter does not alias them
        errors = new StaticErrors(*builder.errors, success);
    } else {
        // Initialize errors
        errors = new StaticErrors(success);
    }
    // `errors` is dereferenced below, so a failed allocation must be
    // reported instead of being passed on
    if (errors == nullptr) {
        success = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CHECK_ERROR(success);
    // Initialize formatter cache
    cachedFormatters = new CachedFormatters();
    if (cachedFormatters == nullptr) {
        success = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Note: we currently evaluate variables lazily,
    // without memoization. This call is still necessary
    // to check out-of-scope uses of local variables in
    // right-hand sides (unresolved variable errors can
    // only be checked when arguments are known)
    // Check for resolution errors
    Checker(dataModel, *errors).check(success);
}
// Frees the objects this formatter owns: the formatter cache and the static
// error list. The pointers themselves are left unchanged.
void MessageFormatter::cleanup() noexcept {
    // `delete` on a null pointer is a no-op, so no guards are required
    delete cachedFormatters;
    delete errors;
}
// Move assignment: releases this formatter's owned objects and takes over
// `other`'s state. `other` is left without a formatter cache or error list.
// The custom function registry is not owned, so only the pointer is copied.
MessageFormatter& MessageFormatter::operator=(MessageFormatter&& other) noexcept {
    // Self-move must be a no-op: cleanup() would otherwise free the very
    // objects about to be taken over, leaving both pointers null
    if (this == &other) {
        return *this;
    }
    cleanup();
    locale = std::move(other.locale);
    standardMFFunctionRegistry = std::move(other.standardMFFunctionRegistry);
    customMFFunctionRegistry = other.customMFFunctionRegistry;
    dataModel = std::move(other.dataModel);
    normalizedInput = std::move(other.normalizedInput);
    // Transfer ownership of the heap-allocated members
    cachedFormatters = other.cachedFormatters;
    other.cachedFormatters = nullptr;
    errors = other.errors;
    other.errors = nullptr;
    return *this;
}
// Returns the data model this formatter was built from
const MFDataModel& MessageFormatter::getDataModel() const { return dataModel; }
// Returns a pattern string produced by serializing the current data model
// (not the originally supplied pattern text)
UnicodeString MessageFormatter::getPattern() const {
// Converts the current data model back to a string
UnicodeString result;
Serializer serializer(getDataModel(), result);
serializer.serialize();
return result;
}
// Precondition: custom function registry exists
// (the stored pointer is dereferenced unconditionally)
const MFFunctionRegistry& MessageFormatter::getCustomMFFunctionRegistry() const {
U_ASSERT(hasCustomMFFunctionRegistry());
return *customMFFunctionRegistry;
}
// Releases the owned formatter cache and error list
MessageFormatter::~MessageFormatter() {
cleanup();
}
// Selector and formatter lookup
// -----------------------------
// Creates a selector instance for `functionName` in this formatter's locale.
// Precondition (asserted): `functionName` names a selector.
// Postcondition: selector != nullptr || U_FAILURE(status)
Selector* MessageFormatter::getSelector(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
    NULL_ON_ERROR(status);
    U_ASSERT(isSelector(functionName));
    const SelectorFactory* factory = lookupSelectorFactory(context, functionName, status);
    NULL_ON_ERROR(status);
    if (factory == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // Create a specific instance of the selector
    Selector* created = factory->createSelector(getLocale(), status);
    NULL_ON_ERROR(status);
    return created;
}
// Precondition: formatter is defined
// Returns the (possibly cached) formatter instance for `functionName`.
// NOTE(review): maybeCachedFormatter() returns nullptr when `status` indicates
// failure or when no formatter factory is found, and that result is
// dereferenced here unconditionally -- callers must guarantee neither can happen.
const Formatter& MessageFormatter::getFormatter(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
U_ASSERT(isFormatter(functionName));
return *maybeCachedFormatter(context, functionName, status);
}
bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const {
U_ASSERT(hasCustomMFFunctionRegistry());
const MFFunctionRegistry& reg = getCustomMFFunctionRegistry();
return reg.getDefaultFormatterNameByType(type, name);
}
// ---------------------------------------------------
// Function registry
// True if the standard function registry has a selector with this name
bool MessageFormatter::isBuiltInSelector(const FunctionName& functionName) const {
return standardMFFunctionRegistry.hasSelector(functionName);
}
// True if the standard function registry has a formatter with this name
bool MessageFormatter::isBuiltInFormatter(const FunctionName& functionName) const {
return standardMFFunctionRegistry.hasFormatter(functionName);
}
// https://github.com/unicode-org/message-format-wg/issues/409
// Unknown function = unknown function error
// Formatter used as selector = selector error
// Selector used as formatter = formatting error
//
// Finds the selector factory for `functionName`, consulting the standard
// registry first and then the custom registry (if there is one). Returns
// nullptr after recording a selector error (the name is a formatter) or an
// unknown-function error (the name is not registered at all).
const SelectorFactory* MessageFormatter::lookupSelectorFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
    DynamicErrors& err = context.getErrors();
    if (isBuiltInSelector(functionName)) {
        return standardMFFunctionRegistry.getSelector(functionName);
    }
    // From here on the lookup can only fail; what remains is to decide
    // which error to record
    bool knownAsFormatter = isBuiltInFormatter(functionName);
    if (!knownAsFormatter && hasCustomMFFunctionRegistry()) {
        const MFFunctionRegistry& customRegistry = getCustomMFFunctionRegistry();
        const SelectorFactory* fromCustom = customRegistry.getSelector(functionName);
        if (fromCustom != nullptr) {
            return fromCustom;
        }
        knownAsFormatter = customRegistry.getFormatter(functionName) != nullptr;
    }
    if (knownAsFormatter) {
        // Formatter used as selector
        err.setSelectorError(functionName, status);
    } else {
        // Either there is no custom function registry and the function
        // isn't built-in, or the function doesn't exist in either the built-in
        // or custom registry.
        // Unknown function error
        err.setUnknownFunction(functionName, status);
    }
    return nullptr;
}
// Returns non-owned pointer. Returns pointer rather than reference because it can fail.
// Returns non-const because FormatterFactory is mutable.
//
// Finds the formatter factory for `functionName`, consulting the standard
// registry first and then the custom registry (if there is one). Returns
// nullptr after recording a formatting error (the name is a selector) or an
// unknown-function error (the name is not registered at all).
FormatterFactory* MessageFormatter::lookupFormatterFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
    DynamicErrors& err = context.getErrors();
    if (isBuiltInFormatter(functionName)) {
        return standardMFFunctionRegistry.getFormatter(functionName);
    }
    // From here on the lookup can only fail; what remains is to decide
    // which error to record
    bool knownAsSelector = isBuiltInSelector(functionName);
    if (!knownAsSelector && hasCustomMFFunctionRegistry()) {
        const MFFunctionRegistry& customRegistry = getCustomMFFunctionRegistry();
        FormatterFactory* fromCustom = customRegistry.getFormatter(functionName);
        if (fromCustom != nullptr) {
            return fromCustom;
        }
        knownAsSelector = customRegistry.getSelector(functionName) != nullptr;
    }
    if (knownAsSelector) {
        // Selector used as formatter
        err.setFormattingError(functionName, status);
    } else {
        // Either there is no custom function registry and the function
        // isn't built-in, or the function doesn't exist in either the built-in
        // or custom registry.
        // Unknown function error
        err.setUnknownFunction(functionName, status);
    }
    return nullptr;
}
// True only if a custom function registry is present and it has a formatter
// registered under `fn`.
bool MessageFormatter::isCustomFormatter(const FunctionName& fn) const {
    if (!hasCustomMFFunctionRegistry()) {
        return false;
    }
    return getCustomMFFunctionRegistry().getFormatter(fn) != nullptr;
}
// True only if a custom function registry is present and it has a selector
// registered under `fn`.
bool MessageFormatter::isCustomSelector(const FunctionName& fn) const {
    if (!hasCustomMFFunctionRegistry()) {
        return false;
    }
    return getCustomMFFunctionRegistry().getSelector(fn) != nullptr;
}
// Returns the formatter for `functionName`, creating it and handing it to
// `cachedFormatters` on first use.
//
// context:      supplies the DynamicErrors that a failed factory lookup records into
// functionName: name of the formatting function
// errorCode:    input/output error code; nothing is done if it already indicates failure
//
// Returns a non-owned pointer (ownership is passed to `cachedFormatters` via
// adoptFormatter()), or nullptr if `errorCode` indicates failure, if no formatter
// factory exists for `functionName` (an unknown-function or formatting error has
// then been recorded in `context`), or if the factory produced no formatter
// (`errorCode` is then set to U_MEMORY_ALLOCATION_ERROR).
const Formatter* MessageFormatter::maybeCachedFormatter(MessageContext& context, const FunctionName& functionName, UErrorCode& errorCode) const {
    NULL_ON_ERROR(errorCode);
    U_ASSERT(cachedFormatters != nullptr);

    const Formatter* result = cachedFormatters->getFormatter(functionName);
    if (result != nullptr) {
        // Already created by an earlier call
        return result;
    }

    // Create the formatter
    // First, look up the formatter factory for this function
    FormatterFactory* formatterFactory = lookupFormatterFactory(context, functionName, errorCode);
    NULL_ON_ERROR(errorCode);
    // If the formatter factory was null, there must have been
    // an earlier error/warning
    if (formatterFactory == nullptr) {
        U_ASSERT(context.getErrors().hasUnknownFunctionError() || context.getErrors().hasFormattingError());
        return nullptr;
    }

    // Create a specific instance of the formatter
    Formatter* formatter = formatterFactory->createFormatter(locale, errorCode);
    NULL_ON_ERROR(errorCode);
    if (formatter == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    cachedFormatters->adoptFormatter(functionName, formatter, errorCode);
    // If the cache could not store the formatter, do not hand the pointer out:
    // the cache does not hold it, so nothing would keep it alive for the caller.
    // NOTE(review): assumes adoptFormatter() follows the ICU "adopt" convention and
    // disposes of `formatter` itself when it fails -- confirm against its definition.
    NULL_ON_ERROR(errorCode);
    return formatter;
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,227 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H
#define MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/datefmt.h"
#include "unicode/messageformat2_function_registry.h"
U_NAMESPACE_BEGIN
namespace message2 {
// Built-in functions
/*
The standard functions are :datetime, :date, :time,
:number, :integer, and :string,
per https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md
as of https://github.com/unicode-org/message-format-wg/releases/tag/LDML45-alpha
*/
// Groups the implementations of the built-in functions as nested classes.
// There is no `public:` section at this level, so every nested class and
// static helper below is private to StandardFunctions and to its friend,
// MessageFormatter.
class StandardFunctions {
friend class MessageFormatter;
// Declared here, defined elsewhere.
// NOTE(review): presumably returns the string value of the option named
// `optionName` in `opts`, reporting problems through `errorCode` -- confirm
// against the definition.
static UnicodeString getStringOption(const FunctionOptions& opts,
const UnicodeString& optionName,
UErrorCode& errorCode);
// Forward declaration, so that DateTimeFactory can befriend DateTime
class DateTime;
// FormatterFactory for the date/time functions. Each instance carries a
// DateTimeType. The default constructor is deleted and the DateTimeType
// constructor is private, so outside code obtains instances through the
// static date() / time() / dateTime() functions.
class DateTimeFactory : public FormatterFactory {
public:
Formatter* createFormatter(const Locale& locale, UErrorCode& status) override;
// NOTE(review): presumably each of these returns a factory whose `type` is
// the matching DateTimeType; who owns the returned pointer is not visible
// here -- confirm against the definitions.
static DateTimeFactory* date(UErrorCode&);
static DateTimeFactory* time(UErrorCode&);
static DateTimeFactory* dateTime(UErrorCode&);
DateTimeFactory() = delete;
virtual ~DateTimeFactory();
private:
friend class DateTime;
// Which of the three date/time functions a factory (and the formatters
// it creates) stands for
typedef enum DateTimeType {
Date,
Time,
DateTime
} DateTimeType;
DateTimeType type;
DateTimeFactory(DateTimeType t) : type(t) {}
};
// Formatter for the date/time functions. The constructor is private and
// DateTimeFactory is a friend, so instances are created by the factory.
class DateTime : public Formatter {
public:
FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override;
virtual ~DateTime();
private:
// Stored by reference, not copied: the Locale passed to the constructor
// must outlive this object
const Locale& locale;
const DateTimeFactory::DateTimeType type;
friend class DateTimeFactory;
DateTime(const Locale& l, DateTimeFactory::DateTimeType t) : locale(l), type(t) {}
// Not named in the constructor's initializer list, so it is default-constructed
const LocalPointer<icu::DateFormat> icuFormatter;
/*
Looks up an option by name, first checking `opts`, then the cached options
in `toFormat` if applicable, and finally using a default
Ignores any options with non-string values
*/
UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat,
const FunctionOptions& opts,
const UnicodeString& optionName) const;
// Version for options that don't have defaults; sets the error
// code instead of returning a default value
UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat,
const FunctionOptions& opts,
const UnicodeString& optionName,
UErrorCode& errorCode) const;
};
// Note: IntegerFactory doesn't implement SelectorFactory;
// instead, an instance of PluralFactory is registered to the integer
// selector
// TODO
class IntegerFactory : public FormatterFactory {
public:
Formatter* createFormatter(const Locale& locale, UErrorCode& status) override;
virtual ~IntegerFactory();
};
// FormatterFactory for the number function. IntegerFactory is a friend and
// can therefore reach the private static integer() function.
class NumberFactory : public FormatterFactory {
public:
Formatter* createFormatter(const Locale& locale, UErrorCode& status) override;
virtual ~NumberFactory();
private:
friend class IntegerFactory;
static NumberFactory integer(const Locale& locale, UErrorCode& status);
};
// Formatter for numbers. Constructors are private; NumberFactory and
// StandardFunctions are friends.
class Number : public Formatter {
public:
FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override;
virtual ~Number();
private:
friend class NumberFactory;
friend class StandardFunctions;
// Both constructors build `icuFormatter` from NumberFormatter::withLocale(loc).
// The one-argument form leaves `isInteger` at its default of false.
Number(const Locale& loc) : locale(loc), icuFormatter(number::NumberFormatter::withLocale(loc)) {}
Number(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt), icuFormatter(number::NumberFormatter::withLocale(loc)) {}
static Number integer(const Locale& loc);
// These options have their own accessor methods, since they have different default values.
int32_t maximumFractionDigits(const FunctionOptions& options) const;
int32_t minimumFractionDigits(const FunctionOptions& options) const;
int32_t minimumSignificantDigits(const FunctionOptions& options) const;
int32_t maximumSignificantDigits(const FunctionOptions& options) const;
int32_t minimumIntegerDigits(const FunctionOptions& options) const;
bool usePercent(const FunctionOptions& options) const;
// Stored by reference, not copied: the Locale passed to the constructor
// must outlive this object
const Locale& locale;
const bool isInteger = false;
const number::LocalizedNumberFormatter icuFormatter;
};
// Declared here, defined elsewhere.
// NOTE(review): presumably derives a LocalizedNumberFormatter from `number`
// with the settings in `opts` applied -- confirm against the definition.
static number::LocalizedNumberFormatter formatterForOptions(const Number& number,
const FunctionOptions& opts,
UErrorCode& status);
// SelectorFactory for plural selection. Constructors are private;
// IntegerFactory and MessageFormatter are friends. integer() returns a
// factory whose `isInteger` flag is true.
class PluralFactory : public SelectorFactory {
public:
Selector* createSelector(const Locale& locale, UErrorCode& status) const override;
virtual ~PluralFactory();
private:
friend class IntegerFactory;
friend class MessageFormatter;
PluralFactory() {}
PluralFactory(bool isInt) : isInteger(isInt) {}
static PluralFactory integer() { return PluralFactory(true);}
const bool isInteger = false;
};
// Selector for plural selection. Constructors are private; IntegerFactory
// and PluralFactory are friends.
class Plural : public Selector {
public:
void selectKey(FormattedPlaceholder&& val,
FunctionOptions&& options,
const UnicodeString* keys,
int32_t keysLen,
UnicodeString* prefs,
int32_t& prefsLen,
UErrorCode& status) const override;
virtual ~Plural();
private:
friend class IntegerFactory;
friend class PluralFactory;
// Can't use UPluralType for this since we want to include
// exact matching as an option
typedef enum PluralType {
PLURAL_ORDINAL,
PLURAL_CARDINAL,
PLURAL_EXACT
} PluralType;
// The one-argument form leaves `isInteger` at its default of false
Plural(const Locale& loc) : locale(loc) {}
Plural(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt) {}
static Plural integer(const Locale& loc) { return Plural(loc, true); }
PluralType pluralType(const FunctionOptions& opts) const;
// Stored by reference, not copied: the Locale passed to the constructor
// must outlive this object
const Locale& locale;
const bool isInteger = false;
};
// SelectorFactory paired with TextSelector (TextSelector's constructor is
// private and this class is its friend)
class TextFactory : public SelectorFactory {
public:
Selector* createSelector(const Locale& locale, UErrorCode& status) const override;
virtual ~TextFactory();
};
// Selector created through TextFactory.
// NOTE(review): presumably implements the :string function's selection -- confirm.
class TextSelector : public Selector {
public:
void selectKey(FormattedPlaceholder&& val,
FunctionOptions&& options,
const UnicodeString* keys,
int32_t keysLen,
UnicodeString* prefs,
int32_t& prefsLen,
UErrorCode& status) const override;
virtual ~TextSelector();
private:
friend class TextFactory;
// Formatting `value` to a string might require the locale
// (stored by reference, not copied: it must outlive this object)
const Locale& locale;
TextSelector(const Locale& l) : locale(l) {}
};
};
// Free helper functions in namespace message2; declared here, defined in
// another translation unit.
// NOTE(review): the names suggest that these format a value for `locale`
// without any function options, and that formatDateWithDefaults() delivers
// its result through the UnicodeString& parameter -- confirm against the
// definitions.
extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode);
// Overloads for each kind of numeric input: double, int32_t, int64_t, and a
// StringPiece (presumably a decimal number string -- confirm)
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, StringPiece toFormat, UErrorCode& errorCode);
// NOTE(review): returns a raw DateFormat pointer; ownership is not visible
// here (presumably the caller's) -- confirm against the definition.
extern DateFormat* defaultDateTimeInstance(const Locale&, UErrorCode&);
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,109 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_MACROS_H
#define MESSAGEFORMAT2_MACROS_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/format.h"
#include "unicode/unistr.h"
#include "plurrule_impl.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace pluralimpl;
// Tokens for parser and serializer
// Each macro expands to a single code point, cast to UChar32.
// Syntactically significant characters
#define LEFT_CURLY_BRACE ((UChar32)0x007B) // '{'
#define RIGHT_CURLY_BRACE ((UChar32)0x007D) // '}'
#define HTAB ((UChar32)0x0009) // horizontal tab
#define CR ((UChar32)0x000D) // carriage return
#define LF ((UChar32)0x000A) // line feed
#define IDEOGRAPHIC_SPACE ((UChar32)0x3000) // U+3000 IDEOGRAPHIC SPACE
#define PIPE ((UChar32)0x007C) // '|'
#define EQUALS ((UChar32)0x003D) // '='
#define DOLLAR ((UChar32)0x0024) // '$'
#define COLON ((UChar32)0x003A) // ':'
#define PLUS ((UChar32)0x002B) // '+'
#define HYPHEN ((UChar32)0x002D) // '-'
#define PERIOD ((UChar32)0x002E) // '.'
#define UNDERSCORE ((UChar32)0x005F) // '_'
#define LOWERCASE_E ((UChar32)0x0065) // 'e'
#define UPPERCASE_E ((UChar32)0x0045) // 'E'
// Reserved sigils
#define BANG ((UChar32)0x0021) // '!'
#define AT ((UChar32)0x0040) // '@'
#define PERCENT ((UChar32)0x0025) // '%'
#define CARET ((UChar32)0x005E) // '^'
#define AMPERSAND ((UChar32)0x0026) // '&'
#define LESS_THAN ((UChar32)0x003C) // '<'
#define GREATER_THAN ((UChar32)0x003E) // '>'
#define QUESTION ((UChar32)0x003F) // '?'
#define TILDE ((UChar32)0x007E) // '~'
// Fallback
#define REPLACEMENT ((UChar32) 0xFFFD) // U+FFFD REPLACEMENT CHARACTER
// Keyword tokens. Three keywords are defined here: `.input`, `.local`, and `.match`.
// Each array holds the keyword's code points, including the leading '.',
// followed by a terminating 0.
static constexpr UChar32 ID_INPUT[] = {
0x2E, 0x69, 0x6E, 0x70, 0x75, 0x74, 0 /* ".input" */
};
static constexpr UChar32 ID_LOCAL[] = {
0x2E, 0x6C, 0x6F, 0x63, 0x61, 0x6C, 0 /* ".local" */
};
static constexpr UChar32 ID_MATCH[] = {
0x2E, 0x6D, 0x61, 0x74, 0x63, 0x68, 0 /* ".match" */
};
// Early-return helpers. Each expands to a bare `if` statement that returns from
// the enclosing function when `errorCode` indicates failure; they differ only
// in the value returned.
// NOTE(review): the expansions are not wrapped in `do { ... } while (0)`, so
// these must be used as standalone statements, never as the unbraced body of
// an `if` that has an `else`.
//
// For functions returning void: returns immediately if `errorCode` indicates failure
#define CHECK_ERROR(errorCode) \
if (U_FAILURE(errorCode)) { \
return; \
}
// For functions returning a pointer: returns nullptr immediately if `errorCode` indicates failure
#define NULL_ON_ERROR(errorCode) \
if (U_FAILURE(errorCode)) { \
return nullptr; \
}
// For member functions returning a reference to their object: returns `*this`
// immediately if `errorCode` indicates failure
#define THIS_ON_ERROR(errorCode) \
if (U_FAILURE(errorCode)) { \
return *this; \
}
// For functions returning a type that can be initialized from `{}`: returns `{}`
// immediately if `errorCode` indicates failure
#define EMPTY_ON_ERROR(errorCode) \
if (U_FAILURE(errorCode)) { \
return {}; \
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_MACROS_H
#endif // U_HIDE_DEPRECATED_API
// eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,148 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT_PARSER_H
#define MESSAGEFORMAT_PARSER_H
#include "unicode/messageformat2_data_model.h"
#include "unicode/parseerr.h"
#include "messageformat2_allocation.h"
#include "messageformat2_errors.h"
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Parser class (private)
// Parser for MessageFormat 2 message strings.
// Only the destructor is public; the constructor and parse() are private and
// MessageFormatter is a friend. The parser reads from `source` and holds
// references to a data model builder, an error collector, and a string for the
// normalized input, all supplied to the constructor.
class Parser : public UMemory {
public:
virtual ~Parser();
private:
friend class MessageFormatter;
// Entry point.
// NOTE(review): presumably parses all of `source` into `dataModel` and fills
// in the UParseError when a syntax error is found -- confirm against the definition.
void parse(UParseError&, UErrorCode&);
/*
Use an internal "parse error" structure to make it easier to translate
absolute offsets to line offsets.
This is translated back to a `UParseError` at the end of parsing.
*/
typedef struct MessageParseError {
// The line on which the error occurred
uint32_t line;
// The offset, relative to the erroneous line, on which the error occurred
uint32_t offset;
// The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
// It includes newline characters, because the index does too.
uint32_t lengthBeforeCurrentLine;
// This parser doesn't yet use the last two fields.
UChar preContext[U_PARSE_CONTEXT_LEN];
UChar postContext[U_PARSE_CONTEXT_LEN];
} MessageParseError;
// Binds the parser to its input and outputs. All four arguments are stored as
// references (nothing is copied), so they must outlive the parser.
// Starts at index 0 with a zeroed line/offset record and empty
// pre-/post-context strings.
Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef)
: source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
parseError.line = 0;
parseError.offset = 0;
parseError.lengthBeforeCurrentLine = 0;
parseError.preContext[0] = '\0';
parseError.postContext[0] = '\0';
}
// Used so `parseEscapeSequence()` can handle all types of escape sequences
// (literal, text, and reserved)
typedef enum { LITERAL, TEXT, RESERVED } EscapeKind;
// Error-position bookkeeping
static void translateParseError(const MessageParseError&, UParseError&);
static void setParseError(MessageParseError&, uint32_t);
void maybeAdvanceLine();
// Message-level structure: simple messages, bodies, declarations, selectors
Pattern parseSimpleMessage(UErrorCode&);
void parseBody(UErrorCode&);
void parseDeclarations(UErrorCode&);
void parseUnsupportedStatement(UErrorCode&);
void parseLocalDeclaration(UErrorCode&);
void parseInputDeclaration(UErrorCode&);
void parseSelectors(UErrorCode&);
// Whitespace and fixed tokens. The templated overloads take an array of code
// points, such as the keyword arrays defined in messageformat2_macros.h.
void parseWhitespaceMaybeRequired(bool, UErrorCode&);
void parseRequiredWhitespace(UErrorCode&);
void parseOptionalWhitespace(UErrorCode&);
void parseToken(UChar32, UErrorCode&);
void parseTokenWithWhitespace(UChar32, UErrorCode&);
template <int32_t N>
void parseToken(const UChar32 (&)[N], UErrorCode&);
template <int32_t N>
void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode&);
bool nextIsMatch() const;
// Names, identifiers and literals
UnicodeString parseName(UErrorCode&);
UnicodeString parseIdentifier(UErrorCode&);
UnicodeString parseDigits(UErrorCode&);
VariableName parseVariableName(UErrorCode&);
FunctionName parseFunction(UErrorCode&);
void parseEscapeSequence(EscapeKind, UnicodeString&, UErrorCode&);
void parseLiteralEscape(UnicodeString&, UErrorCode&);
Literal parseUnquotedLiteral(UErrorCode&);
Literal parseQuotedLiteral(UErrorCode&);
Literal parseLiteral(UErrorCode&);
// Attributes and options
void parseAttribute(UVector&, UErrorCode&);
OptionMap parseAttributes(UErrorCode&);
void parseOption(Operator::Builder&, UErrorCode&);
void parseOption(UVector&, UErrorCode&);
void parseOptions(Operator::Builder&, UErrorCode&);
OptionMap parseOptions(UErrorCode&);
// Reserved annotations
void parseReservedEscape(UnicodeString&, UErrorCode&);
void parseReservedChunk(Reserved::Builder&, UErrorCode&);
Reserved parseReserved(UErrorCode&);
Reserved parseReservedBody(Reserved::Builder&, UErrorCode&);
// Annotations, expressions, markup and placeholders
Operator parseAnnotation(UErrorCode&);
void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
Markup parseMarkup(UErrorCode&);
Expression parseExpression(UErrorCode&);
std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
// Text, variant keys and patterns
void parseTextEscape(UnicodeString&, UErrorCode&);
UnicodeString parseText(UErrorCode&);
Key parseKey(UErrorCode&);
SelectorKeys parseNonEmptyKeys(UErrorCode&);
void errorPattern(UErrorCode& status);
Pattern parseQuotedPattern(UErrorCode&);
// The input string
const UnicodeString &source;
// The current position within the input string
uint32_t index;
// Represents the current line (and when an error is indicated),
// character offset within the line of the parse error
MessageParseError parseError;
// The structure to use for recording errors
StaticErrors& errors;
// Normalized version of the input string (optional whitespace removed)
UnicodeString& normalizedInput;
// The parent builder
MFDataModel::Builder &dataModel;
}; // class Parser
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_PARSER_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -0,0 +1,336 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model.h"
#include "messageformat2_macros.h"
#include "messageformat2_serializer.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
// Generates a string representation of a data model
// ------------------------------------------------
using namespace data_model;
// Private helper methods
// Appends a single SPACE to the output string.
void Serializer::whitespace() {
    result.append(SPACE);
}
// Appends the code point `c` to the output string.
void Serializer::emit(UChar32 c) {
    result.append(c);
}
// Appends the string `s`, unchanged, to the output string.
void Serializer::emit(const UnicodeString& s) {
    result.append(s);
}
template <int32_t N>
void Serializer::emit(const UChar32 (&token)[N]) {
// Don't emit the terminator
for (int32_t i = 0; i < N - 1; i++) {
emit(token[i]);
}
}
void Serializer::emit(const Literal& l) {
if (l.isQuoted()) {
emit(PIPE);
const UnicodeString& contents = l.unquoted();
for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
// Re-escape any PIPE or BACKSLASH characters
switch(contents[i]) {
case BACKSLASH:
case PIPE: {
emit(BACKSLASH);
break;
}
default: {
break;
}
}
emit(contents[i]);
}
emit(PIPE);
} else {
emit(l.unquoted());
}
}
// Appends a variant key: ASTERISK for the wildcard key, otherwise the key's literal.
void Serializer::emit(const Key& k) {
    if (!k.isWildcard()) {
        emit(k.asLiteral());
    } else {
        emit(ASTERISK);
    }
}
void Serializer::emit(const SelectorKeys& k) {
const Key* ks = k.getKeysInternal();
int32_t len = k.len;
// It would be an error for `keys` to be empty;
// that would mean this is the single `pattern`
// variant, and in that case, this method shouldn't be called
U_ASSERT(len > 0);
for (int32_t i = 0; i < len; i++) {
if (i != 0) {
whitespace();
}
emit(ks[i]);
}
}
// Appends an operand, which must not be null: a variable is written as DOLLAR
// followed by its name; anything else is written as a literal.
void Serializer::emit(const Operand& rand) {
    U_ASSERT(!rand.isNull());

    if (!rand.isVariable()) {
        // Literal: quoted or unquoted
        emit(rand.asLiteral());
        return;
    }
    emit(DOLLAR);
    emit(rand.asVariable());
}
void Serializer::emit(const OptionMap& options) {
// Errors should have been checked before this point
UErrorCode localStatus = U_ZERO_ERROR;
U_ASSERT(!options.bogus);
for (int32_t i = 0; i < options.size(); i++) {
const Option& opt = options.getOption(i, localStatus);
// No need to check error code, since we already checked
// that !bogus
whitespace();
emit(opt.getName());
emit(EQUALS);
emit(opt.getValue());
}
}
void Serializer::emitAttributes(const OptionMap& attributes) {
// Errors should have been checked before this point
UErrorCode localStatus = U_ZERO_ERROR;
U_ASSERT(!attributes.bogus);
for (int32_t i = 0; i < attributes.size(); i++) {
const Option& attr = attributes.getOption(i, localStatus);
// No need to check error code, since we already checked
// that !bogus
whitespace();
emit(AT);
emit(attr.getName());
const Operand& v = attr.getValue();
if (!v.isNull()) {
emit(EQUALS);
emit(v);
}
}
}
void Serializer::emit(const Reserved& reserved) {
// Re-escape '\' / '{' / '|' / '}'
for (int32_t i = 0; i < reserved.numParts(); i++) {
const Literal& l = reserved.getPart(i);
if (l.isQuoted()) {
emit(l);
} else {
const UnicodeString& s = l.unquoted();
for (int32_t j = 0; ((int32_t) j) < s.length(); j++) {
switch(s[j]) {
case LEFT_CURLY_BRACE:
case PIPE:
case RIGHT_CURLY_BRACE:
case BACKSLASH: {
emit(BACKSLASH);
break;
}
default:
break;
}
emit(s[j]);
}
}
}
}
void Serializer::emit(const Expression& expr) {
emit(LEFT_CURLY_BRACE);
if (!expr.isReserved() && !expr.isFunctionCall()) {
// Literal or variable, no annotation
emit(expr.getOperand());
} else {
// Function call or reserved
if (!expr.isStandaloneAnnotation()) {
// Must be a function call that has an operand
emit(expr.getOperand());
whitespace();
}
UErrorCode localStatus = U_ZERO_ERROR;
const Operator* rator = expr.getOperator(localStatus);
U_ASSERT(U_SUCCESS(localStatus));
if (rator->isReserved()) {
const Reserved& reserved = rator->asReserved();
emit(reserved);
} else {
emit(COLON);
emit(rator->getFunctionName());
// No whitespace after function name, in case it has
// no options. (when there are options, emit(OptionMap) will
// emit the leading whitespace)
emit(rator->getOptionsInternal());
}
}
emitAttributes(expr.getAttributesInternal());
emit(RIGHT_CURLY_BRACE);
}
void Serializer::emit(const PatternPart& part) {
if (part.isText()) {
// Raw text
const UnicodeString& text = part.asText();
// Re-escape '{'/'}'/'\'
for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
switch(text[i]) {
case BACKSLASH:
case LEFT_CURLY_BRACE:
case RIGHT_CURLY_BRACE: {
emit(BACKSLASH);
break;
}
default:
break;
}
emit(text[i]);
}
return;
}
// Markup
if (part.isMarkup()) {
const Markup& markup = part.asMarkup();
emit(LEFT_CURLY_BRACE);
if (markup.isClose()) {
emit(SLASH);
} else {
emit(NUMBER_SIGN);
}
emit(markup.getName());
emit(markup.getOptionsInternal());
emitAttributes(markup.getAttributesInternal());
if (markup.isStandalone()) {
emit(SLASH);
}
emit(RIGHT_CURLY_BRACE);
return;
}
// Expression
emit(part.contents());
}
void Serializer::emit(const Pattern& pat) {
int32_t len = pat.numParts();
// Always quote pattern, which should match the normalized input
// if the parser is constructing it correctly
emit(LEFT_CURLY_BRACE);
emit(LEFT_CURLY_BRACE);
for (int32_t i = 0; i < len; i++) {
// No whitespace is needed here -- see the `pattern` nonterminal in the grammar
emit(pat.getPart(i));
}
emit(RIGHT_CURLY_BRACE);
emit(RIGHT_CURLY_BRACE);
}
void Serializer::serializeDeclarations() {
const Binding* bindings = dataModel.getLocalVariablesInternal();
U_ASSERT(bindings != nullptr);
for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
const Binding& b = bindings[i];
if (b.isLocal()) {
// No whitespace needed here -- see `message` in the grammar
emit(ID_LOCAL);
whitespace();
emit(DOLLAR);
emit(b.getVariable());
// No whitespace needed here -- see `local-declaration` in the grammar
emit(EQUALS);
// No whitespace needed here -- see `local-declaration` in the grammar
} else {
// Input declaration
emit(ID_INPUT);
// No whitespace needed here -- see `input-declaration` in the grammar
}
emit(b.getValue());
}
}
void Serializer::serializeUnsupported() {
const UnsupportedStatement* statements = dataModel.getUnsupportedStatementsInternal();
U_ASSERT(statements != nullptr);
for (int32_t i = 0; i < dataModel.unsupportedStatementsLen; i++) {
const UnsupportedStatement& s = statements[i];
emit(s.getKeyword());
UErrorCode localErrorCode = U_ZERO_ERROR;
const Reserved* r = s.getBody(localErrorCode);
if (U_SUCCESS(localErrorCode)) {
whitespace();
emit(*r);
}
const Expression* e = s.getExpressionsInternal();
for (int32_t j = 0; j < s.expressionsLen; j++) {
emit(e[j]);
}
}
}
void Serializer::serializeSelectors() {
U_ASSERT(!dataModel.hasPattern());
const Expression* selectors = dataModel.getSelectorsInternal();
emit(ID_MATCH);
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// No whitespace needed here -- see `selectors` in the grammar
emit(selectors[i]);
}
}
void Serializer::serializeVariants() {
U_ASSERT(!dataModel.hasPattern());
const Variant* variants = dataModel.getVariantsInternal();
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
const Variant& v = variants[i];
emit(v.getKeys());
// No whitespace needed here -- see `variant` in the grammar
emit(v.getPattern());
}
}
// Main (public) serializer method.
// Appends the whole data model to `result`: declarations, then unsupported
// statements, then either the single pattern or the selectors and variants.
void Serializer::serialize() {
    serializeDeclarations();
    serializeUnsupported();

    if (!dataModel.hasPattern()) {
        // Selectors message
        serializeSelectors();
        serializeVariants();
        return;
    }
    // Pattern message
    emit(dataModel.getPattern());
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,69 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT_SERIALIZER_H
#define MESSAGEFORMAT_SERIALIZER_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Serializer class (internal)
// Converts a data model back to a string.
// TODO: Should be private; made public so tests
// can use it
class U_I18N_API Serializer : public UMemory {
public:
    // Binds the serializer to the data model `m` to read from and the string
    // `s` to append to. Both are stored as references (nothing is copied), so
    // they must outlive the Serializer.
    Serializer(const MFDataModel& m, UnicodeString& s) : dataModel(m), result(s) {}
    // Appends the serialized form of `dataModel` to `result`.
    void serialize();
    // The data model being serialized (not owned)
    const MFDataModel& dataModel;
    // The string that output is appended to (not owned)
    UnicodeString& result;
private:
    // Appends a single space
    void whitespace();
    // One `emit` overload per kind of thing that can be written out
    void emit(UChar32);
    // Appends a zero-terminated array of code points, without the terminator
    template <int32_t N>
    void emit(const UChar32 (&)[N]);
    void emit(const UnicodeString&);
    void emit(const Literal&);
    void emit(const Key&);
    void emit(const SelectorKeys&);
    void emit(const Operand&);
    void emit(const Reserved&);
    void emit(const Expression&);
    void emit(const PatternPart&);
    void emit(const Pattern&);
    // Attributes share the OptionMap type with options, so they get their
    // own name rather than an overload
    void emitAttributes(const OptionMap&);
    void emit(const OptionMap&);
    // The sections of a message, called by serialize()
    void serializeUnsupported();
    void serializeDeclarations();
    void serializeSelectors();
    void serializeVariants();
}; // class Serializer
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_SERIALIZER_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -94,6 +94,17 @@ measunit.cpp
measunit_extra.cpp
measure.cpp
msgfmt.cpp
messageformat2.cpp
messageformat2_arguments.cpp
messageformat2_checker.cpp
messageformat2_data_model.cpp
messageformat2_errors.cpp
messageformat2_evaluation.cpp
messageformat2_formatter.cpp
messageformat2_formattable.cpp
messageformat2_function_registry.cpp
messageformat2_parser.cpp
messageformat2_serializer.cpp
name2uni.cpp
nfrs.cpp
nfrule.cpp

View file

@ -0,0 +1,406 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef MESSAGEFORMAT2_H
#define MESSAGEFORMAT2_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/unistr.h"
#ifndef U_HIDE_DEPRECATED_API
U_NAMESPACE_BEGIN
namespace message2 {
class CachedFormatters;
class Environment;
class MessageContext;
class ResolvedSelector;
class StaticErrors;
/**
* <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.
*
* <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
* description of the syntax with examples and use cases</a> and the corresponding
* <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
*
* The MessageFormatter class is mutable and movable. It is not copyable.
* (It is mutable because if it has a custom function registry, the registry may include
* `FormatterFactory` objects implementing custom formatters, which are allowed to contain
* mutable state.)
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API MessageFormatter : public UObject {
// Note: This class does not currently inherit from the existing
// `Format` class.
public:
/**
* Move assignment operator:
* The source MessageFormatter will be left in a valid but undefined state.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
MessageFormatter& operator=(MessageFormatter&&) noexcept;
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~MessageFormatter();
/**
* Formats the message to a string, using the data model that was previously set or parsed,
* and the given `arguments` object.
*
* @param arguments Reference to message arguments
* @param status Input/output error code used to indicate syntax errors, data model
* errors, resolution errors, formatting errors, selection errors, as well
* as other errors (such as memory allocation failures). Partial output
* is still provided in the presence of most error types.
* @return The string result of formatting the message with the given arguments.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
/**
* Not yet implemented; formats the message to a `FormattedMessage` object,
* using the data model that was previously set or parsed,
* and the given `arguments` object.
*
* @param arguments Reference to message arguments
* @param status Input/output error code used to indicate syntax errors, data model
* errors, resolution errors, formatting errors, selection errors, as well
* as other errors (such as memory allocation failures). Partial output
* is still provided in the presence of most error types.
* @return The `FormattedMessage` representing the formatted message.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
    // Not yet implemented: the arguments are ignored, and unless `status`
    // already indicates a failure it is set to U_UNSUPPORTED_ERROR
    (void) arguments;
    status = U_FAILURE(status) ? status : U_UNSUPPORTED_ERROR;
    return FormattedMessage(status);
}
/**
* Accesses the locale that this `MessageFormatter` object was created with.
*
* @return A reference to the locale.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
const Locale& getLocale() const {
    return locale;
}
/**
* Serializes the data model as a string in MessageFormat 2.0 syntax.
*
* @return A string representation of the data model.
* The string is a valid MessageFormat 2.0 message.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
UnicodeString getPattern() const;
/**
* Accesses the data model referred to by this
* `MessageFormatter` object.
*
* @return A reference to the data model.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
const MFDataModel& getDataModel() const;
/**
 * The mutable Builder class allows each part of the MessageFormatter to be initialized
 * separately; calling its `build()` method yields an immutable MessageFormatter.
 *
 * Not copyable or movable: the copy operations are explicitly deleted
 * (which also suppresses the implicit move operations).
 *
 * @internal ICU 75.0 technology preview
 * @deprecated This API is for technology preview only.
 */
class U_I18N_API Builder : public UObject {
private:
    friend class MessageFormatter;

    // The pattern to be parsed to generate the formatted message
    UnicodeString pattern;
    bool hasPattern = false;
    bool hasDataModel = false;
    // The data model to be used to generate the formatted message
    // Initialized either by `setDataModel()`, or by the parser
    // through a call to `setPattern()`
    MFDataModel dataModel;
    // Normalized representation of the pattern;
    // ignored if `setPattern()` wasn't called
    UnicodeString normalizedInput;
    // Errors (internal representation of parse errors)
    // Ignored if `setPattern()` wasn't called
    StaticErrors* errors;
    Locale locale;
    // Not owned
    const MFFunctionRegistry* customMFFunctionRegistry;

    // Do not define copy constructor/assignment operator.
    // `errors` is a raw pointer, so the implicitly generated member-wise
    // copy operations would leave two builders referring to the same
    // `StaticErrors` object. Deleting them enforces the documented
    // "not copyable or movable" contract, as `MFFunctionRegistry::Builder`
    // already does.
    Builder& operator=(const Builder&) = delete;
    Builder(const Builder&) = delete;

public:
    /**
     * Sets the locale to use for formatting.
     *
     * @param locale The desired locale.
     * @return       A reference to the builder.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    Builder& setLocale(const Locale& locale);
    /**
     * Sets the pattern (contents of the message) and parses it
     * into a data model. If a data model was
     * previously set, it is removed.
     *
     * @param pattern    A string in MessageFormat 2.0 syntax.
     * @param parseError Struct to receive information on the position
     *                   of an error within the pattern.
     * @param status     Input/output error code. If the
     *                   pattern cannot be parsed, set to failure code.
     * @return           A reference to the builder.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
    /**
     * Sets a custom function registry.
     *
     * @param functionRegistry Reference to the function registry to use.
     *        `functionRegistry` is not copied,
     *        and the caller must ensure its lifetime contains
     *        the lifetime of the `MessageFormatter` object built by this
     *        builder.
     * @return A reference to the builder.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
    /**
     * Sets a data model. If a pattern was previously set, it is removed.
     *
     * @param dataModel Data model to format. Passed by move.
     * @return          A reference to the builder.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    Builder& setDataModel(MFDataModel&& dataModel);
    /**
     * Constructs a new immutable MessageFormatter using the pattern or data model
     * that was previously set, and the locale (if it was previously set)
     * or default locale (otherwise).
     *
     * The builder object (`this`) can still be used after calling `build()`.
     *
     * @param status Input/output error code. If neither the pattern
     *               nor the data model is set, set to failure code.
     * @return       The new MessageFormatter object
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    MessageFormatter build(UErrorCode& status) const;
    /**
     * Default constructor.
     * Returns a Builder with the default locale and with no
     * data model or pattern set. Either `setPattern()`
     * or `setDataModel()` has to be called before calling `build()`.
     *
     * @param status Input/output error code.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    Builder(UErrorCode& status);
    /**
     * Destructor.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    virtual ~Builder();
}; // class MessageFormatter::Builder
// TODO: Shouldn't be public; only used for testing
/**
* Returns a string consisting of the input with optional spaces removed.
*
* @return A reference to the normalized string representation of the input.
*         The reference refers to a member of this object, so it must not
*         be used after the `MessageFormatter` is destroyed.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
private:
friend class Builder;
friend class MessageContext;
MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
MessageFormatter() = delete; // default constructor not implemented
// Do not define default assignment operator
const MessageFormatter &operator=(const MessageFormatter &) = delete;
ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const;
ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const;
// Selection methods
// Takes a vector of FormattedPlaceholders
void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
// Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
void filterVariants(const UVector&, UVector&, UErrorCode&) const;
// Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
void sortVariants(const UVector&, UVector&, UErrorCode&) const;
// Takes a vector of strings (input) and a vector of strings (output)
void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const;
// Takes a vector of FormattedPlaceholders (input),
// and a vector of vectors of strings (output)
void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
// Formatting methods
[[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
// Formats a call to a formatting function
// Dispatches on argument type
[[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument,
MessageContext& context,
UErrorCode& status) const;
// Dispatches on function name
[[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName,
FormattedPlaceholder&& argument,
FunctionOptions&& options,
MessageContext& context,
UErrorCode& status) const;
// Formats an expression that appears as a selector
ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const;
// Formats an expression that appears in a pattern or as the definition of a local variable
[[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
[[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
[[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
[[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
// Function registry methods
bool hasCustomMFFunctionRegistry() const {
    // The custom registry pointer is null when no custom registry was supplied
    const bool registryAbsent = (nullptr == customMFFunctionRegistry);
    return !registryAbsent;
}
// Precondition: custom function registry exists
// Note: the returned reference is const, but the values in the MFFunctionRegistry
// are still mutable (a FormatterFactory can have mutable state)
const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
bool isCustomFormatter(const FunctionName&) const;
FormatterFactory* lookupFormatterFactory(MessageContext&, const FunctionName&, UErrorCode& status) const;
bool isBuiltInSelector(const FunctionName&) const;
bool isBuiltInFormatter(const FunctionName&) const;
bool isCustomSelector(const FunctionName&) const;
const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
bool isSelector(const FunctionName& fn) const {
    // Built-in selectors are consulted first; the custom check only runs
    // when the name is not a built-in selector.
    if (isBuiltInSelector(fn)) {
        return true;
    }
    return isCustomSelector(fn);
}
bool isFormatter(const FunctionName& fn) const {
    // Built-in formatters are consulted first; the custom check only runs
    // when the name is not a built-in formatter.
    if (isBuiltInFormatter(fn)) {
        return true;
    }
    return isCustomFormatter(fn);
}
const Formatter* maybeCachedFormatter(MessageContext&, const FunctionName&, UErrorCode&) const;
Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
const Formatter& getFormatter(MessageContext&, const FunctionName&, UErrorCode&) const;
bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
// Checking for resolution errors
void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
void initErrors(UErrorCode&);
void clearErrors() const;
void cleanup() noexcept;
// The locale this MessageFormatter was created with
/* const */ Locale locale;
// Registry for built-in functions
MFFunctionRegistry standardMFFunctionRegistry;
// Registry for custom functions; may be null if no custom registry supplied
// Note: this is *not* owned by the MessageFormatter object
// The reason for this choice is to have a non-destructive MessageFormatter::Builder,
// while also not requiring the function registry to be deeply-copyable. Making the
// function registry copyable would impose a requirement on any implementations
// of the FormatterFactory and SelectorFactory interfaces to implement a custom
// clone() method, which is necessary to avoid sharing between copies of the
// function registry (and thus double-frees)
// Not deeply immutable (the values in the function registry are mutable,
// as a FormatterFactory can have mutable state)
const MFFunctionRegistry* customMFFunctionRegistry;
// Data model, representing the parsed message
MFDataModel dataModel;
// Normalized version of the input string (optional whitespace removed)
UnicodeString normalizedInput;
// Formatter cache
// Must be a raw pointer to avoid including the internal header file
// defining CachedFormatters
// Owned by `this`
// TODO: This is an optimization that the "TemperatureFormatter" test
// (ported from ICU4J) was checking for; however, that test was removed
// in order to make `setFormatter()` safe, so maybe this should be
// removed too
CachedFormatters* cachedFormatters;
// Errors -- only used while parsing and checking for data model errors; then
// the MessageContext keeps track of errors
// Must be a raw pointer to avoid including the internal header file
// defining StaticErrors
// Owned by `this`
StaticErrors* errors;
}; // class MessageFormatter
} // namespace message2
U_NAMESPACE_END
#endif // U_HIDE_DEPRECATED_API
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_H
// eof

View file

@ -0,0 +1,143 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef MESSAGEFORMAT2_ARGUMENTS_H
#define MESSAGEFORMAT2_ARGUMENTS_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_formattable.h"
#include "unicode/unistr.h"
#ifndef U_HIDE_DEPRECATED_API
#include <map>
U_NAMESPACE_BEGIN
/// @cond DOXYGEN_IGNORE
// Export an explicit template instantiation of the LocalPointer that is used as a
// data member of various MessageFormatDataModel classes.
// (When building DLLs for Windows this is required.)
// (See measunit_impl.h, datefmt.h, collationiterator.h, erarules.h and others
// for similar examples.)
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
#if defined(_MSC_VER)
// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
#pragma warning(push)
#pragma warning(disable: 4661)
#endif
template class U_I18N_API LocalPointerBase<UnicodeString>;
template class U_I18N_API LocalPointerBase<message2::Formattable>;
template class U_I18N_API LocalArray<UnicodeString>;
template class U_I18N_API LocalArray<message2::Formattable>;
#if defined(_MSC_VER)
// Restore the warning state saved by the matching `#pragma warning(push)` above
#pragma warning(pop)
#endif
#endif
/// @endcond
namespace message2 {
class MessageContext;
// Arguments
// ----------
/**
*
* The `MessageArguments` class represents the named arguments to a message.
* It is immutable and movable. It is not copyable.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API MessageArguments : public UObject {
public:
    /**
     * Message arguments constructor, which takes a map and returns a container
     * of arguments that can be passed to a `MessageFormatter`.
     *
     * If `status` is already a failure on entry, or if memory allocation fails,
     * the object is left empty (no names, no values, length 0).
     *
     * @param args A reference to a map from strings (argument names) to `message2::Formattable`
     *        objects (argument values). The keys and values of the map are copied into the result.
     * @param status Input/output error code. Set to `U_MEMORY_ALLOCATION_ERROR`
     *        if the internal arrays cannot be allocated.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    MessageArguments(const std::map<UnicodeString, Formattable>& args, UErrorCode& status) {
        if (U_FAILURE(status)) {
            return;
        }
        const int32_t count = static_cast<int32_t>(args.size());
        argumentNames.adoptInstead(new UnicodeString[count]);
        arguments.adoptInstead(new Formattable[count]);
        if (!argumentNames.isValid() || !arguments.isValid()) {
            // Drop whichever array was allocated and keep the length at 0,
            // so that the recorded length never exceeds the storage that
            // actually exists.
            argumentNames.adoptInstead(nullptr);
            arguments.adoptInstead(nullptr);
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        // Publish the length only once both arrays are known to be valid
        argsLen = count;
        int32_t i = 0;
        for (const auto& entry : args) {
            argumentNames[i] = entry.first;
            arguments[i] = entry.second;
            ++i;
        }
    }
    /**
     * Move assignment operator:
     * The source MessageArguments will be left in a valid but undefined state.
     *
     * NOTE(review): only move assignment is declared in this class; no move
     * constructor is declared here -- confirm whether that is intended.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    MessageArguments& operator=(MessageArguments&&) noexcept;
    /**
     * Default constructor.
     * Returns an empty arguments mapping.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    MessageArguments() = default;
    /**
     * Destructor.
     *
     * @internal ICU 75.0 technology preview
     * @deprecated This API is for technology preview only.
     */
    virtual ~MessageArguments();
private:
    friend class MessageContext;

    const Formattable* getArgument(const data_model::VariableName&, UErrorCode&) const;

    // Avoids using Hashtable so that code constructing a Hashtable
    // doesn't have to appear in this header file
    // Parallel arrays: `argumentNames[i]` is the name of `arguments[i]`
    LocalArray<UnicodeString> argumentNames;
    LocalArray<Formattable> arguments;
    // Number of entries in each of the two arrays above
    int32_t argsLen = 0;
}; // class MessageArguments
} // namespace message2
U_NAMESPACE_END
#endif // U_HIDE_DEPRECATED_API
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_ARGUMENTS_H
// eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,38 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef MESSAGEFORMAT_DATA_MODEL_NAMES_H
#define MESSAGEFORMAT_DATA_MODEL_NAMES_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#ifndef U_HIDE_DEPRECATED_API
U_NAMESPACE_BEGIN
namespace message2 {
    namespace data_model {
        // Both kinds of name are represented as plain `UnicodeString`s
        using VariableName = UnicodeString;
        using FunctionName = UnicodeString;
    } // namespace data_model
} // namespace message2
U_NAMESPACE_END
#endif // U_HIDE_DEPRECATED_API
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_DATA_MODEL_NAMES_H
// eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,389 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef MESSAGEFORMAT2_FUNCTION_REGISTRY_H
#define MESSAGEFORMAT2_FUNCTION_REGISTRY_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_formattable.h"
#ifndef U_HIDE_DEPRECATED_API
#include <map>
U_NAMESPACE_BEGIN
class Hashtable;
class UVector;
namespace message2 {
using namespace data_model;
/**
* Interface that factory classes for creating formatters must implement.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API FormatterFactory : public UObject {
// TODO: the coding guidelines say that interface classes
// shouldn't inherit from UObject, but if I change it so these
// classes don't, and the individual formatter factory classes
// inherit from public FormatterFactory, public UObject, then
// memory leaks ensue
public:
/**
* Constructs a new formatter object. This method is not const;
* formatter factories with local state may be defined.
*
* @param locale Locale to be used by the formatter.
* @param status Input/output error code.
* @return The new Formatter, which is non-null if U_SUCCESS(status).
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual Formatter* createFormatter(const Locale& locale, UErrorCode& status) = 0;
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~FormatterFactory();
/**
* Copy assignment operator: deleted, so factories cannot be copy-assigned.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
FormatterFactory& operator=(const FormatterFactory&) = delete;
}; // class FormatterFactory
/**
* Interface that factory classes for creating selectors must implement.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API SelectorFactory : public UObject {
public:
/**
* Constructs a new selector object. Unlike
* `FormatterFactory::createFormatter()`, this method is const.
*
* @param locale Locale to be used by the selector.
* @param status Input/output error code.
* @return The new selector, which is non-null if U_SUCCESS(status).
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual Selector* createSelector(const Locale& locale, UErrorCode& status) const = 0;
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~SelectorFactory();
/**
* Copy assignment operator: deleted, so factories cannot be copy-assigned.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
SelectorFactory& operator=(const SelectorFactory&) = delete;
}; // class SelectorFactory
/**
* Defines mappings from names of formatters and selectors to functions implementing them.
* The required set of formatter and selector functions is defined in the spec. Users can
* also define custom formatter and selector functions.
*
* `MFFunctionRegistry` is immutable and movable. It is not copyable.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API MFFunctionRegistry : public UObject {
private:
using FormatterMap = Hashtable; // Map from stringified function names to FormatterFactory*
using SelectorMap = Hashtable; // Map from stringified function names to SelectorFactory*
public:
/**
* Looks up a formatter factory by the name of the formatter. The result is non-const,
* since formatter factories may have local state. Returns the result by pointer
* rather than by reference since it can fail.
*
* @param formatterName Name of the desired formatter.
* @return A pointer to the `FormatterFactory` registered under `formatterName`, or null
* if no formatter was registered under that name. The pointer is not owned
* by the caller.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
FormatterFactory* getFormatter(const FunctionName& formatterName) const;
/**
* Looks up a selector factory by the name of the selector. (This returns the result by pointer
* rather than by reference since `SelectorFactory` is an abstract class.)
*
* @param selectorName Name of the desired selector.
* @return A pointer to the `SelectorFactory` registered under `selectorName`, or null
* if no selector was registered under that name.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
const SelectorFactory* getSelector(const FunctionName& selectorName) const;
/**
* Looks up a formatter factory by a type tag. This method gets the name of the default formatter registered
* for that type. If no formatter was explicitly registered for this type, it returns false.
*
* @param formatterType Type tag for the desired `FormattableObject` type to be formatted.
* @param name Output parameter; initialized to the name of the default formatter for `formatterType`
* if one has been registered. Its value is undefined otherwise.
* @return True if and only if the function registry contains a default formatter for `formatterType`.
* If the return value is false, then the value of `name` is undefined.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
UBool getDefaultFormatterNameByType(const UnicodeString& formatterType, FunctionName& name) const;
/**
* The mutable Builder class allows each formatter and selector factory
* to be initialized separately; calling its `build()` method yields an
* immutable MFFunctionRegistry object.
*
* Builder is not copyable or movable.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API Builder : public UObject {
private:
// Must use raw pointers to avoid instantiating `LocalPointer` on an internal type
FormatterMap* formatters;
SelectorMap* selectors;
// Presumably maps type tags to function names, like the member of the
// same name in MFFunctionRegistry -- TODO confirm
Hashtable* formattersByType;
// Do not define copy constructor/assignment operator
Builder& operator=(const Builder&) = delete;
Builder(const Builder&) = delete;
public:
/*
Notes about `adoptFormatter()`'s type signature:
Alternative considered: take a non-owned FormatterFactory*
This is unsafe.
Alternative considered: take a FormatterFactory&
This requires getFormatter() to cast the reference to a pointer,
as it must return an unowned FormatterFactory* since it can fail.
That is also unsafe, since the caller could delete the pointer.
The "TemperatureFormatter" test from the previous ICU4J version doesn't work now,
as it only works if the `formatterFactory` argument is non-owned.
If registering a non-owned FormatterFactory is desirable, this could
be re-thought.
*/
/**
* Registers a formatter factory to a given formatter name.
*
* @param formatterName Name of the formatter being registered.
* @param formatterFactory A pointer to a FormatterFactory object to use
* for creating `formatterName` formatters. This argument is adopted.
* @param errorCode Input/output error code
* @return A reference to the builder.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
Builder& adoptFormatter(const data_model::FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode);
/**
* Registers the name of the default formatter function for a given type tag.
* (See `FormattableObject` for details on type tags.)
*
* @param type Tag for objects to be formatted with this formatter.
* @param functionName A reference to the name of the function to use for
* creating formatters for `type` objects.
* @param errorCode Input/output error code
* @return A reference to the builder.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
Builder& setDefaultFormatterNameByType(const UnicodeString& type, const data_model::FunctionName& functionName, UErrorCode& errorCode);
/**
* Registers a selector factory to a given selector name. Adopts `selectorFactory`.
*
* @param selectorName Name of the selector being registered.
* @param selectorFactory A pointer to a SelectorFactory object to use for creating
* `selectorName` selectors. This argument is adopted.
* @param errorCode Input/output error code
* @return A reference to the builder.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
Builder& adoptSelector(const data_model::FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode);
/**
* Creates an immutable `MFFunctionRegistry` object with the selectors and formatters
* that were previously registered. The builder cannot be used after this call.
* The `build()` method is destructive to avoid the need for a deep copy of the
* `FormatterFactory` and `SelectorFactory` objects (this would be necessary because
* `FormatterFactory` can have mutable state), which in turn would require implementors
* of those interfaces to implement a `clone()` method.
*
* @return The new MFFunctionRegistry
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
MFFunctionRegistry build();
/**
* Default constructor.
* Returns a Builder with no functions registered.
*
* @param errorCode Input/output error code
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
Builder(UErrorCode& errorCode);
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~Builder();
}; // class MFFunctionRegistry::Builder
/**
* Move assignment operator:
* The source MFFunctionRegistry will be left in a valid but undefined state.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
MFFunctionRegistry& operator=(MFFunctionRegistry&&) noexcept;
/**
* Move constructor:
* The source MFFunctionRegistry will be left in a valid but undefined state.
* Implemented by move-assigning from `other`; the three table pointers of
* the new object are null (per their default member initializers) when
* the assignment runs.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
MFFunctionRegistry(MFFunctionRegistry&& other) { *this = std::move(other); }
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~MFFunctionRegistry();
private:
friend class MessageContext;
friend class MessageFormatter;
// Do not define copy constructor or copy assignment operator
MFFunctionRegistry& operator=(const MFFunctionRegistry&) = delete;
MFFunctionRegistry(const MFFunctionRegistry&) = delete;
// NOTE(review): presumably takes over the three tables passed in --
// confirm against the implementation
MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType);
// Creates a registry whose three table pointers are all null
// (see the default member initializers below)
MFFunctionRegistry() {}
// Debugging; should only be called on a function registry with
// all the standard functions registered
void checkFormatter(const char*) const;
void checkSelector(const char*) const;
void checkStandard() const;
bool hasFormatter(const data_model::FunctionName& f) const;
bool hasSelector(const data_model::FunctionName& s) const;
void cleanup() noexcept;
// Must use raw pointers to avoid instantiating `LocalPointer` on an internal type
FormatterMap* formatters = nullptr;
SelectorMap* selectors = nullptr;
// Mapping from strings (type tags) to FunctionNames
Hashtable* formattersByType = nullptr;
}; // class MFFunctionRegistry
/**
* Interface that formatter classes must implement.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API Formatter : public UObject {
public:
/**
* Formats the input passed in `toFormat` and returns the formatted value.
*
* @param toFormat Placeholder, including a source formattable value and possibly
* the output of a previous formatter applied to it; see
* `message2::FormattedPlaceholder` for details. Passed by move.
* @param options The named function options. Passed by move
* @param status Input/output error code. Should not be set directly by the
* custom formatter, which should use `FormattingContext::setFormattingWarning()`
* to signal errors. The custom formatter may pass `status` to other ICU functions
* that can signal errors using this mechanism.
* NOTE(review): this signature has no `FormattingContext` parameter;
* confirm how a custom formatter is expected to report errors.
*
* @return The formatted value.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual FormattedPlaceholder format(FormattedPlaceholder&& toFormat,
FunctionOptions&& options,
UErrorCode& status) const = 0;
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~Formatter();
}; // class Formatter
/**
* Interface that selector classes must implement.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
class U_I18N_API Selector : public UObject {
public:
/**
* Compares the input to an array of keys, and returns an array of matching
* keys sorted by preference.
*
* @param toFormat The unnamed function argument; passed by move.
* @param options The named function options. Passed by move.
* @param keys An array of strings that are compared to the input
* (`toFormat`) in an implementation-specific way.
* @param keysLen The length of `keys`.
* @param prefs An array of strings with length `keysLen`. The contents of
* the array is undefined. `selectKey()` should set the contents
* of `prefs` to a subset of `keys`, with the best match placed at the lowest index.
* @param prefsLen A reference that `selectKey()` should set to the number of
* entries it wrote to `prefs`, which must be less than or equal to `keysLen`.
* @param status Input/output error code. Should not be set directly by the
* custom selector, which should use `FormattingContext::setSelectorError()`
* to signal errors. The custom selector may pass `status` to other ICU functions
* that can signal errors using this mechanism.
* NOTE(review): this signature has no `FormattingContext` parameter;
* confirm how a custom selector is expected to report errors.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual void selectKey(FormattedPlaceholder&& toFormat,
FunctionOptions&& options,
const UnicodeString* keys,
int32_t keysLen,
UnicodeString* prefs,
int32_t& prefsLen,
UErrorCode& status) const = 0;
// Note: This takes array arguments because the internal MessageFormat code has to
// call this method, and can't include any code that constructs std::vectors.
/**
* Destructor.
*
* @internal ICU 75.0 technology preview
* @deprecated This API is for technology preview only.
*/
virtual ~Selector();
}; // class Selector
} // namespace message2
U_NAMESPACE_END
#endif // U_HIDE_DEPRECATED_API
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_H
// eof

View file

@ -1095,6 +1095,11 @@ group: formatting
tmunit.o tmutamt.o tmutfmt.o
# messageformat
choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o
# MessageFormat 2
messageformat2.o messageformat2_arguments.o messageformat2_checker.o
messageformat2_data_model.o messageformat2_errors.o messageformat2_evaluation.o
messageformat2_formattable.o messageformat2_formatter.o
messageformat2_function_registry.o messageformat2_parser.o messageformat2_serializer.o
deps
decnumber formattable format units numberformatter number_skeletons numberparser
formatted_value_sbimpl

View file

@ -112,6 +112,23 @@ allowed_errors = (
("common/umutex.o", "__once_proxy"),
("common/umutex.o", "__tls_get_addr"),
("common/unifiedcache.o", "std::__throw_system_error(int)"),
# Some of the MessageFormat 2 modules reference exception-related symbols
# in instantiations of the `std::get()` method that gets an alternative
# from a `std::variant`.
# These instantiations of `std::get()` are only called by compiler-generated
# code (the implementations of built-in `swap()` methods for types
# that include a `std::variant`; and `std::__detail::__variant::__gen_vtable_impl()`,
# which constructs vtables. The MessageFormat 2 code itself only calls
# `std::get_if()`, which is exception-free; never `std::get()`.
("i18n/messageformat2_data_model.o", "typeinfo for std::exception"),
("i18n/messageformat2_data_model.o", "vtable for std::exception"),
("i18n/messageformat2_data_model.o", "std::exception::~exception()"),
("i18n/messageformat2_formattable.o", "typeinfo for std::exception"),
("i18n/messageformat2_formattable.o", "vtable for std::exception"),
("i18n/messageformat2_formattable.o", "std::exception::~exception()"),
("i18n/messageformat2_function_registry.o", "typeinfo for std::exception"),
("i18n/messageformat2_function_registry.o", "vtable for std::exception"),
("i18n/messageformat2_function_registry.o", "std::exception::~exception()")
)
def _Resolve(name, parents):

View file

@ -49,6 +49,7 @@ fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
loctest.o localebuildertest.o localematchertest.o \
messageformat2test.o messageformat2test_builtin.o messageformat2test_custom.o messageformat2test_features.o messageformat2test_fromjson.o messageformat2test_icu.o \
miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \

View file

@ -139,6 +139,12 @@
<ClCompile Include="itrbnfrt.cpp" />
<ClCompile Include="locnmtst.cpp" />
<ClCompile Include="measfmttest.cpp" />
<ClCompile Include="messageformat2test_builtin.cpp" />
<ClCompile Include="messageformat2test.cpp" />
<ClCompile Include="messageformat2test_custom.cpp" />
<ClCompile Include="messageformat2test_features.cpp" />
<ClCompile Include="messageformat2test_fromjson.cpp" />
<ClCompile Include="messageformat2test_icu.cpp" />
<ClCompile Include="miscdtfm.cpp" />
<ClCompile Include="msfmrgts.cpp" />
<ClCompile Include="nmfmapts.cpp" />

View file

@ -571,6 +571,12 @@
<ClCompile Include="uchar_type_build_test.cpp">
<Filter>configuration</Filter>
</ClCompile>
<ClCompile Include="messageformat2test_builtin.cpp" />
<ClCompile Include="messageformat2test.cpp" />
<ClCompile Include="messageformat2test_custom.cpp" />
<ClCompile Include="messageformat2test_features.cpp" />
<ClCompile Include="messageformat2test_fromjson.cpp" />
<ClCompile Include="messageformat2test_icu.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="itrbbi.h">
@ -961,4 +967,4 @@
<Filter>locales &amp; resources</Filter>
</ClInclude>
</ItemGroup>
</Project>
</Project>

View file

@ -33,6 +33,7 @@
#include "dtfmapts.h" // DateFormatAPI
#include "dtfmttst.h" // DateFormatTest
#include "tmsgfmt.h" // TestMessageFormat
#include "messageformat2test.h" // TestMessageFormat2
#include "dtfmrgts.h" // DateFormatRegressionTest
#include "msfmrgts.h" // MessageFormatRegressionTest
#include "miscdtfm.h" // DateFormatMiscTests
@ -287,6 +288,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
TESTCLASS(60,TestMessageFormat2);
default: name = ""; break; //needed to end loop
}
if (exec) {

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,202 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef _TESTMESSAGEFORMAT2
#define _TESTMESSAGEFORMAT2
#include "unicode/rep.h"
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "messageformat2test_utils.h"
#include "unicode/unistr.h"
#include "unicode/messageformat2_formattable.h"
#include "unicode/parseerr.h"
#include "intltest.h"
/**
* TestMessageFormat2 tests MessageFormat2
*/
// Pairs a message pattern with a result string. Presumably `output` is the
// text expected from formatting `pattern` -- confirm against the users of
// testValidPatterns(), which takes an array of these.
struct TestResult {
// Source text of the message.
const UnicodeString pattern;
// Result string associated with `pattern`.
const UnicodeString output;
};
// Like TestResult, but additionally carries a UErrorCode. Presumably
// `expected` is the error the formatter should report for `pattern` --
// confirm against callers.
struct TestResultError {
// Source text of the message.
const UnicodeString pattern;
// Result string associated with `pattern`.
const UnicodeString output;
// Error code associated with `pattern`.
UErrorCode expected;
};
class TestMessageFormat2: public IntlTest {
public:
void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ) override;
/**
* test MessageFormat2 with various given patterns
**/
void testVariousPatterns(void);
void featureTests(void);
void messageFormat1Tests(void);
void testAPICustomFunctions(void);
// Test custom functions
void testCustomFunctions(void);
// Test standard functions
void testBuiltInFunctions(void);
void testDataModelErrors(void);
void testResolutionErrors(void);
// Test the data model API
void testDataModelAPI(void);
void testAPI(void);
void testInvalidPatterns(void);
void testAPISimple(void);
private:
void testSemanticallyInvalidPattern(uint32_t, const UnicodeString&, UErrorCode);
void testRuntimeErrorPattern(uint32_t, const UnicodeString&, UErrorCode);
void testRuntimeWarningPattern(uint32_t, const UnicodeString&, const UnicodeString&, UErrorCode);
void testInvalidPattern(uint32_t, const UnicodeString&);
void testInvalidPattern(uint32_t, const UnicodeString&, uint32_t);
void testInvalidPattern(uint32_t, const UnicodeString&, uint32_t, uint32_t);
void testValidPatterns(const TestResult*, int32_t, IcuTestErrorCode&);
void testResolutionErrors(IcuTestErrorCode&);
void testNoSyntaxErrors(const UnicodeString*, int32_t, IcuTestErrorCode&);
void jsonTests(IcuTestErrorCode&);
void specTests();
void runSpecTests(IcuTestErrorCode&);
// Built-in function testing
void testDateTime(IcuTestErrorCode&);
void testNumbers(IcuTestErrorCode&);
// Custom function testing
void testPersonFormatter(IcuTestErrorCode&);
void testCustomFunctionsComplexMessage(IcuTestErrorCode&);
void testGrammarCasesFormatter(IcuTestErrorCode&);
void testListFormatter(IcuTestErrorCode&);
void testMessageRefFormatter(IcuTestErrorCode&);
// Feature tests
void testEmptyMessage(message2::TestCase::Builder&, IcuTestErrorCode&);
void testPlainText(message2::TestCase::Builder&, IcuTestErrorCode&);
void testPlaceholders(message2::TestCase::Builder&, IcuTestErrorCode&);
void testArgumentMissing(message2::TestCase::Builder&, IcuTestErrorCode&);
void testDefaultLocale(message2::TestCase::Builder&, IcuTestErrorCode&);
void testSpecialPluralWithDecimals(message2::TestCase::Builder&, IcuTestErrorCode&);
void testDefaultFunctionAndOptions(message2::TestCase::Builder&, IcuTestErrorCode&);
void testSimpleSelection(message2::TestCase::Builder&, IcuTestErrorCode&);
void testComplexSelection(message2::TestCase::Builder&, IcuTestErrorCode&);
void testSimpleLocalVariable(message2::TestCase::Builder&, IcuTestErrorCode&);
void testLocalVariableWithSelect(message2::TestCase::Builder&, IcuTestErrorCode&);
void testDateFormat(message2::TestCase::Builder&, IcuTestErrorCode&);
void testPlural(message2::TestCase::Builder&, IcuTestErrorCode&);
void testPluralOrdinal(message2::TestCase::Builder&, IcuTestErrorCode&);
void testDeclareBeforeUse(message2::TestCase::Builder&, IcuTestErrorCode&);
// MessageFormat 1 tests
void testSample(message2::TestCase::Builder&, IcuTestErrorCode&);
void testStaticFormat(message2::TestCase::Builder&, IcuTestErrorCode&);
void testSimpleFormat(message2::TestCase::Builder&, IcuTestErrorCode&);
void testSelectFormatToPattern(message2::TestCase::Builder&, IcuTestErrorCode&);
void testMessageFormatDateTimeSkeleton(message2::TestCase::Builder&, IcuTestErrorCode&);
void testMf1Behavior(message2::TestCase::Builder&, IcuTestErrorCode&);
}; // class TestMessageFormat2
U_NAMESPACE_BEGIN
namespace message2 {
// Custom function classes
// FormatterFactory whose createFormatter() produces the test-only
// person-name formatter.
class PersonNameFormatterFactory : public FormatterFactory {
public:
// Returns a newly allocated Formatter, or reports failure through the
// UErrorCode argument.
Formatter* createFormatter(const Locale&, UErrorCode&) override;
};
// Test-only FormattableObject holding the parts of a person's name.
// tag() returns the fixed string "person".
class Person : public FormattableObject {
public:
UnicodeString title;
UnicodeString firstName;
UnicodeString lastName;
// Copies the three name parts and sets the tag to "person".
Person(UnicodeString t, UnicodeString f, UnicodeString l) : title(t), firstName(f), lastName(l), tagName("person") {}
~Person();
// Type tag for this object; the returned reference is to a member.
const UnicodeString& tag() const override { return tagName; }
private:
const UnicodeString tagName;
};
// Test-only Formatter for Person operands; the implementation of format()
// is defined out of line.
class PersonNameFormatter : public Formatter {
public:
// Formats the operand using the given options; failures are reported
// through `errorCode`.
FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override;
};
// Test-only FormattableObject wrapping a Hashtable. tag() returns the fixed
// string "properties".
class FormattableProperties : public FormattableObject {
public:
// Type tag for this object; the returned reference is to a member.
const UnicodeString& tag() const override { return tagName; }
// Stores `hash` in a LocalPointer, so this object deletes it on destruction.
// `hash` must be non-null (asserted).
FormattableProperties(Hashtable* hash) : properties(hash), tagName("properties") {
U_ASSERT(hash != nullptr);
}
~FormattableProperties();
// The wrapped table, owned by this object.
LocalPointer<Hashtable> properties;
private:
const UnicodeString tagName;
};
// FormatterFactory whose createFormatter() produces the test-only
// grammatical-case formatter.
class GrammarCasesFormatterFactory : public FormatterFactory {
public:
// Returns a newly allocated Formatter, or reports failure through the
// UErrorCode argument.
Formatter* createFormatter(const Locale&, UErrorCode&) override;
};
// Test-only Formatter that applies grammatical-case inflection to string
// operands.
class GrammarCasesFormatter : public Formatter {
public:
// Formats the operand using the given options; failures are reported
// through `errorCode`.
FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override;
// Builds a function registry; presumably one containing this formatter --
// confirm against the definition.
static MFFunctionRegistry customRegistry(UErrorCode&);
private:
// Appends the inflected form of the first argument to `result`.
void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const;
};
// FormatterFactory whose createFormatter() produces the test-only list
// formatter. It is a friend of message2::ListFormatter, whose constructor is
// private.
class ListFormatterFactory : public FormatterFactory {
public:
// Returns a newly allocated Formatter, or reports failure through the
// UErrorCode argument.
Formatter* createFormatter(const Locale&, UErrorCode&) override;
};
// Test-only Formatter that renders array operands as a locale-sensitive list.
// Instances are created only by ListFormatterFactory (the constructor is
// private and the factory is a friend).
class ListFormatter : public Formatter {
public:
    // Formats the operand using the given options; failures are reported
    // through `errorCode`.
    FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override;
    // Builds a function registry; presumably one containing this formatter --
    // confirm against the definition.
    static MFFunctionRegistry customRegistry(UErrorCode&);
private:
    friend class ListFormatterFactory;
    // Held by value rather than by reference: the formatter is heap-allocated
    // and returned from createFormatter(), so it can outlive the Locale object
    // that was passed in. A copy removes any dependency on the caller's
    // object lifetime.
    const Locale locale;
    ListFormatter(const Locale& loc) : locale(loc) {}
};
// FormatterFactory whose createFormatter() produces the test-only
// ResourceManager formatter. It is a friend of ResourceManager, whose
// constructor is private.
class ResourceManagerFactory : public FormatterFactory {
public:
// Returns a newly allocated Formatter, or reports failure through the
// UErrorCode argument.
Formatter* createFormatter(const Locale&, UErrorCode&) override;
};
// Test-only Formatter that resolves a string operand to another message taken
// from a properties table and formats that message. Instances are created
// only by ResourceManagerFactory (the constructor is private and the factory
// is a friend).
class ResourceManager : public Formatter {
public:
    // Formats the operand using the given options; failures are reported
    // through `errorCode`.
    FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override;
    // Builds a function registry; presumably one containing this formatter --
    // confirm against the definition.
    static MFFunctionRegistry customRegistry(UErrorCode&);
    // Returns a newly allocated table of sample messages; the caller owns it.
    static Hashtable* properties(UErrorCode&);
    static UnicodeString propertiesAsString(const Hashtable&);
    static Hashtable* parseProperties(const UnicodeString&, UErrorCode&);
private:
    friend class ResourceManagerFactory;
    ResourceManager(const Locale& loc) : locale(loc) {}
    // Held by value rather than by reference: the formatter is heap-allocated
    // and returned from createFormatter(), so it can outlive the Locale object
    // that was passed in. A copy removes any dependency on the caller's
    // object lifetime.
    const Locale locale;
};
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif

View file

@ -0,0 +1,191 @@
// © 2024 and later: Unicode, Inc. and others.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/calendar.h"
#include "messageformat2test.h"
using namespace icu::message2;
/*
Tests reflect the syntax specified in
https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf
release LDML45-alpha:
https://github.com/unicode-org/message-format-wg/releases/tag/LDML45-alpha
*/
// Exercises the :datetime, :date and :time functions: default formatting in
// the "ro" locale, the long/medium/short styles in "en_US", and operand
// mismatch errors for non-date operands.
void TestMessageFormat2::testDateTime(IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    LocalPointer<Calendar> cal(Calendar::createInstance(errorCode));
    // Do not dereference the calendar if it could not be created.
    if (U_FAILURE(errorCode) || cal.isNull()) {
        return;
    }
    TestCase::Builder testBuilder;
    testBuilder.setName("testDateTime");

    // November 23, 2022 at 7:42:37 PM (the millisecond field is not set here)
    cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate TEST_DATE = cal->getTime(errorCode);
    UnicodeString date = "date";
    testBuilder.setLocale(Locale("ro"));

    TestCase test = testBuilder.setPattern("Testing date formatting: {$date :datetime}.")
        .setExpected("Testing date formatting: 23.11.2022, 19:42.")
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Formatted string as argument -- `:datetime` should format the source Formattable
    // NOTE(review): this case is built but never passed to runTestCase(), and
    // its expected text ends in "." although the pattern does not. Confirm the
    // intended expectation before enabling it.
    test = testBuilder.setPattern(".local $dateStr = {$date :datetime}\n\
{{Testing date formatting: {$dateStr :datetime}}}")
        .setExpected("Testing date formatting: 23.11.2022, 19:42.")
        .setExpectSuccess()
        .setDateArgument(date, TEST_DATE)
        .build();

    // Style
    testBuilder.setLocale(Locale("en", "US"));

    test = testBuilder.setPattern("Testing date formatting: {$date :date style=long}.")
        .setExpected("Testing date formatting: November 23, 2022.")
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Testing date formatting: {$date :date style=medium}.")
        .setExpected("Testing date formatting: Nov 23, 2022.")
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Testing date formatting: {$date :date style=short}.")
        .setExpected("Testing date formatting: 11/23/22.")
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Testing date formatting: {$date :time style=long}.")
        .setExpected(CharsToUnicodeString("Testing date formatting: 7:42:37\\u202FPM PST."))
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Testing date formatting: {$date :time style=medium}.")
        .setExpected(CharsToUnicodeString("Testing date formatting: 7:42:37\\u202FPM."))
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Testing date formatting: {$date :time style=short}.")
        .setExpected(CharsToUnicodeString("Testing date formatting: 7:42\\u202FPM."))
        .setDateArgument(date, TEST_DATE)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Error cases
    // Number as argument
    test = testBuilder.setPattern(".local $num = {|42| :number}\n\
{{Testing date formatting: {$num :datetime}}}")
        .clearArguments()
        .setExpected("Testing date formatting: {|42|}")
        .setExpectedError(U_MF_OPERAND_MISMATCH_ERROR)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Literal string as argument
    test = testBuilder.setPattern("Testing date formatting: {|horse| :datetime}")
        .setExpected("Testing date formatting: {|horse|}")
        .setExpectedError(U_MF_OPERAND_MISMATCH_ERROR)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);
}
// Exercises number formatting: numeric literals with :number in several
// locales, a literal that is not a valid number, and default formatting of
// double, int64 and decimal arguments with no function annotation.
// The same builder is reused for every case, so settings made for one case
// (for example the locale) remain in effect until they are set again.
void TestMessageFormat2::testNumbers(IcuTestErrorCode& errorCode) {
CHECK_ERROR(errorCode);
double value = 1234567890.97531;
UnicodeString val = "val";
TestCase::Builder testBuilder;
testBuilder.setName("testNumbers");
// Literals
TestCase test = testBuilder.setPattern("From literal: {123456789 :number}!")
.setArgument(val, value)
.setExpected("From literal: 123.456.789!")
.setLocale(Locale("ro"))
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Quoted literal with a fractional part
test = testBuilder.setPattern("From literal: {|123456789.531| :number}!")
.setArgument(val, value)
.setExpected("From literal: 123.456.789,531!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
// This should fail, because number literals are not treated
// as localized numbers
test = testBuilder.setPattern("From literal: {|123456789,531| :number}!")
.setArgument(val, value)
.setExpectedError(U_MF_OPERAND_MISMATCH_ERROR)
.setExpected("From literal: {|123456789,531|}!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Same literal as above, formatted for the "my" locale; the expected text is
// spelled with \u escapes
test = testBuilder.setPattern("From literal: {|123456789.531| :number}!")
.setArgument(val, value)
.setExpectSuccess()
.setExpected(CharsToUnicodeString("From literal: \\u1041\\u1042\\u1043,\\u1044\\u1045\\u1046,\\u1047\\u1048\\u1049.\\u1045\\u1043\\u1041!"))
.setLocale(Locale("my"))
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Testing that the detection works for various types (without specifying :number)
test = testBuilder.setPattern("Default double: {$val}!")
.setLocale(Locale("en", "IN"))
.setArgument(val, value)
.setExpected("Default double: 1,23,45,67,890.97531!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setPattern("Default double: {$val}!")
.setLocale(Locale("ro"))
.setArgument(val, value)
.setExpected("Default double: 1.234.567.890,97531!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setPattern("Default float: {$val}!")
.setLocale(Locale("ro"))
.setArgument(val, 3.1415926535)
.setExpected("Default float: 3,141593!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setPattern("Default int64: {$val}!")
.setLocale(Locale("ro"))
.setArgument(val, (int64_t) 1234567890123456789)
.setExpected("Default int64: 1.234.567.890.123.456.789!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Decimal argument supplied as a string of digits
test = testBuilder.setPattern("Default number: {$val}!")
.setLocale(Locale("ro"))
.setDecimalArgument(val, "1234567890123456789.987654321", errorCode)
.setExpected("Default number: 1.234.567.890.123.456.789,987654!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Omitted CurrencyAmount test from ICU4J since it's not supported by Formattable
}
void TestMessageFormat2::testBuiltInFunctions() {
IcuTestErrorCode errorCode(*this, "testBuiltInFunctions");
testDateTime(errorCode);
testNumbers(errorCode);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,728 @@
// © 2024 and later: Unicode, Inc. and others.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "plurrule_impl.h"
#include "unicode/listformatter.h"
#include "messageformat2test.h"
#include "hash.h"
#include "intltest.h"
using namespace message2;
using namespace pluralimpl;
/*
Tests reflect the syntax specified in
https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf
as of the following commit from 2023-05-09:
https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867
*/
using namespace data_model;
// Exercises the custom `:person` function. The first two cases run before the
// custom registry is installed and expect an unknown-function error; the
// remaining cases run with the registry and check the formality/length
// option combinations.
void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode)
        .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode)
        .build());
    UnicodeString name = "name";
    LocalPointer<Person> person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")));
    // Same allocation check as testCustomFunctionsComplexMessage(): report
    // out-of-memory instead of handing a null object to the formatter.
    if (!person.isValid()) {
        ((UErrorCode&) errorCode) = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    TestCase::Builder testBuilder;
    testBuilder.setName("testPersonFormatter");
    testBuilder.setLocale(Locale("en"));

    // No custom registry installed yet: `:person` is unknown.
    TestCase test = testBuilder.setPattern("Hello {$name :person formality=formal}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello {$name}")
        .setExpectedError(U_MF_UNKNOWN_FUNCTION_ERROR)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Hello {$name :person formality=informal}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello {$name}")
        .setExpectedError(U_MF_UNKNOWN_FUNCTION_ERROR)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    // From here on the custom registry is used.
    testBuilder.setFunctionRegistry(&customRegistry);

    test = testBuilder.setPattern("Hello {$name :person formality=formal}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello Mr. Doe")
        .setExpectSuccess()
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Hello {$name :person formality=informal}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello John")
        .setExpectSuccess()
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Hello {$name :person formality=formal length=long}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello Mr. John Doe")
        .setExpectSuccess()
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Hello {$name :person formality=formal length=medium}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello John Doe")
        .setExpectSuccess()
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.setPattern("Hello {$name :person formality=formal length=short}")
        .setArgument(name, person.getAlias())
        .setExpected("Hello Mr. Doe")
        .setExpectSuccess()
        .build();
    TestUtils::runTestCase(*this, test, errorCode);
}
// Exercises the custom `:person` function inside a two-selector `.match`
// message (host gender x guest count). One pattern is set on the builder and
// reused; each case only changes the arguments and the expected text.
void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& errorCode) {
CHECK_ERROR(errorCode);
MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode)
.adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode)
.build());
// Argument names used by the pattern
UnicodeString host = "host";
UnicodeString hostGender = "hostGender";
UnicodeString guest = "guest";
UnicodeString guestCount = "guestCount";
LocalPointer<Person> jane(new Person(UnicodeString("Ms."), UnicodeString("Jane"), UnicodeString("Doe")));
LocalPointer<Person> john(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")));
LocalPointer<Person> anonymous(new Person(UnicodeString("Mx."), UnicodeString("Anonymous"), UnicodeString("Doe")));
// Report out-of-memory if any of the Person objects could not be allocated
if (!jane.isValid() || !john.isValid() || !anonymous.isValid()) {
((UErrorCode&) errorCode) = U_MEMORY_ALLOCATION_ERROR;
return;
}
UnicodeString message = ".local $hostName = {$host :person length=long}\n\
.local $guestName = {$guest :person length=long}\n\
.input {$guestCount :number}\n\
.match {$hostGender :string} {$guestCount :number}\n\
female 0 {{{$hostName} does not give a party.}}\n\
female 1 {{{$hostName} invites {$guestName} to her party.}}\n\
female 2 {{{$hostName} invites {$guestName} and one other person to her party.}}\n\
female * {{{$hostName} invites {$guestCount} people, including {$guestName}, to her party.}}\n\
male 0 {{{$hostName} does not give a party.}}\n\
male 1 {{{$hostName} invites {$guestName} to his party.}}\n\
male 2 {{{$hostName} invites {$guestName} and one other person to his party.}}\n\
male * {{{$hostName} invites {$guestCount} people, including {$guestName}, to his party.}}\n\
* 0 {{{$hostName} does not give a party.}}\n\
* 1 {{{$hostName} invites {$guestName} to their party.}}\n\
* 2 {{{$hostName} invites {$guestName} and one other person to their party.}}\n\
* * {{{$hostName} invites {$guestCount} people, including {$guestName}, to their party.}}";
TestCase::Builder testBuilder;
testBuilder.setName("testCustomFunctionsComplexMessage");
testBuilder.setLocale(Locale("en"));
testBuilder.setPattern(message);
testBuilder.setFunctionRegistry(&customRegistry);
// female host, 3 guests
TestCase test = testBuilder.setArgument(host, jane.getAlias())
.setArgument(hostGender, "female")
.setArgument(guest, john.getAlias())
.setArgument(guestCount, (int64_t) 3)
.setExpected("Ms. Jane Doe invites 3 people, including Mr. John Doe, to her party.")
.setExpectSuccess()
.build();
TestUtils::runTestCase(*this, test, errorCode);
// female host, 2 guests
test = testBuilder.setArgument(host, jane.getAlias())
.setArgument(hostGender, "female")
.setArgument(guest, john.getAlias())
.setArgument(guestCount, (int64_t) 2)
.setExpected("Ms. Jane Doe invites Mr. John Doe and one other person to her party.")
.setExpectSuccess()
.build();
TestUtils::runTestCase(*this, test, errorCode);
// female host, 1 guest
test = testBuilder.setArgument(host, jane.getAlias())
.setArgument(hostGender, "female")
.setArgument(guest, john.getAlias())
.setArgument(guestCount, (int64_t) 1)
.setExpected("Ms. Jane Doe invites Mr. John Doe to her party.")
.setExpectSuccess()
.build();
TestUtils::runTestCase(*this, test, errorCode);
// male host, 3 guests
test = testBuilder.setArgument(host, john.getAlias())
.setArgument(hostGender, "male")
.setArgument(guest, jane.getAlias())
.setArgument(guestCount, (int64_t) 3)
.setExpected("Mr. John Doe invites 3 people, including Ms. Jane Doe, to his party.")
.setExpectSuccess()
.build();
TestUtils::runTestCase(*this, test, errorCode);
// "unknown" matches neither gender key, so the `*` variants apply
test = testBuilder.setArgument(host, anonymous.getAlias())
.setArgument(hostGender, "unknown")
.setArgument(guest, jane.getAlias())
.setArgument(guestCount, (int64_t) 2)
.setExpected("Mx. Anonymous Doe invites Ms. Jane Doe and one other person to their party.")
.setExpectSuccess()
.build();
TestUtils::runTestCase(*this, test, errorCode);
}
void TestMessageFormat2::testCustomFunctions() {
IcuTestErrorCode errorCode(*this, "testCustomFunctions");
testPersonFormatter(errorCode);
testCustomFunctionsComplexMessage(errorCode);
testGrammarCasesFormatter(errorCode);
testListFormatter(errorCode);
testMessageRefFormatter(errorCode);
}
// -------------- Custom function implementations
// Creates a PersonNameFormatter. Returns nullptr if `errorCode` already
// indicates failure; sets U_MEMORY_ALLOCATION_ERROR and returns nullptr if
// the allocation fails. The locale is ignored.
Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    (void) locale; // not needed by this formatter

    Formatter* formatter = new PersonNameFormatter();
    if (formatter != nullptr) {
        return formatter;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
// Formats a Person operand according to the `formality` and `length` options.
//
//   length=long              -> title first last
//   length=medium, formal    -> first last
//   length=medium, otherwise -> title first
//   any other length, formal -> title last      ("short" is the default)
//   any other length         -> first
//
// Returns the fallback placeholder "not a person" when the operand is
// missing, is not an object, or is not tagged "person".
message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("not a person");

    if (!arg.canFormat() || arg.asFormattable().getType() != UFMT_OBJECT) {
        return errorVal;
    }
    const Formattable& toFormat = arg.asFormattable();

    // Only string-valued options are honored.
    FunctionOptionsMap opt = options.getOptions();
    bool hasFormality = opt.count("formality") > 0 && opt["formality"].getType() == UFMT_STRING;
    bool hasLength = opt.count("length") > 0 && opt["length"].getType() == UFMT_STRING;

    bool useFormal = hasFormality && opt["formality"].getString(errorCode) == "formal";
    UnicodeString length = hasLength ? opt["length"].getString(errorCode) : "short";

    const FormattableObject* fp = toFormat.getObject(errorCode);
    U_ASSERT(U_SUCCESS(errorCode));

    if (fp == nullptr || fp->tag() != u"person") {
        return errorVal;
    }
    // The tag check above is what justifies the downcast.
    const Person* p = static_cast<const Person*>(fp);

    const UnicodeString space(" ");
    UnicodeString result;
    if (length == "long") {
        result = p->title + space + p->firstName + space + p->lastName;
    } else if (length == "medium") {
        result = useFormal ? p->firstName + space + p->lastName
                           : p->title + space + p->firstName;
    } else {
        // Default to "short" length
        result = useFormal ? p->title + space + p->lastName
                           : UnicodeString(p->firstName);
    }

    return FormattedPlaceholder(arg, FormattedValue(std::move(result)));
}
// Out-of-line definition of the destructor declared in the class.
FormattableProperties::~FormattableProperties() = default;
// Out-of-line definition of the destructor declared in the class.
Person::~Person() = default;
/*
See ICU4J: CustomFormatterGrammarCaseTest.java
*/
// Creates a GrammarCasesFormatter. Returns nullptr if `errorCode` already
// indicates failure; sets U_MEMORY_ALLOCATION_ERROR and returns nullptr if
// the allocation fails. The locale is ignored.
Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    (void) locale; // not needed by this formatter

    Formatter* formatter = new GrammarCasesFormatter();
    if (formatter != nullptr) {
        return formatter;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
/* static */ void GrammarCasesFormatter::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const {
UnicodeString postfix;
if (value.endsWith("ana")) {
value.extract(0, value.length() - 3, postfix);
postfix += "nei";
}
else if (value.endsWith("ca")) {
value.extract(0, value.length() - 2, postfix);
postfix += "căi";
}
else if (value.endsWith("ga")) {
value.extract(0, value.length() - 2, postfix);
postfix += "găi";
}
else if (value.endsWith("a")) {
value.extract(0, value.length() - 1, postfix);
postfix += "ei";
}
else {
postfix = "lui " + value;
}
result += postfix;
}
// Formats a string operand, inflecting it when the `case` option is the
// string "dative" or "genitive"; with any other `case` the string is passed
// through unchanged. A missing operand sets U_MF_FORMATTING_ERROR and yields
// the fallback placeholder "grammarBB".
message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    // Argument must be present
    if (!arg.canFormat()) {
        errorCode = U_MF_FORMATTING_ERROR;
        return message2::FormattedPlaceholder("grammarBB");
    }

    // Assumes the argument is not-yet-formatted
    const Formattable& toFormat = arg.asFormattable();
    UnicodeString result;
    FunctionOptionsMap opt = options.getOptions();

    if (toFormat.getType() == UFMT_STRING) {
        const UnicodeString& in = toFormat.getString(errorCode);

        bool inflect = false;
        if (opt.count("case") > 0 && opt["case"].getType() == UFMT_STRING) {
            const UnicodeString& grammarCase = opt["case"].getString(errorCode);
            inflect = (grammarCase == "dative" || grammarCase == "genitive");
        }

        if (inflect) {
            getDativeAndGenitive(in, result);
        } else {
            result += in;
        }
        U_ASSERT(U_SUCCESS(errorCode));
    } else {
        // Non-string operand: append whatever getString() yields for it.
        result += toFormat.getString(errorCode);
    }

    return message2::FormattedPlaceholder(arg, FormattedValue(std::move(result)));
}
// Exercises the custom `:grammarBB` function in the "ro" locale: first with
// case=genitive, where the names are expected to be inflected, then with
// case=nominative, where they are expected to pass through unchanged.
// The builder is reused, so the pattern set before each group applies to
// every case in that group.
void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) {
CHECK_ERROR(errorCode);
MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode)
.adoptFormatter(FunctionName("grammarBB"), new GrammarCasesFormatterFactory(), errorCode)
.build();
TestCase::Builder testBuilder;
testBuilder.setName("testGrammarCasesFormatter - genitive");
testBuilder.setFunctionRegistry(&customRegistry);
testBuilder.setLocale(Locale("ro"));
testBuilder.setPattern("Cartea {$owner :grammarBB case=genitive}");
// Genitive cases
TestCase test = testBuilder.setArgument("owner", "Maria")
.setExpected("Cartea Mariei")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Rodica")
.setExpected("Cartea Rodicăi")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Ileana")
.setExpected("Cartea Ilenei")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Petre")
.setExpected("Cartea lui Petre")
.build();
TestUtils::runTestCase(*this, test, errorCode);
// Nominative cases: same names, new pattern
testBuilder.setName("testGrammarCasesFormatter - nominative");
testBuilder.setPattern("M-a sunat {$owner :grammarBB case=nominative}");
test = testBuilder.setArgument("owner", "Maria")
.setExpected("M-a sunat Maria")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Rodica")
.setExpected("M-a sunat Rodica")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Ileana")
.setExpected("M-a sunat Ileana")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setArgument("owner", "Petre")
.setExpected("M-a sunat Petre")
.build();
TestUtils::runTestCase(*this, test, errorCode);
}
/*
See ICU4J: CustomFormatterListTest.java
*/
// Creates a message2::ListFormatter for `locale`. Returns nullptr if
// `errorCode` already indicates failure; sets U_MEMORY_ALLOCATION_ERROR and
// returns nullptr if the allocation fails.
Formatter* ListFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    Formatter* formatter = new ListFormatter(locale);
    if (formatter != nullptr) {
        return formatter;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
// Formats an array operand as a list using icu::ListFormatter.
//
// Options (string values only; anything else keeps the default):
//   type  = "OR" | "UNITS"      (default: AND)
//   width = "SHORT" | "NARROW"  (default: WIDE)
//
// A non-array operand is appended via getString(). A missing operand, or an
// array that cannot be read, sets U_MF_FORMATTING_ERROR and yields the
// fallback placeholder "listformat".
message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    message2::FormattedPlaceholder errorVal = FormattedPlaceholder("listformat");

    // Argument must be present
    if (!arg.canFormat()) {
        errorCode = U_MF_FORMATTING_ERROR;
        return errorVal;
    }
    // Assumes arg is not-yet-formatted
    const Formattable& toFormat = arg.asFormattable();

    FunctionOptionsMap opt = options.getOptions();
    // Copies the named option into `value` and returns true if the option is
    // present and is a string; otherwise leaves `value` alone and returns false.
    auto stringOption = [&opt, &errorCode](const char* optionName, UnicodeString& value) -> bool {
        if (opt.count(optionName) == 0 || opt[optionName].getType() != UFMT_STRING) {
            return false;
        }
        value = opt[optionName].getString(errorCode);
        return true;
    };

    UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND;
    UnicodeString typeName;
    if (stringOption("type", typeName)) {
        if (typeName == "OR") {
            type = UListFormatterType::ULISTFMT_TYPE_OR;
        } else if (typeName == "UNITS") {
            type = UListFormatterType::ULISTFMT_TYPE_UNITS;
        }
    }

    UListFormatterWidth width = UListFormatterWidth::ULISTFMT_WIDTH_WIDE;
    UnicodeString widthName;
    if (stringOption("width", widthName)) {
        if (widthName == "SHORT") {
            width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT;
        } else if (widthName == "NARROW") {
            width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW;
        }
    }
    U_ASSERT(U_SUCCESS(errorCode));

    LocalPointer<icu::ListFormatter> lf(icu::ListFormatter::createInstance(locale, type, width, errorCode));
    if (U_FAILURE(errorCode)) {
        return {};
    }

    UnicodeString result;
    if (toFormat.getType() == UFMT_ARRAY) {
        int32_t itemCount = 0;
        const Formattable* items = toFormat.getArray(itemCount, errorCode);
        if (U_FAILURE(errorCode)) {
            errorCode = U_MF_FORMATTING_ERROR;
            return errorVal;
        }
        // icu::ListFormatter takes an array of strings, so copy each element out.
        LocalArray<UnicodeString> parts(new UnicodeString[itemCount]);
        if (parts.isNull()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return {};
        }
        for (int32_t i = 0; i < itemCount; i++) {
            parts[i] = items[i].getString(errorCode);
        }
        U_ASSERT(U_SUCCESS(errorCode));
        lf->format(parts.getAlias(), itemCount, result, errorCode);
    } else {
        result += toFormat.getString(errorCode);
        U_ASSERT(U_SUCCESS(errorCode));
    }

    return FormattedPlaceholder(arg, FormattedValue(std::move(result)));
}
// Exercises the custom `:listformat` function with a three-element array
// argument, once with type=AND and once with type=OR. No locale is set on the
// builder in this function.
void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
// The array argument shared by both cases
const message2::Formattable progLanguages[3] = {
message2::Formattable("C/C++"),
message2::Formattable("Java"),
message2::Formattable("Python")
};
TestCase::Builder testBuilder;
MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode)
.adoptFormatter(FunctionName("listformat"), new ListFormatterFactory(), errorCode)
.build();
// Stop here if building the registry failed
CHECK_ERROR(errorCode);
testBuilder.setFunctionRegistry(&reg);
testBuilder.setArgument("languages", progLanguages, 3);
TestCase test = testBuilder.setName("testListFormatter")
.setPattern("I know {$languages :listformat type=AND}!")
.setExpected("I know C/C++, Java, and Python!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
test = testBuilder.setName("testListFormatter")
.setPattern("You are allowed to use {$languages :listformat type=OR}!")
.setExpected("You are allowed to use C/C++, Java, or Python!")
.build();
TestUtils::runTestCase(*this, test, errorCode);
}
/*
See ICU4J: CustomFormatterMessageRefTest.java
*/
// Builds the table of sample messages used by the message-reference tests,
// keyed by "safari", "firefox" and "chrome". Each value is a heap-allocated
// UnicodeString owned by the table (a value deleter is installed).
//
// Returns a newly allocated Hashtable that the caller owns, or nullptr on
// failure. On allocation failure `errorCode` is set to
// U_MEMORY_ALLOCATION_ERROR; nothing is leaked on any failure path.
/* static */ Hashtable* message2::ResourceManager::properties(UErrorCode& errorCode) {
    NULL_ON_ERROR(errorCode);

    // Everything is held in LocalPointers until ownership is handed over, so
    // each early return below releases whatever was already allocated.
    LocalPointer<UnicodeString> firefox(new UnicodeString(".match {$gcase :string} genitive {{Firefoxin}} * {{Firefox}}"));
    LocalPointer<UnicodeString> chrome(new UnicodeString(".match {$gcase :string} genitive {{Chromen}} * {{Chrome}}"));
    LocalPointer<UnicodeString> safari(new UnicodeString(".match {$gcase :string} genitive {{Safarin}} * {{Safari}}"));
    if (!firefox.isValid() || !chrome.isValid() || !safari.isValid()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    LocalPointer<Hashtable> result(new Hashtable(uhash_compareUnicodeString, nullptr, errorCode));
    if (!result.isValid()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // The constructor reports its own failures through errorCode; do not use
    // a table that failed to initialize.
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    result->setValueDeleter(uprv_deleteUObject);
    // put() takes over each value, so release it from its LocalPointer.
    result->put("safari", safari.orphan(), errorCode);
    result->put("firefox", firefox.orphan(), errorCode);
    result->put("chrome", chrome.orphan(), errorCode);

    return result.orphan();
}
// Creates a ResourceManager for `locale`. Returns nullptr if `errorCode`
// already indicates failure; sets U_MEMORY_ALLOCATION_ERROR and returns
// nullptr if the allocation fails.
Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    Formatter* formatter = new ResourceManager(locale);
    if (formatter != nullptr) {
        return formatter;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
using Arguments = MessageArguments;

/*
  Wraps a function's options map in a MessageArguments object so that the
  options can be passed as the arguments of another message.
  Returns a default-constructed Arguments if `status` already indicates failure.
*/
static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        return MessageArguments(opts, status);
    }
    return {};
}
/*
  Formatter for the custom `msgRef` function used by the test.

  The operand must be a string; it is used as a key into the properties
  table supplied through the `resbundle` option. The message stored under
  that key is parsed and then formatted, with this function's own options
  serving as the arguments of that message.

  Returns a fallback placeholder ("msgref") whenever the lookup cannot be
  performed; U_MF_FORMATTING_ERROR is set when the operand is missing, when
  no properties object was supplied, or when the key has no message.
*/
message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    message2::FormattedPlaceholder fallback = message2::FormattedPlaceholder("msgref");

    // Argument must be present
    if (!arg.canFormat()) {
        errorCode = U_MF_FORMATTING_ERROR;
        return fallback;
    }

    // Assumes arg is not-yet-formatted
    const Formattable& operand = arg.asFormattable();
    if (operand.getType() != UFMT_STRING) {
        // Ignore non-strings (no error is set for them)
        return fallback;
    }
    UnicodeString key;
    key = operand.getString(errorCode);

    FunctionOptionsMap opt = options.getOptions();
    bool hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getType() == UFMT_OBJECT && opt["resbundle"].getObject(errorCode)->tag() == u"properties";
    if (!hasProperties) {
        // Properties must be provided
        errorCode = U_MF_FORMATTING_ERROR;
        return fallback;
    }

    // Properties were provided: look up the given string in them,
    // yielding a message
    const FormattableProperties* properties = reinterpret_cast<const FormattableProperties*>(opt["resbundle"].getObject(errorCode));
    U_ASSERT(U_SUCCESS(errorCode));
    UnicodeString* msg = static_cast<UnicodeString*>(properties->properties->get(key));
    if (msg == nullptr) {
        // No message given for this key -- error out
        errorCode = U_MF_FORMATTING_ERROR;
        return fallback;
    }

    MessageFormatter::Builder mfBuilder(errorCode);
    UParseError parseErr;
    // Any parse/data model errors will be propagated
    MessageFormatter mf = mfBuilder.setPattern(*msg, parseErr, errorCode).build(errorCode);
    Arguments arguments = localToGlobal(opt, errorCode);
    if (U_FAILURE(errorCode)) {
        return fallback;
    }

    // Formatting errors are deliberately dropped (this matches the behavior
    // in the ICU4J test). For example: $gcase should default to "$gcase"
    // if the gcase option was omitted.
    UErrorCode statusBeforeFormat = errorCode;
    UnicodeString result = mf.formatToString(arguments, errorCode);
    if (U_FAILURE(errorCode)) {
        errorCode = statusBeforeFormat;
    }
    return FormattedPlaceholder(arg, FormattedValue(std::move(result)));
}
/*
  Exercises the custom `msgRef` formatter.

  First formats the "firefox" and "chrome" messages from the properties
  table directly, selecting on $gcase. Then formats a message that refers
  to those entries through `:msgRef`, with and without the `gcase` option.
*/
void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    Hashtable* properties = ResourceManager::properties(errorCode);
    CHECK_ERROR(errorCode);
    LocalPointer<FormattableProperties> fProperties(new FormattableProperties(properties));
    if (!fProperties.isValid()) {
        ((UErrorCode&) errorCode) = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode)
        .adoptFormatter(FunctionName("msgRef"), new ResourceManagerFactory(), errorCode)
        .build();
    CHECK_ERROR(errorCode);

    TestCase::Builder testBuilder;
    testBuilder.setLocale(Locale("ro"));
    testBuilder.setFunctionRegistry(&reg);
    testBuilder.setPattern(*((UnicodeString*) properties->get("firefox")));
    testBuilder.setName("message-ref");

    // Sets one string argument and the expected output on the shared
    // builder, then builds and runs the resulting test case. Everything
    // else (pattern, other arguments) carries over from earlier calls.
    auto expect = [&](const char* argName, const char* argValue, const char* expected) {
        TestCase test = testBuilder.setArgument(argName, argValue)
            .setExpected(expected)
            .build();
        TestUtils::runTestCase(*this, test, errorCode);
    };

    // Direct use of the stored messages
    expect("gcase", "whatever", "Firefox");
    expect("gcase", "genitive", "Firefoxin");

    testBuilder.setPattern(*((UnicodeString*) properties->get("chrome")));
    expect("gcase", "whatever", "Chrome");
    expect("gcase", "genitive", "Chromen");

    // Indirect use through :msgRef, with gcase passed as an option
    testBuilder.setArgument("res", fProperties.getAlias());
    testBuilder.setPattern("Please start {$browser :msgRef gcase=genitive resbundle=$res}");
    expect("browser", "firefox", "Please start Firefoxin");
    expect("browser", "chrome", "Please start Chromen");
    expect("browser", "safari", "Please start Safarin");

    // Indirect use through :msgRef, without the gcase option
    testBuilder.setPattern("Please start {$browser :msgRef resbundle=$res}");
    expect("browser", "firefox", "Please start Firefox");
    expect("browser", "chrome", "Please start Chrome");
    expect("browser", "safari", "Please start Safari");
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,474 @@
// © 2024 and later: Unicode, Inc. and others.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/gregocal.h"
#include "messageformat2test.h"
using namespace icu::message2;
using namespace data_model;
/*
Tests based on ICU4J's MessageFormat2Test.java
and Mf2FeaturesTest.java
*/
/*
TODO: Tests need to be unified in a single format that
both ICU4C and ICU4J can use, rather than being embedded in code.
*/
/*
Tests reflect the syntax specified in
https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf
as of the following commit from 2023-05-09:
https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867
*/
// Checks that the empty pattern formats to the empty string.
void TestMessageFormat2::testEmptyMessage(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    testBuilder.setPattern("");
    testBuilder.setExpected("");
    TestCase emptyCase = testBuilder.build();
    TestUtils::runTestCase(*this, emptyCase, errorCode);
}
// Checks that a pattern containing only text is expected back unchanged.
void TestMessageFormat2::testPlainText(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    testBuilder.setPattern("Hello World!");
    testBuilder.setExpected("Hello World!");
    TestCase plainCase = testBuilder.build();
    TestUtils::runTestCase(*this, plainCase, errorCode);
}
// Checks that a single variable placeholder is replaced by its string argument.
void TestMessageFormat2::testPlaceholders(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    testBuilder.setPattern("Hello, {$userName}!");
    testBuilder.setExpected("Hello, John!");
    testBuilder.setArgument("userName", "John");
    TestCase placeholderCase = testBuilder.build();
    TestUtils::runTestCase(*this, placeholderCase, errorCode);
}
/*
  Formats one message with both arguments present, then with each argument
  missing in turn, then with both missing. Every case with a missing
  argument expects U_MF_UNRESOLVED_VARIABLE_ERROR together with output in
  which the missing variable appears as `{$name}` / `{$today}`.
*/
void TestMessageFormat2::testArgumentMissing(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    UnicodeString pattern = "Hello {$name}, today is {$today :date style=long}.";

    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    // November 23, 2022 at 7:42:37 PM
    calendar->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate testDate = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    // Both arguments present
    testBuilder.setPattern(pattern);
    testBuilder.clearArguments();
    testBuilder.setArgument("name", "John");
    testBuilder.setDateArgument("today", testDate);
    testBuilder.setExpected("Hello John, today is November 23, 2022.");
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Missing date argument
    testBuilder.setPattern(pattern);
    testBuilder.clearArguments();
    testBuilder.setArgument("name", "John");
    testBuilder.setExpected("Hello John, today is {$today}.");
    testBuilder.setExpectedError(U_MF_UNRESOLVED_VARIABLE_ERROR);
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Missing name argument
    testBuilder.setPattern(pattern);
    testBuilder.clearArguments();
    testBuilder.setDateArgument("today", testDate);
    testBuilder.setExpectedError(U_MF_UNRESOLVED_VARIABLE_ERROR);
    testBuilder.setExpected("Hello {$name}, today is November 23, 2022.");
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Both arguments missing
    testBuilder.setPattern(pattern);
    testBuilder.clearArguments();
    testBuilder.setExpectedError(U_MF_UNRESOLVED_VARIABLE_ERROR);
    testBuilder.setExpected("Hello {$name}, today is {$today}.");
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Formats the same date message with an explicitly set locale, before and
  after changing the process default locale to "ro". The default locale is
  restored at the end of the test.
*/
void TestMessageFormat2::testDefaultLocale(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    // November 23, 2022 at 7:42:37 PM
    calendar->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate testDate = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    UnicodeString pattern = "Date: {$date :date style=long}.";
    UnicodeString englishOutput = "Date: November 23, 2022.";
    UnicodeString romanianOutput = "Date: 23 noiembrie 2022.";

    testBuilder.setPattern(pattern);

    // Builder's current locale, English output expected
    testBuilder.clearArguments();
    testBuilder.setDateArgument("date", testDate);
    testBuilder.setExpected(englishOutput);
    testBuilder.setExpectSuccess();
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Explicit Romanian locale
    testBuilder.setExpected(romanianOutput);
    testBuilder.setLocale(Locale("ro"));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Switch the default locale; it is put back at the end of the function
    Locale savedDefault = Locale::getDefault();
    Locale::setDefault(Locale::forLanguageTag("ro", errorCode), errorCode);
    CHECK_ERROR(errorCode);

    // Explicit en-US locale while the default is Romanian
    testBuilder.setExpected(englishOutput);
    testBuilder.setLocale(Locale("en", "US"));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Explicit Romanian locale while the default is Romanian
    testBuilder.setExpected(romanianOutput);
    testBuilder.setLocale(Locale::forLanguageTag("ro", errorCode));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    Locale::setDefault(savedDefault, errorCode);
    CHECK_ERROR(errorCode);
}
/*
  Selects on a local variable that was itself produced by `:number`,
  expecting the exact-match `1` variant for the integer argument 1 in en-US.
*/
void TestMessageFormat2::testSpecialPluralWithDecimals(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    // The continuation lines of the literal stay at column 0 so that no
    // extra whitespace ends up inside the pattern.
    UnicodeString pattern = ".local $amount = {$count :number}\n\
.match {$amount :number}\n\
1 {{I have {$amount} dollar.}}\n\
* {{I have {$amount} dollars.}}";

    testBuilder.setPattern(pattern);
    testBuilder.clearArguments();
    testBuilder.setArgument("count", (int64_t) 1);
    testBuilder.setExpected("I have 1 dollar.");
    testBuilder.setLocale(Locale("en", "US"));
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Formats a date argument in the "ro" locale twice: once with no annotation
  and once with a bare `:datetime` annotation. Both cases expect the same
  output.
*/
void TestMessageFormat2::testDefaultFunctionAndOptions(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    // November 23, 2022 at 7:42:37 PM
    calendar->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate testDate = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    // No annotation on the placeholder
    testBuilder.setPattern("Testing date formatting: {$date}.");
    testBuilder.clearArguments();
    testBuilder.setDateArgument("date", testDate);
    testBuilder.setExpected("Testing date formatting: 23.11.2022, 19:42.");
    testBuilder.setLocale(Locale("ro"));
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Explicit :datetime with no options; the date argument set above is reused
    testBuilder.setPattern("Testing date formatting: {$date :datetime}.");
    testBuilder.setExpected("Testing date formatting: 23.11.2022, 19:42.");
    testBuilder.setLocale(Locale("ro"));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
void TestMessageFormat2::testSimpleSelection(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
(void) testBuilder;
(void) errorCode;
/* Covered by testPlural */
}
/*
  Two-selector `.match` on photo count and user gender. Runs three gender
  cases (masculine, feminine, other) for a count of 1 and again for a
  count of 13.
*/
void TestMessageFormat2::testComplexSelection(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    // The continuation lines of the literal stay at column 0 so that no
    // extra whitespace ends up inside the pattern.
    UnicodeString pattern = ".match {$photoCount :number} {$userGender :string}\n\
1 masculine {{{$userName} added a new photo to his album.}}\n\
1 feminine {{{$userName} added a new photo to her album.}}\n\
1 * {{{$userName} added a new photo to their album.}}\n\
* masculine {{{$userName} added {$photoCount} photos to his album.}}\n\
* feminine {{{$userName} added {$photoCount} photos to her album.}}\n\
* * {{{$userName} added {$photoCount} photos to their album.}}";
    testBuilder.setPattern(pattern);

    // Updates gender, name and expected output on the shared builder and
    // runs the case; the photo count set by the caller is left in place.
    auto runFor = [&](const char* gender, const char* name, const char* expected) {
        TestCase test = testBuilder.setArgument("userGender", gender)
            .setArgument("userName", name)
            .setExpected(expected)
            .build();
        TestUtils::runTestCase(*this, test, errorCode);
    };

    int64_t photoCount = 1;
    testBuilder.clearArguments().setArgument("photoCount", photoCount);
    runFor("masculine", "John", "John added a new photo to his album.");
    runFor("feminine", "Anna", "Anna added a new photo to her album.");
    runFor("unknown", "Anonymous", "Anonymous added a new photo to their album.");

    photoCount = 13;
    testBuilder.clearArguments().setArgument("photoCount", photoCount);
    runFor("masculine", "John", "John added 13 photos to his album.");
    runFor("feminine", "Anna", "Anna added 13 photos to her album.");
    runFor("unknown", "Anonymous", "Anonymous added 13 photos to their album.");
}
/*
  Declares $expDate through `.input` with a medium date style and checks
  that the un-annotated use in the pattern body picks up that formatting.
*/
void TestMessageFormat2::testSimpleLocalVariable(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    // November 23, 2022 at 7:42:37 PM
    calendar->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate testDate = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    // The continuation line of the literal stays at column 0 so that no
    // extra whitespace ends up inside the pattern.
    testBuilder.setPattern(".input {$expDate :date style=medium}\n\
{{Your tickets expire on {$expDate}.}}");

    // "count" is supplied even though this pattern never refers to it.
    int64_t ticketCount = 1;
    testBuilder.clearArguments();
    testBuilder.setArgument("count", ticketCount);
    testBuilder.setLocale(Locale("en"));
    testBuilder.setDateArgument("expDate", testDate);
    testBuilder.setExpected("Your tickets expire on Nov 23, 2022.");
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Combines an `.input` declaration for $expDate with a `.match` on $count,
  checking the exact-match variant (count 1) and the catch-all variant
  (count 3).
*/
void TestMessageFormat2::testLocalVariableWithSelect(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    // November 23, 2022 at 7:42:37 PM
    calendar->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
    UDate testDate = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    // The continuation lines of the literal stay at column 0 so that no
    // extra whitespace ends up inside the pattern.
    testBuilder.setPattern(".input {$expDate :date style=medium}\n\
.match {$count :number}\n\
1 {{Your ticket expires on {$expDate}.}}\n\
* {{Your {$count} tickets expire on {$expDate}.}}");

    // count == 1 selects the singular variant
    int64_t ticketCount = 1;
    testBuilder.clearArguments();
    testBuilder.setArgument("count", ticketCount);
    testBuilder.setLocale(Locale("en"));
    testBuilder.setDateArgument("expDate", testDate);
    testBuilder.setExpected("Your ticket expires on Nov 23, 2022.");
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // count == 3 falls through to the catch-all variant; the date argument is reused
    ticketCount = 3;
    testBuilder.setArgument("count", ticketCount);
    testBuilder.setExpected("Your 3 tickets expire on Nov 23, 2022.");
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Formats October 27, 2022 (midnight) in the "en" locale with each `:date`
  style (medium, full, long, medium again, short), and finally with no
  annotation at all.
*/
void TestMessageFormat2::testDateFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    LocalPointer<Calendar> calendar(Calendar::createInstance(errorCode));
    CHECK_ERROR(errorCode);
    calendar->set(2022, Calendar::OCTOBER, 27, 0, 0, 0);
    UDate expiration = calendar->getTime(errorCode);
    CHECK_ERROR(errorCode);

    // Starts from a clean argument list, sets the pattern, the expected
    // output and the "exp" date argument, then builds and runs the case.
    auto expectFormatted = [&](const UnicodeString& pattern, const UnicodeString& expected) {
        TestCase test = testBuilder.clearArguments().setPattern(pattern)
            .setExpected(expected)
            .setDateArgument("exp", expiration)
            .build();
        TestUtils::runTestCase(*this, test, errorCode);
    };

    // The locale is set once; it stays on the builder for every case below.
    testBuilder.setLocale(Locale("en"));

    expectFormatted("Your card expires on {$exp :date style=medium}!",
                    "Your card expires on Oct 27, 2022!");
    expectFormatted("Your card expires on {$exp :date style=full}!",
                    "Your card expires on Thursday, October 27, 2022!");
    expectFormatted("Your card expires on {$exp :date style=long}!",
                    "Your card expires on October 27, 2022!");
    expectFormatted("Your card expires on {$exp :date style=medium}!",
                    "Your card expires on Oct 27, 2022!");
    expectFormatted("Your card expires on {$exp :date style=short}!",
                    "Your card expires on 10/27/22!");

/*
  This test would require the calendar to be passed as a UObject* with the datetime formatter
  doing an RTTI check -- however, that would be awkward, since it would have to check the tag for each
  possible subclass of `Calendar`. datetime currently has no support for formatting any object argument

    cal.adoptInstead(new GregorianCalendar(2022, Calendar::OCTOBER, 27, errorCode));
    if (cal.isValid()) {
        test = testBuilder.setPattern("Your card expires on {$exp :datetime skeleton=yMMMdE}!")
                          .setExpected("Your card expires on Thu, Oct 27, 2022!")
                          .setArgument("exp", cal.orphan(), errorCode)
                          .build();
        TestUtils::runTestCase(*this, test, errorCode);
    }
*/

    // Implied function based on type of the object to format
    expectFormatted("Your card expires on {$exp}!",
                    CharsToUnicodeString("Your card expires on 10/27/22, 12:00\\u202FAM!"));
}
/*
  Basic `.match` on a number: the exact-match `1` variant versus the
  catch-all variant. Each variant is checked twice, once with the count
  passed as an integer and once with the count passed as a string.
*/
void TestMessageFormat2::testPlural(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    // The continuation lines of the literal stay at column 0; the space
    // before the second backslash is part of the pattern.
    UnicodeString message = ".match {$count :number}\n\
1 {{You have one notification.}}\n \
* {{You have {$count} notifications.}}";

    // Integer arguments
    int64_t count = 1;
    TestCase test = testBuilder.clearArguments().setPattern(message)
        .setExpected("You have one notification.")
        .setArgument("count", count)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    count = 42;
    test = testBuilder.clearArguments().setExpected("You have 42 notifications.")
        .setArgument("count", count)
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    // String arguments. The earlier version re-assigned `count` before each
    // of these cases although only the string literals are passed; those
    // dead stores are gone so it is clear which value is under test.
    test = testBuilder.clearArguments().setPattern(message)
        .setExpected("You have one notification.")
        .setArgument("count", "1")
        .build();
    TestUtils::runTestCase(*this, test, errorCode);

    test = testBuilder.clearArguments().setExpected("You have 42 notifications.")
        .setArgument("count", "42")
        .build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  `.match` with `select=ordinal`: the exact keys 1, 2 and 3 take priority,
  then the plural categories one / two / few, then the catch-all. All
  places are passed as strings.
*/
void TestMessageFormat2::testPluralOrdinal(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    // The continuation lines of the literal stay at column 0; a space
    // before a trailing backslash is part of the pattern.
    UnicodeString message = ".match {$place :number select=ordinal}\n\
1 {{You got the gold medal}}\n \
2 {{You got the silver medal}}\n \
3 {{You got the bronze medal}}\n\
one {{You got in the {$place}st place}}\n\
two {{You got in the {$place}nd place}}\n \
few {{You got in the {$place}rd place}}\n \
* {{You got in the {$place}th place}}";

    static const struct {
        const char* place;
        const char* expected;
    } cases[] = {
        { "1",  "You got the gold medal" },
        { "2",  "You got the silver medal" },
        { "3",  "You got the bronze medal" },
        { "21", "You got in the 21st place" },
        { "32", "You got in the 32nd place" },
        { "23", "You got in the 23rd place" },
        { "15", "You got in the 15th place" },
    };

    // The pattern is the same for every case, so it is set once up front.
    testBuilder.setPattern(message);
    for (const auto& c : cases) {
        TestCase test = testBuilder.clearArguments().setExpected(c.expected)
            .setArgument("place", c.place)
            .build();
        TestUtils::runTestCase(*this, test, errorCode);
    }
}
/*
  A chain of `.local` declarations in which $baz is used before it is
  declared. Expects U_MF_DUPLICATE_DECLARATION_ERROR together with output
  in which $baz appears as `{$baz}`.
*/
void TestMessageFormat2::testDeclareBeforeUse(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    // The continuation lines of the literal stay at column 0; a space
    // before a trailing backslash is part of the pattern.
    UnicodeString pattern = ".local $foo = {$baz :number}\n\
.local $bar = {$foo}\n \
.local $baz = {$bar}\n \
{{The message uses {$baz} and works}}";

    testBuilder.setPattern(pattern);
    testBuilder.setName("declare-before-use");

    testBuilder.clearArguments();
    testBuilder.setExpected("The message uses {$baz} and works");
    testBuilder.setExpectedError(U_MF_DUPLICATE_DECLARATION_ERROR);
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
void TestMessageFormat2::featureTests() {
IcuTestErrorCode errorCode(*this, "featureTests");
TestCase::Builder testBuilder;
testBuilder.setName("featureTests");
testEmptyMessage(testBuilder, errorCode);
testPlainText(testBuilder, errorCode);
testPlaceholders(testBuilder, errorCode);
testArgumentMissing(testBuilder, errorCode);
testDefaultLocale(testBuilder, errorCode);
testSpecialPluralWithDecimals(testBuilder, errorCode);
testDefaultFunctionAndOptions(testBuilder, errorCode);
testSimpleSelection(testBuilder, errorCode);
testComplexSelection(testBuilder, errorCode);
testSimpleLocalVariable(testBuilder, errorCode);
testLocalVariableWithSelect(testBuilder, errorCode);
testDateFormat(testBuilder, errorCode);
testPlural(testBuilder, errorCode);
testPluralOrdinal(testBuilder, errorCode);
testDeclareBeforeUse(testBuilder, errorCode);
}
// Out-of-line definitions of the virtual destructors of TestCase and its
// Builder; neither has any work to do.
TestCase::~TestCase() = default;
TestCase::Builder::~Builder() = default;
#endif /* #if !UCONFIG_NO_FORMATTING */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,155 @@
// © 2024 and later: Unicode, Inc. and others.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/gregocal.h"
#include "unicode/msgfmt.h"
#include "messageformat2test.h"
using namespace icu::message2;
/*
Tests based on ICU4J's Mf2IcuTest.java
*/
/*
TODO: Tests need to be unified in a single format that
both ICU4C and ICU4J can use, rather than being embedded in code.
*/
/*
Tests reflect the syntax specified in
https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf
as of the following commit from 2023-05-09:
https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867
*/
// Two un-annotated placeholders filled with string arguments.
void TestMessageFormat2::testSample(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    testBuilder.setPattern("There are {$count} files on {$where}");
    testBuilder.setArgument("count", "abc");
    testBuilder.setArgument("where", "def");
    testBuilder.setExpected("There are abc files on def");
    TestCase sample = testBuilder.build();
    TestUtils::runTestCase(*this, sample, errorCode);
}
/*
  One message mixing `:time`, `:date` and `:integer` annotations with an
  un-annotated string placeholder. The same date argument ($when) is
  formatted twice, once as a time and once as a date.
*/
void TestMessageFormat2::testStaticFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    // The continuation line of the literal stays at column 0 so that only
    // the single space before the backslash separates the two halves.
    testBuilder.setPattern("At {$when :time style=medium} on {$when :date style=medium}, \
there was {$what} on planet {$planet :integer}.");
    testBuilder.setArgument("planet", (int64_t) 7);
    testBuilder.setDateArgument("when", (UDate) 871068000000);
    testBuilder.setArgument("what", "a disturbance in the Force");
    testBuilder.setExpected(CharsToUnicodeString("At 12:20:00\\u202FPM on Aug 8, 1997, there was a disturbance in the Force on planet 7."));
    TestCase staticCase = testBuilder.build();
    TestUtils::runTestCase(*this, staticCase, errorCode);
}
/*
  Formats the same pattern with file counts of 0, 1 and 12. The pattern
  has no selection, so only the number in the output changes.
*/
void TestMessageFormat2::testSimpleFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    testBuilder.setPattern("The disk \"{$diskName}\" contains {$fileCount} file(s).");
    testBuilder.setArgument("diskName", "MyDisk");

    static const struct {
        int64_t fileCount;
        const char* expected;
    } cases[] = {
        { 0,  "The disk \"MyDisk\" contains 0 file(s)." },
        { 1,  "The disk \"MyDisk\" contains 1 file(s)." },
        { 12, "The disk \"MyDisk\" contains 12 file(s)." },
    };

    for (const auto& c : cases) {
        TestCase test = testBuilder.setArgument("fileCount", c.fileCount)
            .setExpected(c.expected)
            .build();
        TestUtils::runTestCase(*this, test, errorCode);
    }
}
/*
  `.match` on a `:string` selector with one explicit key ("female") and a
  catch-all. "male" and "unknown" are both expected to produce the
  catch-all text.
*/
void TestMessageFormat2::testSelectFormatToPattern(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    // The continuation lines of the literal stay at column 0 so that no
    // extra whitespace ends up inside the pattern.
    UnicodeString message = CharsToUnicodeString(".match {$userGender :string}\n\
female {{{$userName} est all\\u00E9e \\u00E0 Paris.}}\n\
* {{{$userName} est all\\u00E9 \\u00E0 Paris.}}");
    testBuilder.setPattern(message);

    // Explicit "female" variant
    testBuilder.setArgument("userName", "Charlotte");
    testBuilder.setArgument("userGender", "female");
    testBuilder.setExpected(CharsToUnicodeString("Charlotte est all\\u00e9e \\u00e0 Paris."));
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // "male" has no variant of its own
    testBuilder.setArgument("userName", "Guillaume");
    testBuilder.setArgument("userGender", "male");
    testBuilder.setExpected(CharsToUnicodeString("Guillaume est all\\u00e9 \\u00e0 Paris."));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // Neither does "unknown"
    testBuilder.setArgument("userName", "Dominique");
    testBuilder.setArgument("userGender", "unknown");
    testBuilder.setExpected(CharsToUnicodeString("Dominique est all\\u00e9 \\u00e0 Paris."));
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Compares MessageFormat 1 and MessageFormat 2 when the argument names do
  not match the pattern.

  The MF1 part asserts success and output with the placeholders left as
  `{user}` / `{today}`. The MF2 part expects U_MF_UNRESOLVED_VARIABLE_ERROR
  with `{$user}` / `{$today}` in the output. Both parts are then repeated
  with the correct names.
*/
void TestMessageFormat2::testMf1Behavior(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
    CHECK_ERROR(errorCode);

    UDate testDate = UDate(1671782400000); // 2022-12-23
    UnicodeString user = "John";
    UnicodeString badArgumentsNames[] = { "userX", "todayX" };
    UnicodeString goodArgumentsNames[] = { "user", "today" };
    icu::Formattable oldArgumentsValues[] = {
        icu::Formattable(user),
        icu::Formattable(testDate, icu::Formattable::kIsDate)
    };
    UnicodeString expectedGood = "Hello John, today is December 23, 2022.";

    // --- MessageFormat 1 ---
    LocalPointer<MessageFormat> mf1(new MessageFormat("Hello {user}, today is {today,date,long}.", errorCode));
    CHECK_ERROR(errorCode);

    UnicodeString mf1Output;
    mf1->format(badArgumentsNames, oldArgumentsValues, 2, mf1Output, errorCode);
    assertEquals("testMf1Behavior", (UBool) true, U_SUCCESS(errorCode));
    assertEquals("old icu test", "Hello {user}, today is {today}.", mf1Output);

    mf1Output.remove();
    mf1->format(goodArgumentsNames, oldArgumentsValues, 2, mf1Output, errorCode);
    assertEquals("testMf1Behavior", (UBool) true, U_SUCCESS(errorCode));
    assertEquals("old icu test", expectedGood, mf1Output);

    // --- MessageFormat 2, wrong argument names ---
    testBuilder.setPattern("Hello {$user}, today is {$today :date style=long}.");
    testBuilder.setArgument(badArgumentsNames[0], user);
    testBuilder.setDateArgument(badArgumentsNames[1], testDate);
    testBuilder.setExpected("Hello {$user}, today is {$today}.");
    testBuilder.setExpectedError(U_MF_UNRESOLVED_VARIABLE_ERROR);
    TestCase test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);

    // --- MessageFormat 2, correct argument names ---
    testBuilder.clearArguments();
    testBuilder.setExpectSuccess();
    testBuilder.setArgument(goodArgumentsNames[0], user);
    testBuilder.setDateArgument(goodArgumentsNames[1], testDate);
    testBuilder.setExpected(expectedGood);
    test = testBuilder.build();
    TestUtils::runTestCase(*this, test, errorCode);
}
/*
  Entry point for the tests ported from ICU4J's Mf2IcuTest.java: runs each
  test in this file in order, sharing one builder and one error code.
*/
void TestMessageFormat2::messageFormat1Tests() {
    // The label used to read "featureTests" (copied from the feature-test
    // suite), which attributed failures here to the wrong suite.
    IcuTestErrorCode errorCode(*this, "messageFormat1Tests");
    TestCase::Builder testBuilder;
    testBuilder.setName("messageFormat1Tests");

    testSample(testBuilder, errorCode);
    testStaticFormat(testBuilder, errorCode);
    testSimpleFormat(testBuilder, errorCode);
    testSelectFormatToPattern(testBuilder, errorCode);
    testMf1Behavior(testBuilder, errorCode);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,312 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef _TESTMESSAGEFORMAT2_UTILS
#define _TESTMESSAGEFORMAT2_UTILS
#include "unicode/locid.h"
#include "unicode/messageformat2_formattable.h"
#include "unicode/messageformat2.h"
#include "intltest.h"
#include "messageformat2_macros.h"
#include "messageformat2_serializer.h"
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN namespace message2 {
class TestCase : public UMemory {
private:
/* const */ UnicodeString testName;
/* const */ UnicodeString pattern;
/* const */ Locale locale;
/* const */ std::map<UnicodeString, Formattable> arguments;
/* const */ UErrorCode expectedError;
/* const */ bool expectedNoSyntaxError;
/* const */ bool hasExpectedOutput;
/* const */ UnicodeString expected;
/* const */ bool hasLineNumberAndOffset;
/* const */ uint32_t lineNumber;
/* const */ uint32_t offset;
/* const */ bool ignoreError;
// Function registry is not owned by the TestCase object
const MFFunctionRegistry* functionRegistry = nullptr;
public:
const UnicodeString& getPattern() const { return pattern; }
const Locale& getLocale() const { return locale; }
std::map<UnicodeString, Formattable> getArguments() const { return std::move(arguments); }
const UnicodeString& getTestName() const { return testName; }
bool expectSuccess() const {
return (!ignoreError && U_SUCCESS(expectedError));
}
bool expectFailure() const {
return (!ignoreError && U_FAILURE(expectedError));
}
bool expectNoSyntaxError() const {
return expectedNoSyntaxError;
}
UErrorCode expectedErrorCode() const {
U_ASSERT(!expectSuccess());
return expectedError;
}
bool lineNumberAndOffsetMatch(uint32_t actualLine, uint32_t actualOffset) const {
return (!hasLineNumberAndOffset ||
((actualLine == lineNumber) && actualOffset == offset));
}
bool outputMatches(const UnicodeString& result) const {
return (!hasExpectedOutput || (expected == result));
}
const UnicodeString& expectedOutput() const {
U_ASSERT(hasExpectedOutput);
return expected;
}
uint32_t getLineNumber() const {
U_ASSERT(hasLineNumberAndOffset);
return lineNumber;
}
uint32_t getOffset() const {
U_ASSERT(hasLineNumberAndOffset);
return offset;
}
bool hasCustomRegistry() const { return functionRegistry != nullptr; }
const MFFunctionRegistry* getCustomRegistry() const {
U_ASSERT(hasCustomRegistry());
return functionRegistry;
}
TestCase(const TestCase&);
TestCase& operator=(TestCase&& other) noexcept = default;
virtual ~TestCase();
class Builder : public UObject {
friend class TestCase;
public:
Builder& setName(UnicodeString name) { testName = name; return *this; }
Builder& setPattern(UnicodeString pat) { pattern = pat; return *this; }
Builder& setArgument(const UnicodeString& k, const UnicodeString& val) {
arguments[k] = Formattable(val);
return *this;
}
Builder& setArgument(const UnicodeString& k, const Formattable* val, int32_t count) {
U_ASSERT(val != nullptr);
arguments[k] = Formattable(val, count);
return *this;
}
Builder& setArgument(const UnicodeString& k, double val) {
arguments[k] = Formattable(val);
return *this;
}
Builder& setArgument(const UnicodeString& k, int64_t val) {
arguments[k] = Formattable(val);
return *this;
}
Builder& setDateArgument(const UnicodeString& k, UDate date) {
arguments[k] = Formattable::forDate(date);
return *this;
}
Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) {
THIS_ON_ERROR(errorCode);
arguments[k] = Formattable::forDecimal(decimal, errorCode);
return *this;
}
Builder& setArgument(const UnicodeString& k, const FormattableObject* val) {
U_ASSERT(val != nullptr);
arguments[k] = Formattable(val);
return *this;
}
Builder& clearArguments() {
arguments.clear();
return *this;
}
Builder& setExpected(UnicodeString e) {
hasExpectedOutput = true;
expected = e;
return *this;
}
Builder& clearExpected() {
hasExpectedOutput = false;
return *this;
}
Builder& setExpectedError(UErrorCode errorCode) {
expectedError = U_SUCCESS(errorCode) ? U_ZERO_ERROR : errorCode;
return *this;
}
Builder& setNoSyntaxError() {
expectNoSyntaxError = true;
return *this;
}
Builder& setExpectSuccess() {
return setExpectedError(U_ZERO_ERROR);
}
Builder& setLocale(Locale&& loc) {
locale = loc;
return *this;
}
Builder& setExpectedLineNumberAndOffset(uint32_t line, uint32_t o) {
hasLineNumberAndOffset = true;
lineNumber = line;
offset = o;
return *this;
}
Builder& setIgnoreError() {
ignoreError = true;
return *this;
}
Builder& clearIgnoreError() {
ignoreError = false;
return *this;
}
Builder& setFunctionRegistry(const MFFunctionRegistry* reg) {
U_ASSERT(reg != nullptr);
functionRegistry = reg;
return *this;
}
TestCase build() const {
return TestCase(*this);
}
// Only declared here; the definition is not part of this class body.
virtual ~Builder();
private:
// Name of the test; copied into the TestCase by build().
UnicodeString testName;
// MessageFormat 2 source text of the message under test.
UnicodeString pattern;
// Locale to format in; the constructor defaults it to Locale::getDefault().
Locale locale;
// Argument name -> value bindings supplied when formatting.
std::map<UnicodeString, Formattable> arguments;
// True once setExpected() has been called (and not cleared since).
bool hasExpectedOutput;
// Expected formatted output; only meaningful when hasExpectedOutput is true.
UnicodeString expected;
// Expected error code; U_ZERO_ERROR means the test is expected to succeed.
UErrorCode expectedError;
// True if the test only requires that no syntax error is reported.
bool expectNoSyntaxError;
// True once setExpectedLineNumberAndOffset() has been called.
bool hasLineNumberAndOffset;
// Expected parse-error position; only read when hasLineNumberAndOffset is true.
uint32_t lineNumber;
uint32_t offset;
// Set/cleared by setIgnoreError()/clearIgnoreError(); copied into the TestCase.
bool ignoreError;
const MFFunctionRegistry* functionRegistry = nullptr; // Not owned
public:
    // Creates a builder with an empty pattern, the default locale, no
    // arguments, no expected output, and success (U_ZERO_ERROR) as the
    // expected outcome.
    // lineNumber and offset are explicitly zeroed so that the builder never
    // carries indeterminate values, even though they are only consulted when
    // hasLineNumberAndOffset is true.
    Builder()
        : pattern(""),
          locale(Locale::getDefault()),
          hasExpectedOutput(false),
          expected(""),
          expectedError(U_ZERO_ERROR),
          expectNoSyntaxError(false),
          hasLineNumberAndOffset(false),
          lineNumber(0),
          offset(0),
          ignoreError(false) {}
};
private:
// Constructs a TestCase by copying every setting out of `builder`.
// Private: within the visible code, instances are created through
// Builder::build().
TestCase(const Builder& builder) :
testName(builder.testName),
pattern(builder.pattern),
locale(builder.locale),
arguments(builder.arguments),
expectedError(builder.expectedError),
expectedNoSyntaxError(builder.expectNoSyntaxError),
hasExpectedOutput(builder.hasExpectedOutput),
expected(builder.expected),
hasLineNumberAndOffset(builder.hasLineNumberAndOffset),
// The builder's line number and offset are only read when the builder
// says they were set; otherwise both default to 0.
lineNumber(builder.hasLineNumberAndOffset ? builder.lineNumber : 0),
offset(builder.hasLineNumberAndOffset ? builder.offset : 0),
ignoreError(builder.ignoreError),
// Pointer copy only; may be nullptr when no custom registry was set.
functionRegistry(builder.functionRegistry) {
// If an error is not expected, then the expected
// output should be present
U_ASSERT(expectFailure() || expectNoSyntaxError() || hasExpectedOutput);
}
}; // class TestCase
class TestUtils {
public:
// Runs a single test case
static void runTestCase(IntlTest& tmsg,
const TestCase& testCase,
IcuTestErrorCode& errorCode) {
CHECK_ERROR(errorCode);
UParseError parseError;
MessageFormatter::Builder mfBuilder(errorCode);
mfBuilder.setPattern(testCase.getPattern(), parseError, errorCode).setLocale(testCase.getLocale());
if (testCase.hasCustomRegistry()) {
mfBuilder.setFunctionRegistry(*testCase.getCustomRegistry());
}
MessageFormatter mf = mfBuilder.build(errorCode);
UnicodeString result;
if (U_SUCCESS(errorCode)) {
result = mf.formatToString(MessageArguments(testCase.getArguments(), errorCode), errorCode);
}
if (testCase.expectSuccess() || (testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR
// For now, don't round-trip messages with these errors,
// since duplicate options are dropped
&& testCase.expectedErrorCode() != U_MF_DUPLICATE_OPTION_NAME_ERROR)) {
const UnicodeString& in = mf.getNormalizedPattern();
UnicodeString out;
if (!roundTrip(in, mf.getDataModel(), out)) {
failRoundTrip(tmsg, testCase, in, out);
}
}
if (testCase.expectNoSyntaxError()) {
if (errorCode == U_MF_SYNTAX_ERROR) {
failSyntaxError(tmsg, testCase);
}
errorCode.reset();
return;
}
if (testCase.expectSuccess() && U_FAILURE(errorCode)) {
failExpectedSuccess(tmsg, testCase, errorCode);
return;
}
if (testCase.expectFailure() && errorCode != testCase.expectedErrorCode()) {
failExpectedFailure(tmsg, testCase, errorCode);
return;
}
if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) {
failWrongOffset(tmsg, testCase, parseError.line, parseError.offset);
}
if (!testCase.outputMatches(result)) {
failWrongOutput(tmsg, testCase, result);
return;
}
errorCode.reset();
}
static bool roundTrip(const UnicodeString& normalizedInput, const MFDataModel& dataModel, UnicodeString& result) {
Serializer(dataModel, result).serialize();
return (normalizedInput == result);
}
static void failSyntaxError(IntlTest& tmsg, const TestCase& testCase) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with pattern: " + testCase.getPattern() + " and error code U_MF_SYNTAX_WARNING; expected no syntax error");
}
static void failExpectedSuccess(IntlTest& tmsg, const TestCase& testCase, IcuTestErrorCode& errorCode) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with pattern: " + testCase.getPattern() + " and error code " + ((int32_t) errorCode));
errorCode.reset();
}
static void failExpectedFailure(IntlTest& tmsg, const TestCase& testCase, IcuTestErrorCode& errorCode) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with wrong error code; pattern: " + testCase.getPattern() + " and error code " + ((int32_t) errorCode) + "(expected error code: " + ((int32_t) testCase.expectedErrorCode()) + " )");
errorCode.reset();
}
static void failWrongOutput(IntlTest& tmsg, const TestCase& testCase, const UnicodeString& result) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with wrong output; pattern: " + testCase.getPattern() + " and expected output = " + testCase.expectedOutput() + " and actual output = " + result);
}
static void failRoundTrip(IntlTest& tmsg, const TestCase& testCase, const UnicodeString& in, const UnicodeString& output) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with wrong output; normalized input = " + in + " serialized data model = " + output);
}
static void failWrongOffset(IntlTest& tmsg, const TestCase& testCase, uint32_t actualLine, uint32_t actualOffset) {
tmsg.dataerrln("Test failed with wrong line or character offset in parse error; expected (line %d, offset %d), got (line %d, offset %d)", testCase.getLineNumber(), testCase.getOffset(),
actualLine, actualOffset);
tmsg.logln(UnicodeString(testCase.getTestName()) + " pattern = " + testCase.getPattern() + " - failed by returning the wrong line number or offset in the parse error");
}
}; // class TestUtils
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif