ICU-22942 MF2 ICU4C: NFC-normalize names and keys according to spec

Includes adding !UCONFIG_NO_NORMALIZATION guards to all MF2 files
This commit is contained in:
Tim Chevalier 2024-10-10 16:56:26 -07:00
parent 376da67f8d
commit 0357501948
32 changed files with 283 additions and 20 deletions

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -11,8 +13,10 @@
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_formattable.h"
#include "unicode/messageformat2.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_macros.h"
@ -37,7 +41,7 @@ static Formattable evalLiteral(const Literal& lit) {
// The fallback for a variable name is itself.
UnicodeString str(DOLLAR);
str += var;
const Formattable* val = context.getGlobal(var, errorCode);
const Formattable* val = context.getGlobal(*this, var, errorCode);
if (U_SUCCESS(errorCode)) {
return (FormattedPlaceholder(*val, str));
}
@ -52,9 +56,9 @@ static Formattable evalLiteral(const Literal& lit) {
}
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}
@ -71,15 +75,19 @@ static Formattable evalLiteral(const Literal& lit) {
// Eager vs. lazy evaluation is an open issue:
// see https://github.com/unicode-org/message-format-wg/issues/299
// NFC-normalize the variable name. See
// https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
const VariableName normalized = normalizeNFC(var);
// Look up the variable in the environment
if (env.has(var)) {
if (env.has(normalized)) {
// `var` is a local -- look it up
const Closure& rhs = env.lookup(var);
const Closure& rhs = env.lookup(normalized);
// Format the expression using the environment from the closure
return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
}
// Variable wasn't found in locals -- check if it's global
FormattedPlaceholder result = evalArgument(var, context, status);
FormattedPlaceholder result = evalArgument(normalized, context, status);
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
status = U_ZERO_ERROR;
// Unbound variable -- set a resolution error
@ -761,6 +769,7 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen
UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
EMPTY_ON_ERROR(status);
// Create a new environment that will store closures for all local variables
Environment* env = Environment::create(status);
// Create a new context with the given arguments and the `errors` structure
@ -813,12 +822,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn
// Check that variable is in scope
const VariableName& var = rand.asVariable();
UnicodeString normalized = normalizeNFC(var);
// Check local scope
if (localEnv.has(var)) {
if (localEnv.has(normalized)) {
return;
}
// Check global scope
context.getGlobal(var, status);
context.getGlobal(*this, normalized, status);
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
status = U_ZERO_ERROR;
context.getErrors().setUnresolvedVariable(var, status);
@ -855,7 +866,10 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
// memoizing the value of localEnv up to this point
// Add the LHS to the environment for checking the next declaration
env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
env = Environment::create(normalizeNFC(decl.getVariable()),
Closure(rhs, *env),
env,
status);
CHECK_ERROR(status);
}
}
@ -866,3 +880,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -10,6 +10,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -139,6 +141,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_UTILS_H

View file

@ -3,12 +3,16 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
#include "unicode/messageformat2.h"
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model_names.h"
#include "messageformat2_evaluation.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
@ -22,11 +26,15 @@ namespace message2 {
using Arguments = MessageArguments;
const Formattable* Arguments::getArgument(const VariableName& arg, UErrorCode& errorCode) const {
const Formattable* Arguments::getArgument(const MessageFormatter& context,
const VariableName& arg,
UErrorCode& errorCode) const {
if (U_SUCCESS(errorCode)) {
U_ASSERT(argsLen == 0 || arguments.isValid());
for (int32_t i = 0; i < argsLen; i++) {
if (argumentNames[i] == arg) {
UnicodeString normalized = context.normalizeNFC(argumentNames[i]);
// arg already assumed to be normalized
if (normalized == arg) {
return &arguments[i];
}
}
@ -57,3 +65,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -3,12 +3,16 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
#include "unicode/messageformat2.h"
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
@ -104,6 +108,13 @@ TypeEnvironment::~TypeEnvironment() {}
// ---------------------
UnicodeString Checker::normalizeNFC(const Key& k) const {
if (k.isWildcard()) {
return UnicodeString("*");
}
return context.normalizeNFC(k.asLiteral().unquoted());
}
static bool areDefaultKeys(const Key* keys, int32_t len) {
U_ASSERT(len > 0);
for (int32_t i = 0; i < len; i++) {
@ -185,7 +196,7 @@ void Checker::checkVariants(UErrorCode& status) {
// This variant was already checked,
// so we know keys1.len == len
for (int32_t kk = 0; kk < len; kk++) {
if (!(keys[kk] == keys1[kk])) {
if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) {
allEqual = false;
break;
}
@ -312,3 +323,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -10,6 +10,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -56,14 +58,19 @@ namespace message2 {
// an explicit declaration
}; // class TypeEnvironment
class MessageFormatter;
// Checks a data model for semantic errors
// (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md )
class Checker {
public:
void check(UErrorCode&);
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
Checker(const MFDataModel& d, StaticErrors& e, const MessageFormatter& mf)
: dataModel(d), errors(e), context(mf) {}
private:
UnicodeString normalizeNFC(const Key&) const;
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
@ -78,6 +85,9 @@ namespace message2 {
void check(const Pattern&);
const MFDataModel& dataModel;
StaticErrors& errors;
// Used for NFC normalization
const MessageFormatter& context;
}; // class Checker
} // namespace message2
@ -88,6 +98,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_CHECKER_H

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -918,3 +920,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -290,3 +292,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -15,6 +15,8 @@
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -151,6 +153,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_ERRORS_H

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -190,13 +192,16 @@ PrioritizedVariant::~PrioritizedVariant() {}
errors.checkErrors(status);
}
const Formattable* MessageContext::getGlobal(const VariableName& v, UErrorCode& errorCode) const {
return arguments.getArgument(v, errorCode);
const Formattable* MessageContext::getGlobal(const MessageFormatter& context,
const VariableName& v,
UErrorCode& errorCode) const {
return arguments.getArgument(context, v, errorCode);
}
MessageContext::MessageContext(const MessageArguments& args,
const StaticErrors& e,
UErrorCode& status) : arguments(args), errors(e, status) {}
MessageContext::~MessageContext() {}
} // namespace message2
@ -205,3 +210,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -14,6 +14,7 @@
* \file
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
*/
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
@ -174,11 +175,15 @@ namespace message2 {
// The context contains all the information needed to process
// an entire message: arguments, formatter cache, and error list
class MessageFormatter;
class MessageContext : public UMemory {
public:
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
const Formattable* getGlobal(const MessageFormatter&,
const VariableName&,
UErrorCode&) const;
// If any errors were set, update `status` accordingly
void checkErrors(UErrorCode& status) const;
@ -191,6 +196,7 @@ namespace message2 {
const MessageArguments& arguments; // External message arguments
// Errors accumulated during parsing/formatting
DynamicErrors errors;
}; // class MessageContext
} // namespace message2
@ -201,6 +207,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_EVALUATION_H

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -336,3 +338,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -116,6 +118,24 @@ namespace message2 {
// MessageFormatter
// Returns the NFC-normalized version of s, returning s itself
// if it's already normalized.
UnicodeString MessageFormatter::normalizeNFC(const UnicodeString& s) const {
UErrorCode status = U_ZERO_ERROR;
// Check if string is already normalized
UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status);
// If so, return it
if (U_SUCCESS(status) && result == UNORM_YES) {
return s;
}
// Otherwise, normalize it
UnicodeString normalized = nfcNormalizer->normalize(s, status);
if (U_FAILURE(status)) {
return {};
}
return normalized;
}
MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
CHECK_ERROR(success);
@ -163,6 +183,8 @@ namespace message2 {
errors = errorsNew.orphan();
}
nfcNormalizer = Normalizer2::getNFCInstance(success);
// Note: we currently evaluate variables lazily,
// without memoization. This call is still necessary
// to check out-of-scope uses of local variables in
@ -170,7 +192,7 @@ namespace message2 {
// only be checked when arguments are known)
// Check for resolution errors
Checker(dataModel, *errors).check(success);
Checker(dataModel, *errors, *this).check(success);
}
void MessageFormatter::cleanup() noexcept {
@ -191,6 +213,7 @@ namespace message2 {
signalErrors = other.signalErrors;
errors = other.errors;
other.errors = nullptr;
nfcNormalizer = other.nfcNormalizer;
return *this;
}
@ -352,3 +375,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -1242,3 +1244,4 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -10,6 +10,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -226,6 +228,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H

View file

@ -10,6 +10,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -97,6 +99,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_MACROS_H

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -1926,3 +1928,4 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -16,6 +16,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -175,6 +177,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_PARSER_H

View file

@ -3,6 +3,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -285,3 +287,4 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -10,6 +10,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -63,6 +65,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_SERIALIZER_H

View file

@ -8,6 +8,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -20,6 +22,7 @@
#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#ifndef U_HIDE_DEPRECATED_API
@ -325,6 +328,8 @@ namespace message2 {
private:
friend class Builder;
friend class Checker;
friend class MessageArguments;
friend class MessageContext;
MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
@ -352,6 +357,9 @@ namespace message2 {
void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
// Formatting methods
// Used for normalizing variable names and keys for comparison
UnicodeString normalizeNFC(const UnicodeString&) const;
[[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
// Formats a call to a formatting function
@ -445,6 +453,10 @@ namespace message2 {
// formatting methods return best-effort output.
// The default is false.
bool signalErrors = false;
// Used for implementing normalizeNFC()
const Normalizer2* nfcNormalizer = nullptr;
}; // class MessageFormatter
} // namespace message2
@ -457,6 +469,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_H

View file

@ -8,6 +8,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -43,7 +45,7 @@ template class U_I18N_API LocalArray<message2::Formattable>;
namespace message2 {
class MessageContext;
class MessageFormatter;
// Arguments
// ----------
@ -112,7 +114,9 @@ namespace message2 {
private:
friend class MessageContext;
const Formattable* getArgument(const data_model::VariableName&, UErrorCode&) const;
const Formattable* getArgument(const MessageFormatter&,
const data_model::VariableName&,
UErrorCode&) const;
// Avoids using Hashtable so that code constructing a Hashtable
// doesn't have to appear in this header file
@ -131,6 +135,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_ARGUMENTS_H

View file

@ -8,6 +8,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -2592,6 +2594,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_DATA_MODEL_H

View file

@ -8,6 +8,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -1010,6 +1012,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_FORMATTABLE_H

View file

@ -8,6 +8,8 @@
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -422,6 +424,8 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_H

View file

@ -288,8 +288,10 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_MF2
TESTCLASS(60,TestMessageFormat2);
#endif
#endif
default: name = ""; break; //needed to end loop
}

View file

@ -2,6 +2,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -400,3 +402,4 @@ TestCase::Builder::~Builder() {}
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -7,6 +7,8 @@
#include "unicode/rep.h"
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -188,4 +190,6 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif

View file

@ -2,6 +2,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -730,3 +732,5 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) {
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -2,6 +2,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -157,3 +159,5 @@ void TestMessageFormat2::messageFormat1Tests() {
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -2,6 +2,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -309,6 +311,9 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
// Other tests (non-spec)
// TODO: Delete this file after https://github.com/unicode-org/message-format-wg/pull/904
// lands and the tests here are updated from the spec repo
runTestsFromJsonFile(*this, "normalization.json", errorCode);
runTestsFromJsonFile(*this, "more-functions.json", errorCode);
runTestsFromJsonFile(*this, "valid-tests.json", errorCode);
runTestsFromJsonFile(*this, "resolution-errors.json", errorCode);
@ -358,3 +363,5 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -6,6 +6,8 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
@ -344,4 +346,6 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif

67
testdata/message2/normalization.json vendored Normal file
View file

@ -0,0 +1,67 @@
{
"$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json",
"scenario": "Syntax",
"description": "Test cases that do not depend on any registry definitions.",
"defaultTestProperties": {
"locale": "en-US"
},
"tests": [
{
"description": "NFC: text is not normalized",
"src": "\u1E0A\u0323",
"exp": "\u1E0A\u0323"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".local $\u0044\u0323\u0307 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".local $\u1E0c\u0307 = {foo} {{{$\u0044\u0323\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
"src": ".input {$\u1E0c\u0307} {{{$\u0044\u0323\u0307}}}",
"params": [{"name": "\u1E0c\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
"src": ".input {$\u0044\u0323\u0307} {{{$\u1E0c\u0307}}}",
"params": [{"name": "\u0044\u0323\u0307", "value": "foo"}],
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; reordering",
"src": ".local $\u0044\u0307\u0323 = {foo} {{{$\u1E0c\u0307}}}",
"exp": "foo"
},
{
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; special case mapping",
"src": ".local $\u0041\u030A\u0301 = {foo} {{{$\u01FA}}}",
"exp": "foo"
},
{
"description": "NFC: keys are normalized",
"src": ".local $x = {\u1E0C\u0307 :string} .match {$x} \u1E0A\u0323 {{Right}} * {{Wrong}}",
"exp": "Right"
},
{
"description": "NFC: keys are normalized (unquoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} \u1E0C\u0307 {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (quoted)",
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} |\u1E0A\u0323| {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
},
{
"description": "NFC: keys are normalized (mixed)",
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
"expErrors": [{"type": "duplicate-variant"}]
}
]
}