mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 15:42:14 +00:00
ICU-22942 MF2 ICU4C: NFC-normalize names and keys according to spec
Includes adding !UCONFIG_NO_NORMALIZATION guards to all MF2 files
This commit is contained in:
parent
376da67f8d
commit
0357501948
32 changed files with 283 additions and 20 deletions
icu4c/source
i18n
messageformat2.cpp messageformat2_allocation.h messageformat2_arguments.cpp messageformat2_checker.cpp messageformat2_checker.h messageformat2_data_model.cpp messageformat2_errors.cpp messageformat2_errors.h messageformat2_evaluation.cpp messageformat2_evaluation.h messageformat2_formattable.cpp messageformat2_formatter.cpp messageformat2_function_registry.cpp messageformat2_function_registry_internal.h messageformat2_macros.h messageformat2_parser.cpp messageformat2_parser.h messageformat2_serializer.cpp messageformat2_serializer.h
unicode
test/intltest
testdata/message2
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -11,8 +13,10 @@
|
|||
#include "unicode/messageformat2_data_model.h"
|
||||
#include "unicode/messageformat2_formattable.h"
|
||||
#include "unicode/messageformat2.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_checker.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_macros.h"
|
||||
|
||||
|
@ -37,7 +41,7 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
// The fallback for a variable name is itself.
|
||||
UnicodeString str(DOLLAR);
|
||||
str += var;
|
||||
const Formattable* val = context.getGlobal(var, errorCode);
|
||||
const Formattable* val = context.getGlobal(*this, var, errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
return (FormattedPlaceholder(*val, str));
|
||||
}
|
||||
|
@ -52,9 +56,9 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
}
|
||||
|
||||
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
|
||||
const Operand& rand,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
const Operand& rand,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
@ -71,15 +75,19 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
// Eager vs. lazy evaluation is an open issue:
|
||||
// see https://github.com/unicode-org/message-format-wg/issues/299
|
||||
|
||||
// NFC-normalize the variable name. See
|
||||
// https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
|
||||
const VariableName normalized = normalizeNFC(var);
|
||||
|
||||
// Look up the variable in the environment
|
||||
if (env.has(var)) {
|
||||
if (env.has(normalized)) {
|
||||
// `var` is a local -- look it up
|
||||
const Closure& rhs = env.lookup(var);
|
||||
const Closure& rhs = env.lookup(normalized);
|
||||
// Format the expression using the environment from the closure
|
||||
return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
|
||||
}
|
||||
// Variable wasn't found in locals -- check if it's global
|
||||
FormattedPlaceholder result = evalArgument(var, context, status);
|
||||
FormattedPlaceholder result = evalArgument(normalized, context, status);
|
||||
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
// Unbound variable -- set a resolution error
|
||||
|
@ -761,6 +769,7 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen
|
|||
UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
|
||||
EMPTY_ON_ERROR(status);
|
||||
|
||||
|
||||
// Create a new environment that will store closures for all local variables
|
||||
Environment* env = Environment::create(status);
|
||||
// Create a new context with the given arguments and the `errors` structure
|
||||
|
@ -813,12 +822,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn
|
|||
|
||||
// Check that variable is in scope
|
||||
const VariableName& var = rand.asVariable();
|
||||
UnicodeString normalized = normalizeNFC(var);
|
||||
|
||||
// Check local scope
|
||||
if (localEnv.has(var)) {
|
||||
if (localEnv.has(normalized)) {
|
||||
return;
|
||||
}
|
||||
// Check global scope
|
||||
context.getGlobal(var, status);
|
||||
context.getGlobal(*this, normalized, status);
|
||||
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
context.getErrors().setUnresolvedVariable(var, status);
|
||||
|
@ -855,7 +866,10 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
|
|||
// memoizing the value of localEnv up to this point
|
||||
|
||||
// Add the LHS to the environment for checking the next declaration
|
||||
env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
|
||||
env = Environment::create(normalizeNFC(decl.getVariable()),
|
||||
Closure(rhs, *env),
|
||||
env,
|
||||
status);
|
||||
CHECK_ERROR(status);
|
||||
}
|
||||
}
|
||||
|
@ -866,3 +880,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -139,6 +141,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_UTILS_H
|
||||
|
|
|
@ -3,12 +3,16 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
||||
#include "unicode/messageformat2.h"
|
||||
#include "unicode/messageformat2_arguments.h"
|
||||
#include "unicode/messageformat2_data_model_names.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -22,11 +26,15 @@ namespace message2 {
|
|||
|
||||
using Arguments = MessageArguments;
|
||||
|
||||
const Formattable* Arguments::getArgument(const VariableName& arg, UErrorCode& errorCode) const {
|
||||
const Formattable* Arguments::getArgument(const MessageFormatter& context,
|
||||
const VariableName& arg,
|
||||
UErrorCode& errorCode) const {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
U_ASSERT(argsLen == 0 || arguments.isValid());
|
||||
for (int32_t i = 0; i < argsLen; i++) {
|
||||
if (argumentNames[i] == arg) {
|
||||
UnicodeString normalized = context.normalizeNFC(argumentNames[i]);
|
||||
// arg already assumed to be normalized
|
||||
if (normalized == arg) {
|
||||
return &arguments[i];
|
||||
}
|
||||
}
|
||||
|
@ -57,3 +65,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -3,12 +3,16 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
||||
#include "unicode/messageformat2.h"
|
||||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_checker.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_macros.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
|
@ -104,6 +108,13 @@ TypeEnvironment::~TypeEnvironment() {}
|
|||
|
||||
// ---------------------
|
||||
|
||||
// Produces the string used when comparing variant keys for equality:
// the wildcard key maps to "*", and a literal key maps to the
// NFC-normalized form of its unquoted contents (normalization is
// delegated to the MessageFormatter held in `context`).
// NOTE(review): a literal key whose contents are "*" (e.g. written as |*|)
// presumably yields the same string as the wildcard -- confirm that callers
// comparing these results do not need to tell the two apart.
UnicodeString Checker::normalizeNFC(const Key& k) const {
    return k.isWildcard()
        ? UnicodeString("*")
        : context.normalizeNFC(k.asLiteral().unquoted());
}
|
||||
|
||||
static bool areDefaultKeys(const Key* keys, int32_t len) {
|
||||
U_ASSERT(len > 0);
|
||||
for (int32_t i = 0; i < len; i++) {
|
||||
|
@ -185,7 +196,7 @@ void Checker::checkVariants(UErrorCode& status) {
|
|||
// This variant was already checked,
|
||||
// so we know keys1.len == len
|
||||
for (int32_t kk = 0; kk < len; kk++) {
|
||||
if (!(keys[kk] == keys1[kk])) {
|
||||
if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) {
|
||||
allEqual = false;
|
||||
break;
|
||||
}
|
||||
|
@ -312,3 +323,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -56,14 +58,19 @@ namespace message2 {
|
|||
// an explicit declaration
|
||||
}; // class TypeEnvironment
|
||||
|
||||
class MessageFormatter;
|
||||
|
||||
// Checks a data model for semantic errors
|
||||
// (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md )
|
||||
class Checker {
|
||||
public:
|
||||
void check(UErrorCode&);
|
||||
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
|
||||
Checker(const MFDataModel& d, StaticErrors& e, const MessageFormatter& mf)
|
||||
: dataModel(d), errors(e), context(mf) {}
|
||||
private:
|
||||
|
||||
UnicodeString normalizeNFC(const Key&) const;
|
||||
|
||||
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
|
||||
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
|
||||
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
|
||||
|
@ -78,6 +85,9 @@ namespace message2 {
|
|||
void check(const Pattern&);
|
||||
const MFDataModel& dataModel;
|
||||
StaticErrors& errors;
|
||||
|
||||
// Used for NFC normalization
|
||||
const MessageFormatter& context;
|
||||
}; // class Checker
|
||||
|
||||
} // namespace message2
|
||||
|
@ -88,6 +98,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT_CHECKER_H
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -918,3 +920,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -290,3 +292,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -151,6 +153,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_ERRORS_H
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -190,13 +192,16 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
errors.checkErrors(status);
|
||||
}
|
||||
|
||||
const Formattable* MessageContext::getGlobal(const VariableName& v, UErrorCode& errorCode) const {
|
||||
return arguments.getArgument(v, errorCode);
|
||||
const Formattable* MessageContext::getGlobal(const MessageFormatter& context,
|
||||
const VariableName& v,
|
||||
UErrorCode& errorCode) const {
|
||||
return arguments.getArgument(context, v, errorCode);
|
||||
}
|
||||
|
||||
// Builds the context used while processing a message.
// `args` is bound to the `arguments` member, and the `errors` member is
// constructed from `e` and `status`.
MessageContext::MessageContext(const MessageArguments& args,
                               const StaticErrors& e,
                               UErrorCode& status)
    : arguments(args),
      errors(e, status) {
}

// No explicit cleanup is performed here.
MessageContext::~MessageContext() {
}
|
||||
|
||||
} // namespace message2
|
||||
|
@ -205,3 +210,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
* \file
|
||||
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
|
||||
*/
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
|
@ -174,11 +175,15 @@ namespace message2 {
|
|||
// The context contains all the information needed to process
|
||||
// an entire message: arguments, formatter cache, and error list
|
||||
|
||||
class MessageFormatter;
|
||||
|
||||
class MessageContext : public UMemory {
|
||||
public:
|
||||
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
|
||||
|
||||
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
|
||||
const Formattable* getGlobal(const MessageFormatter&,
|
||||
const VariableName&,
|
||||
UErrorCode&) const;
|
||||
|
||||
// If any errors were set, update `status` accordingly
|
||||
void checkErrors(UErrorCode& status) const;
|
||||
|
@ -191,6 +196,7 @@ namespace message2 {
|
|||
const MessageArguments& arguments; // External message arguments
|
||||
// Errors accumulated during parsing/formatting
|
||||
DynamicErrors errors;
|
||||
|
||||
}; // class MessageContext
|
||||
|
||||
} // namespace message2
|
||||
|
@ -201,6 +207,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_EVALUATION_H
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -336,3 +338,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -116,6 +118,24 @@ namespace message2 {
|
|||
|
||||
// MessageFormatter
|
||||
|
||||
// Returns the NFC-normalized version of s, returning s itself
|
||||
// if it's already normalized.
|
||||
UnicodeString MessageFormatter::normalizeNFC(const UnicodeString& s) const {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Check if string is already normalized
|
||||
UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status);
|
||||
// If so, return it
|
||||
if (U_SUCCESS(status) && result == UNORM_YES) {
|
||||
return s;
|
||||
}
|
||||
// Otherwise, normalize it
|
||||
UnicodeString normalized = nfcNormalizer->normalize(s, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
|
||||
CHECK_ERROR(success);
|
||||
|
||||
|
@ -163,6 +183,8 @@ namespace message2 {
|
|||
errors = errorsNew.orphan();
|
||||
}
|
||||
|
||||
nfcNormalizer = Normalizer2::getNFCInstance(success);
|
||||
|
||||
// Note: we currently evaluate variables lazily,
|
||||
// without memoization. This call is still necessary
|
||||
// to check out-of-scope uses of local variables in
|
||||
|
@ -170,7 +192,7 @@ namespace message2 {
|
|||
// only be checked when arguments are known)
|
||||
|
||||
// Check for resolution errors
|
||||
Checker(dataModel, *errors).check(success);
|
||||
Checker(dataModel, *errors, *this).check(success);
|
||||
}
|
||||
|
||||
void MessageFormatter::cleanup() noexcept {
|
||||
|
@ -191,6 +213,7 @@ namespace message2 {
|
|||
signalErrors = other.signalErrors;
|
||||
errors = other.errors;
|
||||
other.errors = nullptr;
|
||||
nfcNormalizer = other.nfcNormalizer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -352,3 +375,5 @@ U_NAMESPACE_END
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -1242,3 +1244,4 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -226,6 +228,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -97,6 +99,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_MACROS_H
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -1926,3 +1928,4 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -175,6 +177,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT_PARSER_H
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -285,3 +287,4 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -63,6 +65,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT_SERIALIZER_H
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -20,6 +22,7 @@
|
|||
#include "unicode/messageformat2_arguments.h"
|
||||
#include "unicode/messageformat2_data_model.h"
|
||||
#include "unicode/messageformat2_function_registry.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
@ -325,6 +328,8 @@ namespace message2 {
|
|||
|
||||
private:
|
||||
friend class Builder;
|
||||
friend class Checker;
|
||||
friend class MessageArguments;
|
||||
friend class MessageContext;
|
||||
|
||||
MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
|
||||
|
@ -352,6 +357,9 @@ namespace message2 {
|
|||
void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
|
||||
|
||||
// Formatting methods
|
||||
|
||||
// Used for normalizing variable names and keys for comparison
|
||||
UnicodeString normalizeNFC(const UnicodeString&) const;
|
||||
[[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
|
||||
void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
|
||||
// Formats a call to a formatting function
|
||||
|
@ -445,6 +453,10 @@ namespace message2 {
|
|||
// formatting methods return best-effort output.
|
||||
// The default is false.
|
||||
bool signalErrors = false;
|
||||
|
||||
// Used for implementing normalizeNFC()
|
||||
const Normalizer2* nfcNormalizer = nullptr;
|
||||
|
||||
}; // class MessageFormatter
|
||||
|
||||
} // namespace message2
|
||||
|
@ -457,6 +469,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_H
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -43,7 +45,7 @@ template class U_I18N_API LocalArray<message2::Formattable>;
|
|||
|
||||
namespace message2 {
|
||||
|
||||
class MessageContext;
|
||||
class MessageFormatter;
|
||||
|
||||
// Arguments
|
||||
// ----------
|
||||
|
@ -112,7 +114,9 @@ namespace message2 {
|
|||
private:
|
||||
friend class MessageContext;
|
||||
|
||||
const Formattable* getArgument(const data_model::VariableName&, UErrorCode&) const;
|
||||
const Formattable* getArgument(const MessageFormatter&,
|
||||
const data_model::VariableName&,
|
||||
UErrorCode&) const;
|
||||
|
||||
// Avoids using Hashtable so that code constructing a Hashtable
|
||||
// doesn't have to appear in this header file
|
||||
|
@ -131,6 +135,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_ARGUMENTS_H
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -2592,6 +2594,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT_DATA_MODEL_H
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -1010,6 +1012,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_FORMATTABLE_H
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -422,6 +424,8 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_H
|
||||
|
|
|
@ -288,8 +288,10 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
|
|||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
#if !UCONFIG_NO_MF2
|
||||
TESTCLASS(60,TestMessageFormat2);
|
||||
#endif
|
||||
#endif
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -400,3 +402,4 @@ TestCase::Builder::~Builder() {}
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#include "unicode/rep.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -188,4 +190,6 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -730,3 +732,5 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) {
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -157,3 +159,5 @@ void TestMessageFormat2::messageFormat1Tests() {
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -309,6 +311,9 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
|
||||
|
||||
// Other tests (non-spec)
|
||||
// TODO: Delete normalization.json after https://github.com/unicode-org/message-format-wg/pull/904
|
||||
// lands and the tests here are updated from the spec repo
|
||||
runTestsFromJsonFile(*this, "normalization.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "more-functions.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "valid-tests.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "resolution-errors.json", errorCode);
|
||||
|
@ -358,3 +363,5 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
#endif /* #if !UCONFIG_NO_MF2 */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
@ -344,4 +346,6 @@ U_NAMESPACE_END
|
|||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif
|
||||
|
|
67
testdata/message2/normalization.json
vendored
Normal file
67
testdata/message2/normalization.json
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
{
|
||||
"$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json",
|
||||
"scenario": "Syntax",
|
||||
"description": "Test cases that do not depend on any registry definitions.",
|
||||
"defaultTestProperties": {
|
||||
"locale": "en-US"
|
||||
},
|
||||
"tests": [
|
||||
{
|
||||
"description": "NFC: text is not normalized",
|
||||
"src": "\u1E0A\u0323",
|
||||
"exp": "\u1E0A\u0323"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
|
||||
"src": ".local $\u0044\u0323\u0307 = {foo} {{{$\u1E0c\u0307}}}",
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
|
||||
"src": ".local $\u1E0c\u0307 = {foo} {{{$\u0044\u0323\u0307}}}",
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't",
|
||||
"src": ".input {$\u1E0c\u0307} {{{$\u0044\u0323\u0307}}}",
|
||||
"params": [{"name": "\u1E0c\u0307", "value": "foo"}],
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is",
|
||||
"src": ".input {$\u0044\u0323\u0307} {{{$\u1E0c\u0307}}}",
|
||||
"params": [{"name": "\u0044\u0323\u0307", "value": "foo"}],
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; reordering",
|
||||
"src": ".local $\u0044\u0307\u0323 = {foo} {{{$\u1E0c\u0307}}}",
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; special case mapping",
|
||||
"src": ".local $\u0041\u030A\u0301 = {foo} {{{$\u01FA}}}",
|
||||
"exp": "foo"
|
||||
},
|
||||
{
|
||||
"description": "NFC: keys are normalized",
|
||||
"src": ".local $x = {\u1E0C\u0307 :string} .match {$x} \u1E0A\u0323 {{Right}} * {{Wrong}}",
|
||||
"exp": "Right"
|
||||
},
|
||||
{
|
||||
"description": "NFC: keys are normalized (unquoted)",
|
||||
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} \u1E0C\u0307 {{Normalized}} * {{Wrong}}",
|
||||
"expErrors": [{"type": "duplicate-variant"}]
|
||||
},
|
||||
{
|
||||
"description": "NFC: keys are normalized (quoted)",
|
||||
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} |\u1E0A\u0323| {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
|
||||
"expErrors": [{"type": "duplicate-variant"}]
|
||||
},
|
||||
{
|
||||
"description": "NFC: keys are normalized (mixed)",
|
||||
"src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}",
|
||||
"expErrors": [{"type": "duplicate-variant"}]
|
||||
}
|
||||
]
|
||||
}
|
Loading…
Add table
Reference in a new issue