ICU-22762 MF2: Add builder method to control error handling behavior

Add MessageFormatter::Builder::setErrorHandlingBehavior() method
and a new enum type MessageFormatter::UMFErrorHandlingBehavior
to denote strict or best-effort behavior.

The reason for adding a single method that takes an enum is to allow
for the possibility of more error handling modes in the future.

Co-authored-by: Markus Scherer <markus.icu@gmail.com>
This commit is contained in:
Tim Chevalier 2024-08-02 12:57:11 -07:00
parent ac737dabcf
commit 23edf9cca6
10 changed files with 289 additions and 72 deletions

View file

@ -814,7 +814,12 @@ UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments
}
}
// Update status according to all errors seen while formatting
context.checkErrors(status);
if (signalErrors) {
context.checkErrors(status);
}
if (U_FAILURE(status)) {
result.remove();
}
return result;
}
@ -869,7 +874,7 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
CHECK_ERROR(status);
const Binding* decls = getDataModel().getLocalVariablesInternal();
U_ASSERT(env != nullptr && decls != nullptr);
U_ASSERT(env != nullptr && (decls != nullptr || getDataModel().bindingsLen == 0));
for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
const Binding& decl = decls[i];

View file

@ -930,9 +930,10 @@ const Pattern& MFDataModel::getPattern() const {
return *(std::get_if<Pattern>(&body));
}
// Returns nullptr if no bindings
const Binding* MFDataModel::getLocalVariablesInternal() const {
U_ASSERT(!bogus);
U_ASSERT(bindings.isValid());
U_ASSERT(bindingsLen == 0 || bindings.isValid());
return bindings.getAlias();
}
@ -948,9 +949,10 @@ const Variant* MFDataModel::getVariantsInternal() const {
return std::get_if<Matcher>(&body)->variants.getAlias();
}
// Returns nullptr if no unsupported statements
const UnsupportedStatement* MFDataModel::getUnsupportedStatementsInternal() const {
U_ASSERT(!bogus);
U_ASSERT(unsupportedStatements.isValid());
U_ASSERT(unsupportedStatementsLen == 0 || unsupportedStatements != nullptr);
return unsupportedStatements.getAlias();
}
@ -1056,7 +1058,6 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
UErrorCode localErrorCode = U_ZERO_ERROR;
if (other.hasPattern()) {
// body.emplace<Pattern>(Pattern(*std::get_if<Pattern>(&other.body)));
body = *std::get_if<Pattern>(&other.body);
} else {
const Expression* otherSelectors = other.getSelectorsInternal();
@ -1069,17 +1070,17 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
bogus = true;
return;
}
// body.emplace<Matcher>(Matcher(copiedSelectors, numSelectors, copiedVariants, numVariants));
body = Matcher(copiedSelectors, numSelectors, copiedVariants, numVariants);
}
bindingsLen = other.bindingsLen;
bindings.adoptInstead(copyArray(other.bindings.getAlias(), bindingsLen, localErrorCode));
if (U_FAILURE(localErrorCode)) {
bogus = true;
if (bindingsLen > 0) {
bindings.adoptInstead(copyArray(other.bindings.getAlias(), bindingsLen, localErrorCode));
}
unsupportedStatementsLen = other.unsupportedStatementsLen;
unsupportedStatements.adoptInstead(copyArray(other.unsupportedStatements.getAlias(), unsupportedStatementsLen, localErrorCode));
if (unsupportedStatementsLen > 0) {
unsupportedStatements.adoptInstead(copyArray(other.unsupportedStatements.getAlias(), unsupportedStatementsLen, localErrorCode));
}
if (U_FAILURE(localErrorCode)) {
bogus = true;
}
@ -1106,9 +1107,14 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
U_ASSERT(builder.bindings != nullptr);
bindingsLen = builder.bindings->size();
bindings.adoptInstead(copyVectorToArray<Binding>(*builder.bindings, errorCode));
if (bindingsLen > 0) {
bindings.adoptInstead(copyVectorToArray<Binding>(*builder.bindings, errorCode));
}
unsupportedStatementsLen = builder.unsupportedStatements->size();
unsupportedStatements.adoptInstead(copyVectorToArray<UnsupportedStatement>(*builder.unsupportedStatements, errorCode));
if (unsupportedStatementsLen > 0) {
unsupportedStatements.adoptInstead(copyVectorToArray<UnsupportedStatement>(*builder.unsupportedStatements,
errorCode));
}
if (U_FAILURE(errorCode)) {
bogus = true;
}

View file

@ -121,41 +121,10 @@ namespace message2 {
if (count() == 0) {
return;
}
if (staticErrors.syntaxAndDataModelErrors->size() > 0) {
switch (staticErrors.first().type) {
case StaticErrorType::DuplicateDeclarationError: {
status = U_MF_DUPLICATE_DECLARATION_ERROR;
break;
}
case StaticErrorType::DuplicateOptionName: {
status = U_MF_DUPLICATE_OPTION_NAME_ERROR;
break;
}
case StaticErrorType::VariantKeyMismatchError: {
status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
break;
}
case StaticErrorType::DuplicateVariant: {
status = U_MF_DUPLICATE_VARIANT_ERROR;
break;
}
case StaticErrorType::NonexhaustivePattern: {
status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
break;
}
case StaticErrorType::MissingSelectorAnnotation: {
status = U_MF_MISSING_SELECTOR_ANNOTATION_ERROR;
break;
}
case StaticErrorType::SyntaxError: {
status = U_MF_SYNTAX_ERROR;
break;
}
case StaticErrorType::UnsupportedStatementError: {
status = U_MF_UNSUPPORTED_STATEMENT_ERROR;
}
}
} else {
staticErrors.checkErrors(status);
if (U_FAILURE(status)) {
return;
}
U_ASSERT(resolutionAndFormattingErrors->size() > 0);
switch (first().type) {
case DynamicErrorType::UnknownFunction: {
@ -183,7 +152,6 @@ namespace message2 {
break;
}
}
}
}
void StaticErrors::addSyntaxError(UErrorCode& status) {
@ -277,6 +245,47 @@ namespace message2 {
}
}
void StaticErrors::checkErrors(UErrorCode& status) const {
if (U_FAILURE(status)) {
return;
}
if (syntaxAndDataModelErrors->size() > 0) {
switch (first().type) {
case StaticErrorType::DuplicateDeclarationError: {
status = U_MF_DUPLICATE_DECLARATION_ERROR;
break;
}
case StaticErrorType::DuplicateOptionName: {
status = U_MF_DUPLICATE_OPTION_NAME_ERROR;
break;
}
case StaticErrorType::VariantKeyMismatchError: {
status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
break;
}
case StaticErrorType::DuplicateVariant: {
status = U_MF_DUPLICATE_VARIANT_ERROR;
break;
}
case StaticErrorType::NonexhaustivePattern: {
status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
break;
}
case StaticErrorType::MissingSelectorAnnotation: {
status = U_MF_MISSING_SELECTOR_ANNOTATION_ERROR;
break;
}
case StaticErrorType::SyntaxError: {
status = U_MF_SYNTAX_ERROR;
break;
}
case StaticErrorType::UnsupportedStatementError: {
status = U_MF_UNSUPPORTED_STATEMENT_ERROR;
}
}
}
}
const StaticError& StaticErrors::first() const {
U_ASSERT(syntaxAndDataModelErrors.isValid() && syntaxAndDataModelErrors->size() > 0);
return *static_cast<StaticError*>(syntaxAndDataModelErrors->elementAt(0));

View file

@ -100,8 +100,9 @@ namespace message2 {
bool hasSyntaxError() const { return syntaxError; }
bool hasMissingSelectorAnnotationError() const { return missingSelectorAnnotationError; }
void addError(StaticError&&, UErrorCode&);
void checkErrors(UErrorCode&);
void checkErrors(UErrorCode&) const;
void clear();
const StaticError& first() const;
StaticErrors(const StaticErrors&, UErrorCode&);
StaticErrors(StaticErrors&&) noexcept;

View file

@ -27,12 +27,31 @@ namespace message2 {
// -------------------------------------
// Creates a MessageFormat instance based on the pattern.
MessageFormatter::Builder& MessageFormatter::Builder::setPattern(const UnicodeString& pat, UParseError& parseError, UErrorCode& errorCode) {
void MessageFormatter::Builder::clearState() {
normalizedInput.remove();
delete errors;
errors = nullptr;
}
MessageFormatter::Builder& MessageFormatter::Builder::setPattern(const UnicodeString& pat,
UParseError& parseError,
UErrorCode& errorCode) {
clearState();
// Create errors
errors = create<StaticErrors>(StaticErrors(errorCode), errorCode);
THIS_ON_ERROR(errorCode);
// Parse the pattern
MFDataModel::Builder tree(errorCode);
Parser(pat, tree, *errors, normalizedInput).parse(parseError, errorCode);
// Fail on syntax errors
if (errors->hasSyntaxError()) {
errors->checkErrors(errorCode);
// Check that the checkErrors() method set the error code
U_ASSERT(U_FAILURE(errorCode));
}
// Build the data model based on what was parsed
dataModel = tree.build(errorCode);
hasDataModel = true;
@ -55,9 +74,7 @@ namespace message2 {
}
MessageFormatter::Builder& MessageFormatter::Builder::setDataModel(MFDataModel&& newDataModel) {
normalizedInput.remove();
delete errors;
errors = nullptr;
clearState();
hasPattern = false;
hasDataModel = true;
dataModel = std::move(newDataModel);
@ -65,6 +82,13 @@ namespace message2 {
return *this;
}
MessageFormatter::Builder&
MessageFormatter::Builder::setErrorHandlingBehavior(
MessageFormatter::UMFErrorHandlingBehavior type) {
signalErrors = type == U_MF_STRICT;
return *this;
}
/*
This build() method is non-destructive, which entails the risk that
its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
@ -86,6 +110,7 @@ namespace message2 {
MessageFormatter::Builder::~Builder() {
if (errors != nullptr) {
delete errors;
errors = nullptr;
}
}
@ -116,6 +141,7 @@ namespace message2 {
standardMFFunctionRegistry.checkStandard();
normalizedInput = builder.normalizedInput;
signalErrors = builder.signalErrors;
// Build data model
// First, check that there is a data model
@ -162,6 +188,7 @@ namespace message2 {
customMFFunctionRegistry = other.customMFFunctionRegistry;
dataModel = std::move(other.dataModel);
normalizedInput = std::move(other.normalizedInput);
signalErrors = other.signalErrors;
errors = other.errors;
other.errors = nullptr;
return *this;

View file

@ -248,7 +248,7 @@ void Serializer::emit(const Pattern& pat) {
void Serializer::serializeDeclarations() {
const Binding* bindings = dataModel.getLocalVariablesInternal();
U_ASSERT(bindings != nullptr);
U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr);
for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
const Binding& b = bindings[i];
@ -272,7 +272,7 @@ void Serializer::serializeDeclarations() {
void Serializer::serializeUnsupported() {
const UnsupportedStatement* statements = dataModel.getUnsupportedStatementsInternal();
U_ASSERT(statements != nullptr);
U_ASSERT(dataModel.unsupportedStatementsLen == 0 || statements != nullptr);
for (int32_t i = 0; i < dataModel.unsupportedStatementsLen; i++) {
const UnsupportedStatement& s = statements[i];

View file

@ -139,6 +139,31 @@ namespace message2 {
*/
const MFDataModel& getDataModel() const;
/**
* Used in conjunction with the
* MessageFormatter::Builder::setErrorHandlingBehavior() method.
*
* @internal ICU 76 technology preview
* @deprecated This API is for technology preview only.
*/
typedef enum UMFErrorHandlingBehavior {
/**
* Suppress errors and return best-effort output.
*
* @internal ICU 76 technology preview
* @deprecated This API is for technology preview only.
*/
U_MF_BEST_EFFORT = 0,
/**
* Signal all MessageFormat errors using the UErrorCode
* argument.
*
* @internal ICU 76 technology preview
* @deprecated This API is for technology preview only.
*/
U_MF_STRICT
} UMFErrorHandlingBehavior;
/**
* The mutable Builder class allows each part of the MessageFormatter to be initialized
* separately; calling its `build()` method yields an immutable MessageFormatter.
@ -166,7 +191,10 @@ namespace message2 {
Locale locale;
// Not owned
const MFFunctionRegistry* customMFFunctionRegistry;
// Error behavior; see comment in `MessageFormatter` class
bool signalErrors = false;
void clearState();
public:
/**
* Sets the locale to use for formatting.
@ -218,6 +246,36 @@ namespace message2 {
* @deprecated This API is for technology preview only.
*/
Builder& setDataModel(MFDataModel&& dataModel);
/**
* Set the error handling behavior for this formatter.
*
* "Strict" error behavior means that that formatting methods
* will set their UErrorCode arguments to signal MessageFormat
* data model, resolution, and runtime errors. Syntax errors are
* always signaled.
*
* "Best effort" error behavior means that MessageFormat errors are
* suppressed: formatting methods will _not_ set their
* UErrorCode arguments to signal MessageFormat data model,
* resolution, or runtime errors. Best-effort output
* will be returned. Syntax errors are always signaled.
* This is the default behavior.
*
* @param type An enum with type UMFErrorHandlingBehavior;
* if type == `U_MF_STRICT`, then
* errors are handled strictly.
* If type == `U_MF_BEST_EFFORT`, then
* best-effort output is returned.
*
* The default is to suppress all MessageFormat errors
* and return best-effort output.
*
* @return A reference to the builder.
*
* @internal ICU 76 technology preview
* @deprecated This API is for technology preview only.
*/
Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
/**
* Constructs a new immutable MessageFormatter using the pattern or data model
* that was previously set, and the locale (if it was previously set)
@ -378,8 +436,15 @@ namespace message2 {
// Must be a raw pointer to avoid including the internal header file
// defining StaticErrors
// Owned by `this`
StaticErrors* errors;
StaticErrors* errors = nullptr;
// Error handling behavior.
// If true, then formatting methods set their UErrorCode arguments
// to signal MessageFormat errors, and no useful output is returned.
// If false, then MessageFormat errors are not signaled and the
// formatting methods return best-effort output.
// The default is false.
bool signalErrors = false;
}; // class MessageFormatter
} // namespace message2

View file

@ -30,6 +30,7 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(testAPI);
TESTCASE_AUTO(testAPISimple);
TESTCASE_AUTO(testDataModelAPI);
TESTCASE_AUTO(testFormatterAPI);
TESTCASE_AUTO(dataDrivenTests);
TESTCASE_AUTO_END;
}
@ -64,6 +65,77 @@ void TestMessageFormat2::testDataModelAPI() {
assertEquals("testDataModelAPI", i, 3);
}
// Needs more tests
void TestMessageFormat2::testFormatterAPI() {
IcuTestErrorCode errorCode(*this, "testFormatterAPI");
UnicodeString result;
UParseError parseError;
// Check that constructing the formatter fails
// if there's a syntax error
UnicodeString pattern = "{{}";
MessageFormatter::Builder mfBuilder(errorCode);
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_BEST_EFFORT); // This shouldn't matter, since there's a syntax error
mfBuilder.setPattern(pattern, parseError, errorCode);
MessageFormatter mf = mfBuilder.build(errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR,
"testFormatterAPI: expected syntax error, best-effort error handling");
// Parsing is done when setPattern() is called,
// so setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT) or setSuppressErrors must be called
// _before_ setPattern() to get the right behavior,
// and if either method is called after setting a pattern,
// setPattern() has to be called again.
// Should get the same behavior with strict errors
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT);
// Force re-parsing, as above comment
mfBuilder.setPattern(pattern, parseError, errorCode);
mf = mfBuilder.build(errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR,
"testFormatterAPI: expected syntax error, strict error handling");
// Try the same thing for a pattern with a resolution error
pattern = "{{{$x}}}";
// Check that a pattern with a resolution error gives fallback output
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_BEST_EFFORT);
mfBuilder.setPattern(pattern, parseError, errorCode);
mf = mfBuilder.build(errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from builder, best-effort error handling");
result = mf.formatToString(MessageArguments(), errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from formatter, best-effort error handling");
assertEquals("testFormatterAPI: fallback for message with unresolved variable",
result, "{$x}");
// Check that we do get an error with strict errors
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT);
mf = mfBuilder.build(errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: builder should succeed with resolution error");
result = mf.formatToString(MessageArguments(), errorCode);
errorCode.expectErrorAndReset(U_MF_UNRESOLVED_VARIABLE_ERROR,
"testFormatterAPI: formatting should fail with resolution error and strict error handling");
// Finally, check a valid pattern
pattern = "hello";
mfBuilder.setPattern(pattern, parseError, errorCode);
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_BEST_EFFORT);
mf = mfBuilder.build(errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from builder with valid pattern, best-effort error handling");
result = mf.formatToString(MessageArguments(), errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from formatter with valid pattern, best-effort error handling");
assertEquals("testFormatterAPI: wrong output with valid pattern, best-effort error handling",
result, "hello");
// Check that behavior is the same with strict errors
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT);
mf = mfBuilder.build(errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from builder with valid pattern, strict error handling");
result = mf.formatToString(MessageArguments(), errorCode);
errorCode.errIfFailureAndReset("testFormatterAPI: expected success from formatter with valid pattern, strict error handling");
assertEquals("testFormatterAPI: wrong output with valid pattern, strict error handling",
result, "hello");
}
// Example for design doc -- version without null and error checks
void TestMessageFormat2::testAPISimple() {
IcuTestErrorCode errorCode1(*this, "testAPI");
@ -216,9 +288,11 @@ void TestMessageFormat2::testAPICustomFunctions() {
MessageFormatter::Builder mfBuilder(errorCode);
UnicodeString result;
// This fails, because we did not provide a function registry:
MessageFormatter mf = mfBuilder.setPattern("Hello {$name :person formality=informal}", parseError, errorCode)
.setLocale(locale)
.build(errorCode);
MessageFormatter mf = mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT)
.setPattern("Hello {$name :person formality=informal}",
parseError, errorCode)
.setLocale(locale)
.build(errorCode);
result = mf.formatToString(arguments, errorCode);
assertEquals("testAPICustomFunctions", U_MF_UNKNOWN_FUNCTION_ERROR, errorCode);

View file

@ -45,6 +45,8 @@ public:
void testCustomFunctions(void);
// Test the data model API
void testDataModelAPI(void);
// Test the formatting API
void testFormatterAPI(void);
void testAPI(void);
void testAPISimple(void);

View file

@ -228,22 +228,32 @@ class TestUtils {
if (testCase.hasCustomRegistry()) {
mfBuilder.setFunctionRegistry(*testCase.getCustomRegistry());
}
// Initially, set error behavior to strict.
// We'll re-run to check for errors.
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_STRICT);
MessageFormatter mf = mfBuilder.build(errorCode);
UnicodeString result;
// Builder should fail if a syntax error was expected
if (!testCase.expectSuccess() && testCase.expectedErrorCode() == U_MF_SYNTAX_ERROR) {
if (errorCode != testCase.expectedErrorCode()) {
failExpectedFailure(tmsg, testCase, errorCode);
}
errorCode.reset();
return;
}
if (U_SUCCESS(errorCode)) {
result = mf.formatToString(MessageArguments(testCase.getArguments(), errorCode), errorCode);
}
if (testCase.expectSuccess() || (testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR
// For now, don't round-trip messages with these errors,
// since duplicate options are dropped
&& testCase.expectedErrorCode() != U_MF_DUPLICATE_OPTION_NAME_ERROR)) {
const UnicodeString& in = mf.getNormalizedPattern();
UnicodeString out;
if (!roundTrip(in, mf.getDataModel(), out)) {
failRoundTrip(tmsg, testCase, in, out);
}
const UnicodeString& in = mf.getNormalizedPattern();
UnicodeString out;
if (!roundTrip(in, mf.getDataModel(), out)
// For now, don't round-trip messages with these errors,
// since duplicate options are dropped
&& testCase.expectedErrorCode() != U_MF_DUPLICATE_OPTION_NAME_ERROR) {
failRoundTrip(tmsg, testCase, in, out);
}
if (testCase.expectNoSyntaxError()) {
@ -264,9 +274,27 @@ class TestUtils {
if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) {
failWrongOffset(tmsg, testCase, parseError.line, parseError.offset);
}
if (!testCase.outputMatches(result)) {
failWrongOutput(tmsg, testCase, result);
return;
if (U_FAILURE(errorCode) && !testCase.expectSuccess()
&& testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR) {
// Re-run the formatter if there was an error,
// in order to get best-effort output
errorCode.reset();
mfBuilder.setErrorHandlingBehavior(MessageFormatter::U_MF_BEST_EFFORT);
mf = mfBuilder.build(errorCode);
if (U_SUCCESS(errorCode)) {
result = mf.formatToString(MessageArguments(testCase.getArguments(), errorCode), errorCode);
}
if (U_FAILURE(errorCode)) {
// Must be a non-MF2 error code
U_ASSERT(!(errorCode >= U_MF_UNRESOLVED_VARIABLE_ERROR
&& errorCode <= U_FMT_PARSE_ERROR_LIMIT));
}
// Re-run the formatter
result = mf.formatToString(MessageArguments(testCase.getArguments(), errorCode), errorCode);
if (!testCase.outputMatches(result)) {
failWrongOutput(tmsg, testCase, result);
return;
}
}
errorCode.reset();
}
@ -278,7 +306,7 @@ class TestUtils {
static void failSyntaxError(IntlTest& tmsg, const TestCase& testCase) {
tmsg.dataerrln(testCase.getTestName());
tmsg.logln(testCase.getTestName() + " failed test with pattern: " + testCase.getPattern() + " and error code U_MF_SYNTAX_WARNING; expected no syntax error");
tmsg.logln(testCase.getTestName() + " failed test with pattern: " + testCase.getPattern() + " and error code U_MF_SYNTAX_ERROR; expected no syntax error");
}
static void failExpectedSuccess(IntlTest& tmsg, const TestCase& testCase, IcuTestErrorCode& errorCode, int32_t line, int32_t offset) {