ICU-22261 Remove MF2 formatter caching optimization

The implementation was keeping a cache of FormatterFactory
    objects so that subsequent calls to the same formatter re-use the
    same object.
    The problem is that this is unsafe, because
    `MFFunctionRegistry::getFormatter()` returns a non-const `FormatterFactory*`;
    so if the caller deleted the resulting pointer, the formatter cache
    would contain a dangling pointer.
    
    This optimization was added because of an ICU4J test that checked for
    the presence of the optimization. However, for separate reasons
    (making `adoptFormatter()` actually adopt its argument), this test
    was already removed.
    
    The caching could be re-added later if that optimization is needed,
    but for now, remove it (also, no tests were checking for its presence).
This commit is contained in:
Tim Chevalier 2024-04-01 22:36:28 +00:00 committed by Elango Cheran
parent 43cde6bdbf
commit 4f0c2ca71c
5 changed files with 46 additions and 144 deletions

View file

@ -189,10 +189,25 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
}
if (isFormatter(functionName)) {
const Formatter& formatterImpl = getFormatter(context, functionName, status);
LocalPointer<Formatter> formatterImpl(getFormatter(functionName, status));
if (U_FAILURE(status)) {
if (status == U_MF_FORMATTING_ERROR) {
errs.setFormattingError(functionName, status);
status = U_ZERO_ERROR;
return {};
}
if (status == U_MF_UNKNOWN_FUNCTION_ERROR) {
errs.setUnknownFunction(functionName, status);
status = U_ZERO_ERROR;
return {};
}
// Other errors are non-recoverable
return {};
}
U_ASSERT(formatterImpl != nullptr);
UErrorCode savedStatus = status;
FormattedPlaceholder result = formatterImpl.format(std::move(argument), std::move(options), status);
FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status);
// Update errors
if (savedStatus != status) {
if (U_FAILURE(status)) {

View file

@ -1,68 +0,0 @@
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT2_CACHED_FORMATTERS_H
#define MESSAGEFORMAT2_CACHED_FORMATTERS_H
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "hash.h"
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Formatter cache
// --------------
class MessageFormatter;
// Map from function names to Formatters
class CachedFormatters : public UObject {
private:
friend class MessageFormatter;
// Maps stringified FunctionNames onto Formatter*
// Adopts its values
Hashtable cache;
CachedFormatters() { cache.setValueDeleter(uprv_deleteUObject); }
public:
// Returns a pointer because Formatter is an abstract class
const Formatter* getFormatter(const FunctionName& f) {
return static_cast<const Formatter*>(cache.get(f));
}
// Adopts its argument
void adoptFormatter(const FunctionName& f, Formatter* val, UErrorCode& status) {
cache.put(f, val, status);
}
CachedFormatters& operator=(const CachedFormatters&) = delete;
virtual ~CachedFormatters();
};
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT2_CACHED_FORMATTERS_H
#endif // U_HIDE_DEPRECATED_API
// eof

View file

@ -8,7 +8,6 @@
#if !UCONFIG_NO_MF2
#include "messageformat2_allocation.h"
#include "messageformat2_cached_formatters.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
@ -186,8 +185,6 @@ PrioritizedVariant::~PrioritizedVariant() {}
Closure::~Closure() {}
CachedFormatters::~CachedFormatters() {}
// MessageContext methods
void MessageContext::checkErrors(UErrorCode& status) const {

View file

@ -9,7 +9,6 @@
#include "unicode/messageformat2.h"
#include "messageformat2_allocation.h"
#include "messageformat2_cached_formatters.h"
#include "messageformat2_checker.h"
#include "messageformat2_errors.h"
#include "messageformat2_evaluation.h"
@ -138,13 +137,6 @@ namespace message2 {
errors = errorsNew.orphan();
}
// Initialize formatter cache
cachedFormatters = new CachedFormatters();
if (cachedFormatters == nullptr) {
success = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Note: we currently evaluate variables lazily,
// without memoization. This call is still necessary
// to check out-of-scope uses of local variables in
@ -156,9 +148,6 @@ namespace message2 {
}
void MessageFormatter::cleanup() noexcept {
if (cachedFormatters != nullptr) {
delete cachedFormatters;
}
if (errors != nullptr) {
delete errors;
}
@ -172,8 +161,6 @@ namespace message2 {
customMFFunctionRegistry = other.customMFFunctionRegistry;
dataModel = std::move(other.dataModel);
normalizedInput = std::move(other.normalizedInput);
cachedFormatters = other.cachedFormatters;
other.cachedFormatters = nullptr;
errors = other.errors;
other.errors = nullptr;
return *this;
@ -219,10 +206,26 @@ namespace message2 {
return result;
}
// Precondition: formatter is defined
const Formatter& MessageFormatter::getFormatter(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
U_ASSERT(isFormatter(functionName));
return *maybeCachedFormatter(context, functionName, status);
// Returns an owned pointer
Formatter* MessageFormatter::getFormatter(const FunctionName& functionName, UErrorCode& status) const {
NULL_ON_ERROR(status);
// Create the formatter
// First, look up the formatter factory for this function
FormatterFactory* formatterFactory = lookupFormatterFactory(functionName, status);
NULL_ON_ERROR(status);
U_ASSERT(formatterFactory != nullptr);
// Create a specific instance of the formatter
Formatter* formatter = formatterFactory->createFormatter(locale, status);
NULL_ON_ERROR(status);
if (formatter == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
return formatter;
}
bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const {
@ -275,16 +278,15 @@ namespace message2 {
return nullptr;
}
// Returns non-owned pointer. Returns pointer rather than reference because it can fail.
// Returns non-const because FormatterFactory is mutable.
FormatterFactory* MessageFormatter::lookupFormatterFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
DynamicErrors& err = context.getErrors();
FormatterFactory* MessageFormatter::lookupFormatterFactory(const FunctionName& functionName,
UErrorCode& status) const {
NULL_ON_ERROR(status);
if (isBuiltInFormatter(functionName)) {
return standardMFFunctionRegistry.getFormatter(functionName);
}
if (isBuiltInSelector(functionName)) {
err.setFormattingError(functionName, status);
status = U_MF_FORMATTING_ERROR;
return nullptr;
}
if (hasCustomMFFunctionRegistry()) {
@ -294,7 +296,7 @@ namespace message2 {
return formatterFactory;
}
if (customMFFunctionRegistry.getSelector(functionName) != nullptr) {
err.setFormattingError(functionName, status);
status = U_MF_FORMATTING_ERROR;
return nullptr;
}
}
@ -302,7 +304,7 @@ namespace message2 {
// isn't built-in, or the function doesn't exist in either the built-in
// or custom registry.
// Unknown function error
err.setUnknownFunction(functionName, status);
status = U_MF_UNKNOWN_FUNCTION_ERROR;
return nullptr;
}
@ -315,39 +317,6 @@ namespace message2 {
return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getSelector(fn) != nullptr;
}
const Formatter* MessageFormatter::maybeCachedFormatter(MessageContext& context, const FunctionName& functionName, UErrorCode& errorCode) const {
NULL_ON_ERROR(errorCode);
U_ASSERT(cachedFormatters != nullptr);
const Formatter* result = cachedFormatters->getFormatter(functionName);
if (result == nullptr) {
// Create the formatter
// First, look up the formatter factory for this function
FormatterFactory* formatterFactory = lookupFormatterFactory(context, functionName, errorCode);
NULL_ON_ERROR(errorCode);
// If the formatter factory was null, there must have been
// an earlier error/warning
if (formatterFactory == nullptr) {
U_ASSERT(context.getErrors().hasUnknownFunctionError() || context.getErrors().hasFormattingError());
return nullptr;
}
// Create a specific instance of the formatter
Formatter* formatter = formatterFactory->createFormatter(locale, errorCode);
NULL_ON_ERROR(errorCode);
if (formatter == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
cachedFormatters->adoptFormatter(functionName, formatter, errorCode);
return formatter;
} else {
return result;
}
}
} // namespace message2
U_NAMESPACE_END

View file

@ -28,7 +28,6 @@ U_NAMESPACE_BEGIN
namespace message2 {
class CachedFormatters;
class Environment;
class MessageContext;
class ResolvedSelector;
@ -328,17 +327,17 @@ namespace message2 {
const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
bool isCustomFormatter(const FunctionName&) const;
FormatterFactory* lookupFormatterFactory(MessageContext&, const FunctionName&, UErrorCode& status) const;
FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
bool isBuiltInSelector(const FunctionName&) const;
bool isBuiltInFormatter(const FunctionName&) const;
bool isCustomSelector(const FunctionName&) const;
const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
const Formatter* maybeCachedFormatter(MessageContext&, const FunctionName&, UErrorCode&) const;
const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
const Formatter& getFormatter(MessageContext&, const FunctionName&, UErrorCode&) const;
Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
// Checking for resolution errors
@ -374,16 +373,6 @@ namespace message2 {
// Normalized version of the input string (optional whitespace removed)
UnicodeString normalizedInput;
// Formatter cache
// Must be a raw pointer to avoid including the internal header file
// defining CachedFormatters
// Owned by `this`
// TODO: This is an optimization that the "TemperatureFormatter" test
// (ported from ICU4J) was checking for; however, that test was removed
// in order to make `setFormatter()` safe, so maybe this should be
// removed too
CachedFormatters* cachedFormatters;
// Errors -- only used while parsing and checking for data model errors; then
// the MessageContext keeps track of errors
// Must be a raw pointer to avoid including the internal header file