ICU-22940 MF2 ICU4C: Error checking improvements in parser

Improve checking for OOM errors when allocating UnicodeSets,
per post-merge comments on https://github.com/unicode-org/icu/pull/3236
This commit is contained in:
Tim Chevalier 2024-12-13 15:46:54 -08:00
parent bd50f8be32
commit f8aa68b0c1
2 changed files with 86 additions and 50 deletions

View file

@ -111,7 +111,7 @@ icu::UInitOnce gMF2ParseUniSetsInitOnce {};
UnicodeSet* initContentChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* result = new UnicodeSet(0x0001, 0x0008); // Omit NULL, HTAB and LF
@ -133,7 +133,7 @@ UnicodeSet* initContentChars(UErrorCode& status) {
UnicodeSet* initWhitespace(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* result = new UnicodeSet();
@ -153,7 +153,7 @@ UnicodeSet* initWhitespace(UErrorCode& status) {
UnicodeSet* initBidiControls(UErrorCode& status) {
UnicodeSet* result = new UnicodeSet(UnicodeString("[\\u061C]"), status);
if (U_FAILURE(status)) {
return {};
return nullptr;
}
result->add(0x200E, 0x200F);
result->add(0x2066, 0x2069);
@ -164,7 +164,7 @@ UnicodeSet* initBidiControls(UErrorCode& status) {
UnicodeSet* initAlpha(UErrorCode& status) {
UnicodeSet* result = new UnicodeSet(UnicodeString("[:letter:]"), status);
if (U_FAILURE(status)) {
return {};
return nullptr;
}
result->freeze();
return result;
@ -173,7 +173,7 @@ UnicodeSet* initAlpha(UErrorCode& status) {
UnicodeSet* initDigits(UErrorCode& status) {
UnicodeSet* result = new UnicodeSet(UnicodeString("[:number:]"), status);
if (U_FAILURE(status)) {
return {};
return nullptr;
}
result->freeze();
return result;
@ -181,10 +181,14 @@ UnicodeSet* initDigits(UErrorCode& status) {
UnicodeSet* initNameStartChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* result = new UnicodeSet(*unisets::getImpl(unisets::ALPHA));
UnicodeSet* isAlpha = unisets::gUnicodeSets[unisets::ALPHA] = initAlpha(status);
if (U_FAILURE(status)) {
return nullptr;
}
UnicodeSet* result = new UnicodeSet(*isAlpha);
if (result == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
@ -209,16 +213,21 @@ UnicodeSet* initNameStartChars(UErrorCode& status) {
UnicodeSet* initNameChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* nameStart = unisets::gUnicodeSets[unisets::NAME_START] = initNameStartChars(status);
UnicodeSet* digit = unisets::gUnicodeSets[unisets::DIGIT] = initDigits(status);
if (U_FAILURE(status)) {
return nullptr;
}
UnicodeSet* result = new UnicodeSet();
if (result == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
};
result->addAll(*unisets::getImpl(unisets::NAME_START));
result->addAll(*unisets::getImpl(unisets::DIGIT));
result->addAll(*nameStart);
result->addAll(*digit);
result->add(HYPHEN);
result->add(PERIOD);
result->add(0x00B7);
@ -230,16 +239,21 @@ UnicodeSet* initNameChars(UErrorCode& status) {
UnicodeSet* initTextChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* content = unisets::gUnicodeSets[unisets::CONTENT] = initContentChars(status);
UnicodeSet* whitespace = unisets::gUnicodeSets[unisets::WHITESPACE] = initWhitespace(status);
if (U_FAILURE(status)) {
return nullptr;
}
UnicodeSet* result = new UnicodeSet();
if (result == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
};
result->addAll(*unisets::getImpl(unisets::CONTENT));
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
result->addAll(*content);
result->addAll(*whitespace);
result->add(PERIOD);
result->add(AT);
result->add(PIPE);
@ -249,16 +263,31 @@ UnicodeSet* initTextChars(UErrorCode& status) {
UnicodeSet* initQuotedChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
unisets::gUnicodeSets[unisets::TEXT] = initTextChars(status);
if (U_FAILURE(status)) {
return nullptr;
}
UnicodeSet* result = new UnicodeSet();
if (result == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
};
result->addAll(*unisets::getImpl(unisets::CONTENT));
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
// content and whitespace were initialized by `initTextChars()`
UnicodeSet* content = unisets::getImpl(unisets::CONTENT);
if (content == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
result->addAll(*content);
UnicodeSet* whitespace = unisets::getImpl(unisets::WHITESPACE);
if (whitespace == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
result->addAll(*whitespace);
result->add(PERIOD);
result->add(AT);
result->add(LEFT_CURLY_BRACE);
@ -269,7 +298,7 @@ UnicodeSet* initQuotedChars(UErrorCode& status) {
UnicodeSet* initEscapableChars(UErrorCode& status) {
if (U_FAILURE(status)) {
return {};
return nullptr;
}
UnicodeSet* result = new UnicodeSet();
@ -298,26 +327,44 @@ UBool U_CALLCONV cleanupMF2ParseUniSets() {
void U_CALLCONV initMF2ParseUniSets(UErrorCode& status) {
ucln_i18n_registerCleanup(UCLN_I18N_MF2_UNISETS, cleanupMF2ParseUniSets);
/*
Each of the init functions initializes the UnicodeSets
that it depends on.
gUnicodeSets[unisets::CONTENT] = initContentChars(status);
gUnicodeSets[unisets::WHITESPACE] = initWhitespace(status);
initBidiControls (no dependencies)
initEscapableChars (no dependencies)
initNameChars depends on
initDigits
initNameStartChars depends on
initAlpha
initQuotedChars depends on
initTextChars depends on
initContentChars
initWhitespace
*/
gUnicodeSets[unisets::BIDI] = initBidiControls(status);
gUnicodeSets[unisets::ALPHA] = initAlpha(status);
gUnicodeSets[unisets::DIGIT] = initDigits(status);
gUnicodeSets[unisets::NAME_START] = initNameStartChars(status);
gUnicodeSets[unisets::NAME_CHAR] = initNameChars(status);
gUnicodeSets[unisets::TEXT] = initTextChars(status);
gUnicodeSets[unisets::QUOTED] = initQuotedChars(status);
gUnicodeSets[unisets::ESCAPABLE] = initEscapableChars(status);
if (U_FAILURE(status)) {
cleanupMF2ParseUniSets();
}
}
const UnicodeSet* get(Key key) {
UErrorCode localStatus = U_ZERO_ERROR;
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, localStatus);
if (U_FAILURE(localStatus)) {
const UnicodeSet* get(Key key, UErrorCode& status) {
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, status);
if (U_FAILURE(status)) {
return nullptr;
}
return getImpl(key);
UnicodeSet* result = getImpl(key);
if (result == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return result;
}
}

View file

@ -74,7 +74,7 @@ namespace message2 {
UNISETS_KEY_COUNT
};
U_I18N_API const UnicodeSet* get(Key key);
U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status);
}
// Parser class (private)
@ -110,16 +110,16 @@ namespace message2 {
StaticErrors& e,
UnicodeString& normalizedInputRef,
UErrorCode& status)
: contentChars(unisets::get(unisets::CONTENT)),
whitespaceChars(unisets::get(unisets::WHITESPACE)),
bidiControlChars(unisets::get(unisets::BIDI)),
alphaChars(unisets::get(unisets::ALPHA)),
digitChars(unisets::get(unisets::DIGIT)),
nameStartChars(unisets::get(unisets::NAME_START)),
nameChars(unisets::get(unisets::NAME_CHAR)),
textChars(unisets::get(unisets::TEXT)),
quotedChars(unisets::get(unisets::QUOTED)),
escapableChars(unisets::get(unisets::ESCAPABLE)),
: contentChars(unisets::get(unisets::CONTENT, status)),
whitespaceChars(unisets::get(unisets::WHITESPACE, status)),
bidiControlChars(unisets::get(unisets::BIDI, status)),
alphaChars(unisets::get(unisets::ALPHA, status)),
digitChars(unisets::get(unisets::DIGIT, status)),
nameStartChars(unisets::get(unisets::NAME_START, status)),
nameChars(unisets::get(unisets::NAME_CHAR, status)),
textChars(unisets::get(unisets::TEXT, status)),
quotedChars(unisets::get(unisets::QUOTED, status)),
escapableChars(unisets::get(unisets::ESCAPABLE, status)),
source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
(void) status;
parseError.line = 0;
@ -129,17 +129,6 @@ namespace message2 {
parseError.postContext[0] = '\0';
}
UnicodeSet initContentChars(UErrorCode& status);
UnicodeSet initWhitespace(UErrorCode& status);
UnicodeSet initBidiControls(UErrorCode& status);
UnicodeSet initAlpha(UErrorCode& status);
UnicodeSet initDigits(UErrorCode& status);
UnicodeSet initNameStartChars(UErrorCode& status);
UnicodeSet initNameChars(UErrorCode& status);
UnicodeSet initTextChars(UErrorCode& status);
UnicodeSet initQuotedChars(UErrorCode& status);
UnicodeSet initEscapableChars(UErrorCode& status);
bool isContentChar(UChar32) const;
bool isBidiControl(UChar32) const;
bool isWhitespace(UChar32) const;