mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
ICU-22940 MF2 ICU4C: Error checking improvements in parser
Improve checking for OOM errors when allocating UnicodeSets, per post-merge comments on https://github.com/unicode-org/icu/pull/3236
This commit is contained in:
parent
bd50f8be32
commit
f8aa68b0c1
2 changed files with 86 additions and 50 deletions
|
@ -111,7 +111,7 @@ icu::UInitOnce gMF2ParseUniSetsInitOnce {};
|
|||
|
||||
UnicodeSet* initContentChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet(0x0001, 0x0008); // Omit NULL, HTAB and LF
|
||||
|
@ -133,7 +133,7 @@ UnicodeSet* initContentChars(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initWhitespace(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
|
@ -153,7 +153,7 @@ UnicodeSet* initWhitespace(UErrorCode& status) {
|
|||
UnicodeSet* initBidiControls(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[\\u061C]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
result->add(0x200E, 0x200F);
|
||||
result->add(0x2066, 0x2069);
|
||||
|
@ -164,7 +164,7 @@ UnicodeSet* initBidiControls(UErrorCode& status) {
|
|||
UnicodeSet* initAlpha(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[:letter:]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
result->freeze();
|
||||
return result;
|
||||
|
@ -173,7 +173,7 @@ UnicodeSet* initAlpha(UErrorCode& status) {
|
|||
UnicodeSet* initDigits(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[:number:]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
result->freeze();
|
||||
return result;
|
||||
|
@ -181,10 +181,14 @@ UnicodeSet* initDigits(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initNameStartChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet(*unisets::getImpl(unisets::ALPHA));
|
||||
UnicodeSet* isAlpha = unisets::gUnicodeSets[unisets::ALPHA] = initAlpha(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeSet* result = new UnicodeSet(*isAlpha);
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
|
@ -209,16 +213,21 @@ UnicodeSet* initNameStartChars(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initNameChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* nameStart = unisets::gUnicodeSets[unisets::NAME_START] = initNameStartChars(status);
|
||||
UnicodeSet* digit = unisets::gUnicodeSets[unisets::DIGIT] = initDigits(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::NAME_START));
|
||||
result->addAll(*unisets::getImpl(unisets::DIGIT));
|
||||
result->addAll(*nameStart);
|
||||
result->addAll(*digit);
|
||||
result->add(HYPHEN);
|
||||
result->add(PERIOD);
|
||||
result->add(0x00B7);
|
||||
|
@ -230,16 +239,21 @@ UnicodeSet* initNameChars(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initTextChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* content = unisets::gUnicodeSets[unisets::CONTENT] = initContentChars(status);
|
||||
UnicodeSet* whitespace = unisets::gUnicodeSets[unisets::WHITESPACE] = initWhitespace(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::CONTENT));
|
||||
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
|
||||
result->addAll(*content);
|
||||
result->addAll(*whitespace);
|
||||
result->add(PERIOD);
|
||||
result->add(AT);
|
||||
result->add(PIPE);
|
||||
|
@ -249,16 +263,31 @@ UnicodeSet* initTextChars(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initQuotedChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
unisets::gUnicodeSets[unisets::TEXT] = initTextChars(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::CONTENT));
|
||||
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
|
||||
// content and whitespace were initialized by `initTextChars()`
|
||||
UnicodeSet* content = unisets::getImpl(unisets::CONTENT);
|
||||
if (content == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
result->addAll(*content);
|
||||
UnicodeSet* whitespace = unisets::getImpl(unisets::WHITESPACE);
|
||||
if (whitespace == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
result->addAll(*whitespace);
|
||||
result->add(PERIOD);
|
||||
result->add(AT);
|
||||
result->add(LEFT_CURLY_BRACE);
|
||||
|
@ -269,7 +298,7 @@ UnicodeSet* initQuotedChars(UErrorCode& status) {
|
|||
|
||||
UnicodeSet* initEscapableChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
|
@ -298,26 +327,44 @@ UBool U_CALLCONV cleanupMF2ParseUniSets() {
|
|||
|
||||
void U_CALLCONV initMF2ParseUniSets(UErrorCode& status) {
|
||||
ucln_i18n_registerCleanup(UCLN_I18N_MF2_UNISETS, cleanupMF2ParseUniSets);
|
||||
/*
|
||||
Each of the init functions initializes the UnicodeSets
|
||||
that it depends on.
|
||||
|
||||
gUnicodeSets[unisets::CONTENT] = initContentChars(status);
|
||||
gUnicodeSets[unisets::WHITESPACE] = initWhitespace(status);
|
||||
initBidiControls (no dependencies)
|
||||
|
||||
initEscapableChars (no dependencies)
|
||||
|
||||
initNameChars depends on
|
||||
initDigits
|
||||
initNameStartChars depends on
|
||||
initAlpha
|
||||
|
||||
initQuotedChars depends on
|
||||
initTextChars depends on
|
||||
initContentChars
|
||||
initWhitespace
|
||||
*/
|
||||
gUnicodeSets[unisets::BIDI] = initBidiControls(status);
|
||||
gUnicodeSets[unisets::ALPHA] = initAlpha(status);
|
||||
gUnicodeSets[unisets::DIGIT] = initDigits(status);
|
||||
gUnicodeSets[unisets::NAME_START] = initNameStartChars(status);
|
||||
gUnicodeSets[unisets::NAME_CHAR] = initNameChars(status);
|
||||
gUnicodeSets[unisets::TEXT] = initTextChars(status);
|
||||
gUnicodeSets[unisets::QUOTED] = initQuotedChars(status);
|
||||
gUnicodeSets[unisets::ESCAPABLE] = initEscapableChars(status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
cleanupMF2ParseUniSets();
|
||||
}
|
||||
}
|
||||
|
||||
const UnicodeSet* get(Key key) {
|
||||
UErrorCode localStatus = U_ZERO_ERROR;
|
||||
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, localStatus);
|
||||
if (U_FAILURE(localStatus)) {
|
||||
const UnicodeSet* get(Key key, UErrorCode& status) {
|
||||
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
return getImpl(key);
|
||||
UnicodeSet* result = getImpl(key);
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -74,7 +74,7 @@ namespace message2 {
|
|||
UNISETS_KEY_COUNT
|
||||
};
|
||||
|
||||
U_I18N_API const UnicodeSet* get(Key key);
|
||||
U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status);
|
||||
}
|
||||
|
||||
// Parser class (private)
|
||||
|
@ -110,16 +110,16 @@ namespace message2 {
|
|||
StaticErrors& e,
|
||||
UnicodeString& normalizedInputRef,
|
||||
UErrorCode& status)
|
||||
: contentChars(unisets::get(unisets::CONTENT)),
|
||||
whitespaceChars(unisets::get(unisets::WHITESPACE)),
|
||||
bidiControlChars(unisets::get(unisets::BIDI)),
|
||||
alphaChars(unisets::get(unisets::ALPHA)),
|
||||
digitChars(unisets::get(unisets::DIGIT)),
|
||||
nameStartChars(unisets::get(unisets::NAME_START)),
|
||||
nameChars(unisets::get(unisets::NAME_CHAR)),
|
||||
textChars(unisets::get(unisets::TEXT)),
|
||||
quotedChars(unisets::get(unisets::QUOTED)),
|
||||
escapableChars(unisets::get(unisets::ESCAPABLE)),
|
||||
: contentChars(unisets::get(unisets::CONTENT, status)),
|
||||
whitespaceChars(unisets::get(unisets::WHITESPACE, status)),
|
||||
bidiControlChars(unisets::get(unisets::BIDI, status)),
|
||||
alphaChars(unisets::get(unisets::ALPHA, status)),
|
||||
digitChars(unisets::get(unisets::DIGIT, status)),
|
||||
nameStartChars(unisets::get(unisets::NAME_START, status)),
|
||||
nameChars(unisets::get(unisets::NAME_CHAR, status)),
|
||||
textChars(unisets::get(unisets::TEXT, status)),
|
||||
quotedChars(unisets::get(unisets::QUOTED, status)),
|
||||
escapableChars(unisets::get(unisets::ESCAPABLE, status)),
|
||||
source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
|
||||
(void) status;
|
||||
parseError.line = 0;
|
||||
|
@ -129,17 +129,6 @@ namespace message2 {
|
|||
parseError.postContext[0] = '\0';
|
||||
}
|
||||
|
||||
UnicodeSet initContentChars(UErrorCode& status);
|
||||
UnicodeSet initWhitespace(UErrorCode& status);
|
||||
UnicodeSet initBidiControls(UErrorCode& status);
|
||||
UnicodeSet initAlpha(UErrorCode& status);
|
||||
UnicodeSet initDigits(UErrorCode& status);
|
||||
UnicodeSet initNameStartChars(UErrorCode& status);
|
||||
UnicodeSet initNameChars(UErrorCode& status);
|
||||
UnicodeSet initTextChars(UErrorCode& status);
|
||||
UnicodeSet initQuotedChars(UErrorCode& status);
|
||||
UnicodeSet initEscapableChars(UErrorCode& status);
|
||||
|
||||
bool isContentChar(UChar32) const;
|
||||
bool isBidiControl(UChar32) const;
|
||||
bool isWhitespace(UChar32) const;
|
||||
|
|
Loading…
Add table
Reference in a new issue