diff --git a/icu4c/source/common/brkeng.cpp b/icu4c/source/common/brkeng.cpp index a4c88a4db6b..dbdd7839d94 100644 --- a/icu4c/source/common/brkeng.cpp +++ b/icu4c/source/common/brkeng.cpp @@ -78,7 +78,9 @@ int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - UVector32 &/*foundBreaks*/ ) const { + UVector32 &/*foundBreaks*/, + UErrorCode &status) const { + if (U_FAILURE(status)) return 0; UChar32 c = utext_current32(text); while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { utext_next32(text); // TODO: recast loop to work with post-increment operations. diff --git a/icu4c/source/common/brkeng.h b/icu4c/source/common/brkeng.h index 155433b89a8..f6b64c83e25 100644 --- a/icu4c/source/common/brkeng.h +++ b/icu4c/source/common/brkeng.h @@ -68,12 +68,14 @@ class LanguageBreakEngine : public UMemory { * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. * @param foundBreaks A Vector of int32_t to receive the breaks. + * @param status Information on any errors encountered. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - UVector32 &foundBreaks ) const = 0; + UVector32 &foundBreaks, + UErrorCode &status) const = 0; }; @@ -185,12 +187,14 @@ class UnhandledEngine : public LanguageBreakEngine { * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. * @param foundBreaks An allocated C array of the breaks found, if any + * @param status Information on any errors encountered. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - UVector32 &foundBreaks ) const; + UVector32 &foundBreaks, + UErrorCode &status) const; /** *
Tell the engine to handle a particular character and break type.
diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index 44285755f3f..f9aae5bc9ed 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -47,7 +47,9 @@ int32_t DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status) const { + if (U_FAILURE(status)) return 0; (void)startPos; // TODO: remove this param? int32_t result = 0; @@ -66,7 +68,7 @@ DictionaryBreakEngine::findBreaks( UText *text, } rangeStart = start; rangeEnd = current; - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status); utext_setNativeIndex(text, current); return result; @@ -227,7 +229,9 @@ int32_t ThaiBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status) const { + if (U_FAILURE(status)) return 0; utext_setNativeIndex(text, rangeStart); utext_moveIndex32(text, THAI_MIN_WORD_SPAN); if (utext_getNativeIndex(text) >= rangeEnd) { @@ -240,7 +244,6 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text, int32_t cpWordLength = 0; // Word Length in Code Points. int32_t cuWordLength = 0; // Word length in code units (UText native indexing) int32_t current; - UErrorCode status = U_ZERO_ERROR; PossibleWord words[THAI_LOOKAHEAD]; utext_setNativeIndex(text, rangeStart); @@ -465,7 +468,9 @@ int32_t LaoBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status) const { + if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { return 0; // Not enough characters for two words } @@ -474,11 +479,10 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text, int32_t cpWordLength = 0; int32_t cuWordLength = 0; int32_t current; - UErrorCode status = U_ZERO_ERROR; PossibleWord words[LAO_LOOKAHEAD]; - + utext_setNativeIndex(text, rangeStart); - + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { cuWordLength = 0; cpWordLength = 0; @@ -657,7 +661,9 @@ int32_t BurmeseBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status ) const { + if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { return 0; // Not enough characters for two words } @@ -666,11 +672,10 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text, int32_t cpWordLength = 0; int32_t cuWordLength = 0; int32_t current; - UErrorCode status = U_ZERO_ERROR; PossibleWord words[BURMESE_LOOKAHEAD]; - + utext_setNativeIndex(text, rangeStart); - + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { cuWordLength = 0; cpWordLength = 0; @@ -861,7 +866,9 @@ int32_t KhmerBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status ) const { + if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { return 0; // Not enough characters for two words } @@ -870,7 +877,6 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text, int32_t cpWordLength = 0; int32_t cuWordLength = 0; int32_t current; - UErrorCode status = U_ZERO_ERROR; PossibleWord words[KHMER_LOOKAHEAD]; utext_setNativeIndex(text, rangeStart); @@ -1110,7 +1116,9 @@ int32_t CjkBreakEngine::divideUpDictionaryRange( UText *inText, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const { + UVector32 &foundBreaks, + UErrorCode& status) const { + if (U_FAILURE(status)) return 0; if (rangeStart >= rangeEnd) { return 0; } @@ -1122,9 +1130,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, // If NULL then mapping is 1:1 LocalPointerFind any breaks within a run in the supplied text.
* * @param text A UText representing the text. The iterator is left at - * the end of the run of characters which the engine is capable of handling + * the end of the run of characters which the engine is capable of handling * that starts from the first character in the range. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. * @param foundBreaks vector of int32_t to receive the break positions + * @param status Information on any errors encountered. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - UVector32 &foundBreaks ) const; + UVector32 &foundBreaks, + UErrorCode& status ) const; protected: @@ -96,12 +98,14 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * @param rangeStart The start of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. * @return The number of breaks found */ virtual int32_t divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const = 0; + UVector32 &foundBreaks, + UErrorCode& status) const = 0; }; @@ -153,12 +157,14 @@ class ThaiBreakEngine : public DictionaryBreakEngine { * @param rangeStart The start of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. * @return The number of breaks found */ virtual int32_t divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const; + UVector32 &foundBreaks, + UErrorCode& status) const; }; @@ -209,127 +215,133 @@ class LaoBreakEngine : public DictionaryBreakEngine { * @param rangeStart The start of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. * @return The number of breaks found */ virtual int32_t divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const; + UVector32 &foundBreaks, + UErrorCode& status) const; + +}; + +/******************************************************************* + * BurmeseBreakEngine + */ + +/** + *BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a + * DictionaryMatcher and heuristics to determine Burmese-specific breaks.
+ * + *After it is constructed a BurmeseBreakEngine may be shared between + * threads without synchronization.
+ */ +class BurmeseBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fBurmeseWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + *Default constructor.
+ * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + *Virtual destructor.
+ */ + virtual ~BurmeseBreakEngine(); + + protected: + /** + *Divide up a range of known dictionary characters.
+ * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks, + UErrorCode& status) const; + +}; + +/******************************************************************* + * KhmerBreakEngine + */ + +/** + *KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a + * DictionaryMatcher and heuristics to determine Khmer-specific breaks.
+ * + *After it is constructed a KhmerBreakEngine may be shared between + * threads without synchronization.
+ */ +class KhmerBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fKhmerWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + *Default constructor.
+ * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + *Virtual destructor.
+ */ + virtual ~KhmerBreakEngine(); + + protected: + /** + *Divide up a range of known dictionary characters.
+ * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks, + UErrorCode& status) const; }; -/******************************************************************* - * BurmeseBreakEngine - */ - -/** - *BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a - * DictionaryMatcher and heuristics to determine Burmese-specific breaks.
- * - *After it is constructed a BurmeseBreakEngine may be shared between - * threads without synchronization.
- */ -class BurmeseBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fBurmeseWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *Default constructor.
- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *Virtual destructor.
- */ - virtual ~BurmeseBreakEngine(); - - protected: - /** - *Divide up a range of known dictionary characters.
- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -/******************************************************************* - * KhmerBreakEngine - */ - -/** - *KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a - * DictionaryMatcher and heuristics to determine Khmer-specific breaks.
- * - *After it is constructed a KhmerBreakEngine may be shared between - * threads without synchronization.
- */ -class KhmerBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fKhmerWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *Default constructor.
- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *Virtual destructor.
- */ - virtual ~KhmerBreakEngine(); - - protected: - /** - *Divide up a range of known dictionary characters.
- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - #if !UCONFIG_NO_NORMALIZATION /******************************************************************* @@ -385,12 +397,14 @@ class CjkBreakEngine : public DictionaryBreakEngine { * @param rangeStart The start of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @param status Information on any errors encountered. * @return The number of breaks found */ virtual int32_t divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, - UVector32 &foundBreaks ) const; + UVector32 &foundBreaks, + UErrorCode& status) const; }; diff --git a/icu4c/source/common/lstmbe.cpp b/icu4c/source/common/lstmbe.cpp index be7eee624e7..a9123e9d494 100644 --- a/icu4c/source/common/lstmbe.cpp +++ b/icu4c/source/common/lstmbe.cpp @@ -162,10 +162,16 @@ ConstArray2D::~ConstArray2D() class Array1D : public ReadArray1D { public: Array1D() : memory_(nullptr), data_(nullptr), d1_(0) {} - Array1D(int32_t d1) + Array1D(int32_t d1, UErrorCode &status) : memory_(uprv_malloc(d1 * sizeof(float))), data_((float*)memory_), d1_(d1) { - clear(); + if (U_SUCCESS(status)) { + if (memory_ == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + clear(); + } } virtual ~Array1D(); @@ -278,10 +284,16 @@ Array1D::~Array1D() class Array2D : public ReadArray2D { public: Array2D() : memory_(nullptr), data_(nullptr), d1_(0), d2_(0) {} - Array2D(int32_t d1, int32_t d2) + Array2D(int32_t d1, int32_t d2, UErrorCode &status) : memory_(uprv_malloc(d1 * d2 * sizeof(float))), data_((float*)memory_), d1_(d1), d2_(d2) { - clear(); + if (U_SUCCESS(status)) { + if (memory_ == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + clear(); + } } virtual ~Array2D(); @@ -366,8 +378,10 @@ LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status) int32_t embedding_size = ures_getInt(embeddings_res.getAlias(), &status); LocalUResourceBundlePointer hunits_res( ures_getByKey(rb, "hunits", nullptr, &status)); + if (U_FAILURE(status)) return; int32_t hunits = ures_getInt(hunits_res.getAlias(), &status); const UChar* type = ures_getStringByKey(rb, "type", nullptr, &status); + if (U_FAILURE(status)) return; if (u_strCompare(type, -1, u"codepoints", -1, false) == 0) { fType = CODE_POINTS; } else if (u_strCompare(type, -1, u"graphclust", -1, false) == 0) { @@ -375,15 +389,15 @@ LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status) } fName = ures_getStringByKey(rb, "model", nullptr, &status); fDataRes = ures_getByKey(rb, "data", nullptr, &status); + if (U_FAILURE(status)) return; int32_t data_len = 0; const int32_t* data = ures_getIntVector(fDataRes, &data_len, &status); + if (U_FAILURE(status)) return; LocalUResourceBundlePointer fDictRes( ures_getByKey(rb, "dict", nullptr, &status)); int32_t num_index = ures_getSize(fDictRes.getAlias()); fDict = uhash_open(uhash_hashUChars, uhash_compareUChars, nullptr, &status); - if (U_FAILURE(status)) { - return; - } + if (U_FAILURE(status)) return; ures_resetIterator(fDictRes.getAlias()); int32_t idx = 0; @@ -391,10 +405,9 @@ LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status) while(ures_hasNext(fDictRes.getAlias())) { const char *tempKey = nullptr; const UChar* str = ures_getNextString(fDictRes.getAlias(), nullptr, &tempKey, &status); + if (U_FAILURE(status)) return; uhash_putiAllowZero(fDict, (void*)str, idx++, &status); - if (U_FAILURE(status)) { - return; - } + if (U_FAILURE(status)) return; #ifdef LSTM_VECTORIZER_DEBUG printf("Assign ["); while (*str != 0x0000) { @@ -495,6 +508,7 @@ void CodePointsVectorizer::vectorize( { if (offsets.ensureCapacity(endPos - startPos, status) && indices.ensureCapacity(endPos - startPos, status)) { + if (U_FAILURE(status)) return; utext_setNativeIndex(text, startPos); int32_t current; UChar str[2] = {0, 0}; @@ -533,21 +547,16 @@ void GraphemeClusterVectorizer::vectorize( UText *text, int32_t startPos, int32_t endPos, UVector32 &offsets, UVector32 &indices, UErrorCode &status) const { - if (U_FAILURE(status)) { - return; - } + if (U_FAILURE(status)) return; if (!offsets.ensureCapacity(endPos - startPos, status) || !indices.ensureCapacity(endPos - startPos, status)) { return; } + if (U_FAILURE(status)) return; LocalPointer