From 6fd29e25c765bebfa5d0c35392d90a6dea8903d0 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Fri, 8 Feb 2008 09:10:22 +0000 Subject: [PATCH] ICU-6132 Recover from allocation errors more gracefully. X-SVN-Rev: 23399 --- icu4c/source/common/rbbiscan.cpp | 66 ++++++++++++++------------- icu4c/source/i18n/csdetect.cpp | 5 +- icu4c/source/i18n/inputext.cpp | 10 ++-- icu4c/source/i18n/inputext.h | 6 ++- icu4c/source/test/cintltst/ucsdetst.c | 5 ++ 5 files changed, 52 insertions(+), 40 deletions(-) diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index ad5781c8579..62760fc3e40 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -37,7 +37,7 @@ #include "uassert.h" -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // Unicode Set init strings for each of the character classes needed for parsing a rule file. // (Initialized with hex values for portability to EBCDIC based machines. @@ -46,7 +46,7 @@ // The sets are referred to by name in the rbbirpt.txt, which is the // source form of the state transition table for the RBBI rule parser. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ static const UChar gRuleSet_rule_char_pattern[] = { // [ ^ [ \ p { Z } \ u 0 0 2 0 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, @@ -82,11 +82,11 @@ U_CDECL_END U_NAMESPACE_BEGIN -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // Constructor. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) { fRB = rb; @@ -174,11 +174,11 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // Destructor // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ RBBIRuleScanner::~RBBIRuleScanner() { delete fRuleSets[kRuleSet_rule_char-128]; delete fRuleSets[kRuleSet_white_space-128]; @@ -204,7 +204,7 @@ RBBIRuleScanner::~RBBIRuleScanner() { } -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // doParseAction Do some action during rule parsing. // Called by the parse state machine. @@ -217,7 +217,7 @@ RBBIRuleScanner::~RBBIRuleScanner() { // in some compilers, while at the same time avoiding multiple // definitions problems. I'm sure that there's a better way. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ UBool RBBIRuleScanner::doParseActions(int32_t action) { RBBINode *n = NULL; @@ -592,26 +592,28 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // Error Report a rule parse error. // Only report it if no previous error has been recorded. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::error(UErrorCode e) { if (U_SUCCESS(*fRB->fStatus)) { *fRB->fStatus = e; - fRB->fParseError->line = fLineNum; - fRB->fParseError->offset = fCharNum; - fRB->fParseError->preContext[0] = 0; - fRB->fParseError->preContext[0] = 0; + if (fRB->fParseError) { + fRB->fParseError->line = fLineNum; + fRB->fParseError->offset = fCharNum; + fRB->fParseError->preContext[0] = 0; + fRB->fParseError->preContext[0] = 0; + } } } -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // fixOpStack The parse stack holds partially assembled chunks of the parse tree. // An entry on the stack may be as small as a single setRef node, @@ -625,7 +627,7 @@ void RBBIRuleScanner::error(UErrorCode e) { // the precedence of the current operator, binds the operand left, // to the previously encountered operator. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { RBBINode *n; // printNodeStack("entering fixOpStack()"); @@ -672,7 +674,7 @@ void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // findSetFor given a UnicodeString, // - find the corresponding Unicode Set (uset node) @@ -687,7 +689,7 @@ void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { // just one element which is the char in question. // If the string is "any", return a set containing all chars. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { RBBISetTableEl *el; @@ -779,12 +781,12 @@ static const UChar chLParen = 0x28; static const UChar chRParen = 0x29; -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // stripRules Return a rules string without unnecessary // characters. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { UnicodeString strippedRules; int rulesLength = rules.length(); @@ -806,13 +808,13 @@ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { } -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // nextCharLL Low Level Next Char from rule input source. // Get a char from the input character iterator, // keep track of input position for error reporting. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ UChar32 RBBIRuleScanner::nextCharLL() { UChar32 ch; @@ -847,13 +849,13 @@ UChar32 RBBIRuleScanner::nextCharLL() { } -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // nextChar for rules scanning. At this level, we handle stripping // out comments and processing backslash character escapes. // The rest of the rules grammar is handled at the next level up. // -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { // Unicode Character constants needed for the processing done by nextChar(), @@ -931,14 +933,14 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { // putc(c.fChar, stdout); } -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // Parse RBBI rules. The state machine for rules parsing is here. // The state tables are hand-written in the file rbbirpt.txt, // and converted to the form used here by a perl // script rbbicst.pl // -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::parse() { uint16_t state; const RBBIRuleTableEl *tableEl; @@ -1108,11 +1110,11 @@ void RBBIRuleScanner::parse() { } -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // printNodeStack for debugging... // -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #ifdef RBBI_DEBUG void RBBIRuleScanner::printNodeStack(const char *title) { int i; @@ -1124,12 +1126,12 @@ void RBBIRuleScanner::printNodeStack(const char *title) { -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // pushNewNode create a new RBBINode of the specified type and push it // onto the stack of nodes. // -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { fNodeStackPtr++; if (fNodeStackPtr >= kStackSize) { @@ -1147,7 +1149,7 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // scanSet Construct a UnicodeSet from the text at the current scan // position. Advance the scan position to the first character @@ -1160,7 +1162,7 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { // that controls rule parsing. UnicodeSets, however, are parsed by // the UnicodeSet constructor, not by the RBBI rule parser. // -//--------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ void RBBIRuleScanner::scanSet() { UnicodeSet *uset; ParsePosition pos; diff --git a/icu4c/source/i18n/csdetect.cpp b/icu4c/source/i18n/csdetect.cpp index 08412477a0b..96076d43a5b 100644 --- a/icu4c/source/i18n/csdetect.cpp +++ b/icu4c/source/i18n/csdetect.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2007, International Business Machines + * Copyright (C) 2005-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -164,7 +164,8 @@ void CharsetDetector::setRecognizers(UErrorCode &status) } CharsetDetector::CharsetDetector(UErrorCode &status) - : textIn(new InputText()), resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE) + : textIn(new InputText(status)), resultArray(NULL), + resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE) { if (U_FAILURE(status)) { return; diff --git a/icu4c/source/i18n/inputext.cpp b/icu4c/source/i18n/inputext.cpp index a36a931ab55..7df3df11fd6 100644 --- a/icu4c/source/i18n/inputext.cpp +++ b/icu4c/source/i18n/inputext.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2006, International Business Machines + * Copyright (C) 2005-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -25,7 +25,7 @@ U_NAMESPACE_BEGIN #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type)) #define DELETE_ARRAY(array) uprv_free((void *) (array)) -InputText::InputText() +InputText::InputText(UErrorCode &status) : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked. Markup will have been // removed if appropriate. fByteStats(NEW_ARRAY(int16_t, 256)), // byte frequency statistics for the input text. @@ -33,8 +33,10 @@ InputText::InputText() fDeclaredEncoding(0), fRawInput(0), fRawLength(0) -{ - +{ + if (fInputBytes == NULL || fByteStats == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } } InputText::~InputText() diff --git a/icu4c/source/i18n/inputext.h b/icu4c/source/i18n/inputext.h index 1528f48e63f..0c5973d8eb2 100644 --- a/icu4c/source/i18n/inputext.h +++ b/icu4c/source/i18n/inputext.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2006, International Business Machines + * Copyright (C) 2005-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -25,8 +25,10 @@ U_NAMESPACE_BEGIN class InputText : public UMemory { + // Prevent copying + InputText(const InputText &); public: - InputText(); + InputText(UErrorCode &status); ~InputText(); void setText(const char *in, int32_t len); diff --git a/icu4c/source/test/cintltst/ucsdetst.c b/icu4c/source/test/cintltst/ucsdetst.c index 83161961b1d..d5f82de9713 100644 --- a/icu4c/source/test/cintltst/ucsdetst.c +++ b/icu4c/source/test/cintltst/ucsdetst.c @@ -125,6 +125,11 @@ static void TestUTF8(void) bytes = extractBytes(s, sLength, "UTF-8", &byteLength); ucsdet_setText(csd, bytes, byteLength, &status); + if (U_FAILURE(status)) { + log_err("status is %s\n", u_errorName(status)); + goto bail; + } + match = ucsdet_detect(csd, &status); if (match == NULL) {