ICU-6132 Recover from allocation errors more gracefully.

X-SVN-Rev: 23399
2025-04-07 06:25:30 +00:00 · 2008-02-08 09:10:22 +00:00 · 2008-02-08 09:10:22 +00:00 · 6fd29e25c7
commit 6fd29e25c7
parent 3daa4c96d4
5 changed files with 52 additions and 40 deletions
--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@ -37,7 +37,7 @@
 #include "uassert.h"


-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 // Unicode Set init strings for each of the character classes needed for parsing a rule file.
 //               (Initialized with hex values for portability to EBCDIC based machines.
@ -46,7 +46,7 @@
 //              The sets are referred to by name in the rbbirpt.txt, which is the
 //              source form of the state transition table for the RBBI rule parser.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 static const UChar gRuleSet_rule_char_pattern[]       = {
 //   [    ^      [    \     p     {      Z     }     \     u    0      0    2      0
    0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
@ -82,11 +82,11 @@ U_CDECL_END

 U_NAMESPACE_BEGIN

-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  Constructor.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
 {
    fRB                 = rb;
@ -174,11 +174,11 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)



-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  Destructor
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 RBBIRuleScanner::~RBBIRuleScanner() {
    delete fRuleSets[kRuleSet_rule_char-128];
    delete fRuleSets[kRuleSet_white_space-128];
@ -204,7 +204,7 @@ RBBIRuleScanner::~RBBIRuleScanner() {

 }

-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  doParseAction        Do some action during rule parsing.
 //                       Called by the parse state machine.
@ -217,7 +217,7 @@ RBBIRuleScanner::~RBBIRuleScanner() {
 //                              in some compilers, while at the same time avoiding multiple
 //                              definitions problems.  I'm sure that there's a better way.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 UBool RBBIRuleScanner::doParseActions(int32_t action)
 {
    RBBINode *n       = NULL;
@ -592,26 +592,28 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)



-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  Error         Report a rule parse error.
 //                Only report it if no previous error has been recorded.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::error(UErrorCode e) {
    if (U_SUCCESS(*fRB->fStatus)) {
        *fRB->fStatus = e;
-        fRB->fParseError->line  = fLineNum;
-        fRB->fParseError->offset = fCharNum;
-        fRB->fParseError->preContext[0] = 0;
-        fRB->fParseError->preContext[0] = 0;
+        if (fRB->fParseError) {    // fParseError can be NULL; then only the status is set
+            fRB->fParseError->line  = fLineNum;
+            fRB->fParseError->offset = fCharNum;
+            fRB->fParseError->preContext[0] = 0;
+            fRB->fParseError->postContext[0] = 0;    // empty both context strings
+        }
    }
 }




-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  fixOpStack   The parse stack holds partially assembled chunks of the parse tree.
 //               An entry on the stack may be as small as a single setRef node,
@ -625,7 +627,7 @@ void RBBIRuleScanner::error(UErrorCode e) {
 //               the precedence of the current operator, binds the operand left,
 //               to the previously encountered operator.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {
    RBBINode *n;
    // printNodeStack("entering fixOpStack()");
@ -672,7 +674,7 @@ void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {



-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //   findSetFor    given a UnicodeString,
 //                  - find the corresponding Unicode Set  (uset node)
@ -687,7 +689,7 @@ void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {
 //                    just one element which is the char in question.
 //                 If the string is "any", return a set containing all chars.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {

    RBBISetTableEl   *el;
@ -779,12 +781,12 @@ static const UChar      chLParen    = 0x28;
 static const UChar      chRParen    = 0x29;


-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  stripRules    Return a rules string without unnecessary
 //                characters.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
    UnicodeString strippedRules;
    int rulesLength = rules.length();
@ -806,13 +808,13 @@ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
 }


-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  nextCharLL    Low Level Next Char from rule input source.
 //                Get a char from the input character iterator,
 //                keep track of input position for error reporting.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 UChar32  RBBIRuleScanner::nextCharLL() {
    UChar32  ch;

@ -847,13 +849,13 @@ UChar32  RBBIRuleScanner::nextCharLL() {
 }


-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //   nextChar     for rules scanning.  At this level, we handle stripping
 //                out comments and processing backslash character escapes.
 //                The rest of the rules grammar is handled at the next level up.
 //
-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {

    // Unicode Character constants needed for the processing done by nextChar(),
@ -931,14 +933,14 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
    // putc(c.fChar, stdout);
 }

-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  Parse RBBI rules.   The state machine for rules parsing is here.
 //                      The state tables are hand-written in the file rbbirpt.txt,
 //                      and converted to the form used here by a perl
 //                      script rbbicst.pl
 //
-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::parse() {
    uint16_t                state;
    const RBBIRuleTableEl  *tableEl;
@ -1108,11 +1110,11 @@ void RBBIRuleScanner::parse() {
 }


-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  printNodeStack     for debugging...
 //
-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 #ifdef RBBI_DEBUG
 void RBBIRuleScanner::printNodeStack(const char *title) {
    int i;
@ -1124,12 +1126,12 @@ void RBBIRuleScanner::printNodeStack(const char *title) {



-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  pushNewNode   create a new RBBINode of the specified type and push it
 //                onto the stack of nodes.
 //
-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 RBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {
    fNodeStackPtr++;
    if (fNodeStackPtr >= kStackSize) {
@ -1147,7 +1149,7 @@ RBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {



-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //  scanSet    Construct a UnicodeSet from the text at the current scan
 //             position.  Advance the scan position to the first character
@ -1160,7 +1162,7 @@ RBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {
 //             that controls rule parsing.  UnicodeSets, however, are parsed by
 //             the UnicodeSet constructor, not by the RBBI rule parser.
 //
-//---------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 void RBBIRuleScanner::scanSet() {
    UnicodeSet    *uset;
    ParsePosition  pos;
--- a/icu4c/source/i18n/csdetect.cpp
+++ b/icu4c/source/i18n/csdetect.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2007, International Business Machines
+ *   Copyright (C) 2005-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -164,7 +164,8 @@ void CharsetDetector::setRecognizers(UErrorCode &status)
 }

 CharsetDetector::CharsetDetector(UErrorCode &status)
-  : textIn(new InputText()), resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
+  : textIn(new InputText(status)), resultArray(NULL),
+    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
 {
    if (U_FAILURE(status)) {
        return;
--- a/icu4c/source/i18n/inputext.cpp
+++ b/icu4c/source/i18n/inputext.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2006, International Business Machines
+ *   Copyright (C) 2005-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -25,7 +25,7 @@ U_NAMESPACE_BEGIN
 #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
 #define DELETE_ARRAY(array) uprv_free((void *) (array))

-InputText::InputText()
+InputText::InputText(UErrorCode &status)
    : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked.  Markup will have been
                                                 //   removed if appropriate.
      fByteStats(NEW_ARRAY(int16_t, 256)),       // byte frequency statistics for the input text.
@ -33,8 +33,10 @@ InputText::InputText()
      fDeclaredEncoding(0),
      fRawInput(0),
      fRawLength(0)
-{  
-
+{
+    if (fInputBytes == NULL || fByteStats == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
 }

 InputText::~InputText()
--- a/icu4c/source/i18n/inputext.h
+++ b/icu4c/source/i18n/inputext.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2006, International Business Machines
+ *   Copyright (C) 2005-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -25,8 +25,10 @@ U_NAMESPACE_BEGIN

 class InputText : public UMemory
 {
+    // Prevent copying
+    InputText(const InputText &);
 public:
-    InputText();
+    InputText(UErrorCode &status);
    ~InputText();

    void setText(const char *in, int32_t len);
--- a/icu4c/source/test/cintltst/ucsdetst.c
+++ b/icu4c/source/test/cintltst/ucsdetst.c
@ -125,6 +125,11 @@ static void TestUTF8(void)
    bytes = extractBytes(s, sLength, "UTF-8", &byteLength);

    ucsdet_setText(csd, bytes, byteLength, &status);
+    if (U_FAILURE(status)) {
+        log_err("status is %s\n", u_errorName(status));
+        goto bail;
+    }
+
    match = ucsdet_detect(csd, &status);

    if (match == NULL) {