From c612056f6a89f5df8aeaa5f3191e97ef60b1bdee Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 21 Mar 2003 00:40:25 +0000 Subject: [PATCH] ICU-2422 Regexp, general cleanup X-SVN-Rev: 11375 --- icu4c/source/i18n/rematch.cpp | 45 ++++++++++++-------- icu4c/source/i18n/repattrn.cpp | 71 ++++++++++++++----------------- icu4c/source/i18n/unicode/regex.h | 2 +- 3 files changed, 61 insertions(+), 57 deletions(-) diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp index 312ee607d3f..5dbec87d584 100644 --- a/icu4c/source/i18n/rematch.cpp +++ b/icu4c/source/i18n/rematch.cpp @@ -40,6 +40,10 @@ RegexMatcher::RegexMatcher(const RegexPattern *pat) { fDeferredStatus = U_ZERO_ERROR; fStack = new UVector32(fDeferredStatus); fData = fSmallData; + if (pat==NULL) { + fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR; + return; + } if (pat->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) { fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t)); } @@ -61,11 +65,14 @@ RegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &inp fDeferredStatus = U_ZERO_ERROR; fStack = new UVector32(status); fData = fSmallData; + if (U_FAILURE(status)) { + return; + } if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) { fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); } if (fStack == NULL || fData == NULL) { - fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; + status = U_MEMORY_ALLOCATION_ERROR; } reset(input); } @@ -74,17 +81,21 @@ RegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &inp RegexMatcher::RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status) { UParseError pe; - fPattern = RegexPattern::compile(regexp, flags, pe, status); fPatternOwned = TRUE; fTraceDebug = FALSE; fDeferredStatus = U_ZERO_ERROR; fStack = new UVector32(status); fData = fSmallData; + fPattern = RegexPattern::compile(regexp, flags, pe, status); + if (U_FAILURE(status)) { + return; + } + if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) { fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); } if (fStack == NULL || fData == NULL) { - fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; + status = U_MEMORY_ALLOCATION_ERROR; } reset(); } @@ -287,9 +298,7 @@ UBool RegexMatcher::find() { // Start at the position of the last match end. (Will be zero if the // matcher has been reset. // - UErrorCode status = U_ZERO_ERROR; - - if (fPattern->fBadState) { + if (U_FAILURE(fDeferredStatus)) { return FALSE; } @@ -309,8 +318,8 @@ UBool RegexMatcher::find() { // No optimization was found. // Try a match at each input position. for (;;) { - MatchAt(startPos, status); - if (U_FAILURE(status)) { + MatchAt(startPos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } if (fMatch) { @@ -332,8 +341,8 @@ UBool RegexMatcher::find() { if (startPos > 0) { return FALSE; } - MatchAt(startPos, status); - if (U_FAILURE(status)) { + MatchAt(startPos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } return fMatch; @@ -347,8 +356,8 @@ UBool RegexMatcher::find() { int32_t pos = startPos; U16_NEXT(inputBuf, startPos, inputLen, c); // like c = inputBuf[startPos++]; if (fPattern->fInitialChars->contains(c)) { - MatchAt(pos, status); - if (U_FAILURE(status)) { + MatchAt(pos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } if (fMatch) { @@ -372,8 +381,8 @@ UBool RegexMatcher::find() { int32_t pos = startPos; U16_NEXT(inputBuf, startPos, inputLen, c); // like c = inputBuf[startPos++]; if (c == theChar) { - MatchAt(pos, status); - if (U_FAILURE(status)) { + MatchAt(pos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } if (fMatch) { @@ -391,8 +400,8 @@ UBool RegexMatcher::find() { { UChar32 c; if (startPos == 0) { - MatchAt(startPos, status); - if (U_FAILURE(status)) { + MatchAt(startPos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } if (fMatch) { @@ -406,8 +415,8 @@ UBool RegexMatcher::find() { if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible (c == 0x0a || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029 || c == 0x0d && startPos+1 < inputLen && inputBuf[startPos+1] != 0x0a)) { - MatchAt(startPos, status); - if (U_FAILURE(status)) { + MatchAt(startPos, fDeferredStatus); + if (U_FAILURE(fDeferredStatus)) { return FALSE; } if (fMatch) { diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp index 25eddaac052..0f3d7a9a392 100644 --- a/icu4c/source/i18n/repattrn.cpp +++ b/icu4c/source/i18n/repattrn.cpp @@ -2,10 +2,10 @@ // file: repattrn.cpp // /* -********************************************************************** -* Copyright (C) 2002 International Business Machines Corporation * -* and others. All rights reserved. * -********************************************************************** +*************************************************************************** +* Copyright (C) 2002-2003 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** */ #include "unicode/utypes.h" @@ -65,7 +65,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) { fPattern = other.fPattern; fFlags = other.fFlags; fLiteralText = other.fLiteralText; - fBadState = other.fBadState; + fDeferredStatus = other.fDeferredStatus; fMinMatchLen = other.fMinMatchLen; fMaxMatchLen = other.fMaxMatchLen; fMaxCaptureDigits = other.fMaxCaptureDigits; @@ -76,18 +76,14 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) { fInitialStringLen = other.fInitialStringLen; fInitialChars = new UnicodeSet(*other.fInitialChars); fInitialChar = other.fInitialChar; - if (fBadState) { - return *this; - } // Copy the pattern. It's just values, nothing deep to copy. - // TODO: something with status - UErrorCode status = U_ZERO_ERROR; - fCompiledPat->assign(*other.fCompiledPat, status); - fGroupMap->assign(*other.fGroupMap, status); + fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); + fGroupMap->assign(*other.fGroupMap, fDeferredStatus); // Note: do not copy fMatcher. It'll be created on first use if the // destination needs one. + // TODO: thread safety // Copy the Unicode Sets. // Could be made more efficient if the sets were reference counted and shared, @@ -95,16 +91,16 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) { // Note: init() already added an empty element zero to fSets int32_t i; for (i=1; isize(); i++) { + if (U_FAILURE(fDeferredStatus)) { + return *this; + } UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); UnicodeSet *newSet = new UnicodeSet(*sourceSet); if (newSet == NULL) { - fBadState = TRUE; + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; break; } - fSets->addElement(newSet, status); - } - if (U_FAILURE(status)) { - fBadState = TRUE; + fSets->addElement(newSet, fDeferredStatus); } return *this; } @@ -118,7 +114,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) { //-------------------------------------------------------------------------- void RegexPattern::init() { fFlags = 0; - fBadState = FALSE; + fDeferredStatus = U_ZERO_ERROR; fMinMatchLen = 0; fMaxMatchLen = -1; fMaxCaptureDigits = 1; @@ -132,19 +128,20 @@ void RegexPattern::init() { fInitialChars = NULL; fInitialChar = 0; - UErrorCode status=U_ZERO_ERROR; - // Init of a completely new RegexPattern. - fCompiledPat = new UVector32(status); - fGroupMap = new UVector32(status); - fSets = new UVector(status); - fInitialChars = new UnicodeSet; - if (U_FAILURE(status) || fCompiledPat == NULL || fSets == NULL || fInitialChars == NULL) { - fBadState = TRUE; + fCompiledPat = new UVector32(fDeferredStatus); + fGroupMap = new UVector32(fDeferredStatus); + fSets = new UVector(fDeferredStatus); + fInitialChars = new UnicodeSet; + if (U_FAILURE(fDeferredStatus)) { + return; + } + if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || fInitialChars == NULL) { + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; return; } // Slot zero of the vector of sets is reserved. Fill it here. - fSets->addElement((int32_t)0, status); + fSets->addElement((int32_t)0, fDeferredStatus); } @@ -205,8 +202,7 @@ RegexPattern *RegexPattern::clone() const { UBool RegexPattern::operator ==(const RegexPattern &other) const { UBool r = this->fFlags == other.fFlags && this->fPattern == other.fPattern && - this->fBadState == FALSE && - other.fBadState == FALSE; + this->fDeferredStatus == other.fDeferredStatus; return r; } @@ -243,8 +239,8 @@ RegexPattern *RegexPattern::compile( status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - if (This->fBadState) { - status = U_REGEX_INVALID_STATE; + if (U_FAILURE(This->fDeferredStatus)) { + status = This->fDeferredStatus; return NULL; } This->fFlags = flags; @@ -283,20 +279,20 @@ uint32_t RegexPattern::flags() const { // //--------------------------------------------------------------------- RegexMatcher *RegexPattern::matcher(const UnicodeString &input, - UErrorCode &err) const { + UErrorCode &status) const { RegexMatcher *retMatcher = NULL; - if (U_FAILURE(err)) { + if (U_FAILURE(status)) { return NULL; } - if (fBadState) { - U_FAILURE(U_REGEX_INVALID_STATE); + if (U_FAILURE(fDeferredStatus)) { + status = fDeferredStatus; return NULL; } retMatcher = new RegexMatcher(this); if (retMatcher == NULL) { - err = U_MEMORY_ALLOCATION_ERROR; + status = U_MEMORY_ALLOCATION_ERROR; return NULL; } retMatcher->reset(input); @@ -571,7 +567,7 @@ void RegexPattern::dumpOp(int32_t index) const { - +// TODO: get rid of max match length void RegexPattern::dump() const { @@ -583,7 +579,6 @@ void RegexPattern::dump() const { REGEX_DUMP_DEBUG_PRINTF("%c", fPattern.charAt(i)); } REGEX_DUMP_DEBUG_PRINTF("\n"); - REGEX_DUMP_DEBUG_PRINTF("Pattern Valid?: %s\n" , fBadState? "no" : "yes"); REGEX_DUMP_DEBUG_PRINTF(" Min Match Length: %d\n", fMinMatchLen); REGEX_DUMP_DEBUG_PRINTF(" Max Match Length: %d\n", fMaxMatchLen); REGEX_DUMP_DEBUG_PRINTF(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index fbc4f778ea1..802460d60b3 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -316,7 +316,7 @@ private: UVector *fSets; // Any UnicodeSets referenced from the pattern. - UBool fBadState; // True if some prior error has left this + UErrorCode fDeferredStatus; // status if some prior error has left this // RegexPattern in an unusable state. int32_t fMinMatchLen; // Minimum Match Length. All matches will have length