mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-6149 Merge regexp resource limit functions into the trunk
X-SVN-Rev: 23467
This commit is contained in:
parent
56c21905a4
commit
5e550c04ac
11 changed files with 1048 additions and 157 deletions
|
@ -729,6 +729,9 @@ typedef enum UErrorCode {
|
|||
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. */
|
||||
U_REGEX_MISSING_CLOSE_BRACKET, /**< Missing closing bracket on a bracket expression. */
|
||||
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
|
||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2007, International Business Machines
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -156,7 +156,10 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
|
|||
"U_REGEX_SET_CONTAINS_STRING",
|
||||
"U_REGEX_OCTAL_TOO_BIG",
|
||||
"U_REGEX_MISSING_CLOSE_BRACKET",
|
||||
"U_REGEX_INVALID_RANGE"
|
||||
"U_REGEX_INVALID_RANGE",
|
||||
"U_REGEX_STACK_OVERFLOW",
|
||||
"U_REGEX_TIME_OUT",
|
||||
"U_REGEX_STOPPED_BY_CALLER"
|
||||
};
|
||||
|
||||
static const char * const
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1999-2003, International Business Machines Corporation and *
|
||||
* Copyright (C) 1999-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -26,6 +26,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector32)
|
|||
UVector32::UVector32(UErrorCode &status) :
|
||||
count(0),
|
||||
capacity(0),
|
||||
maxCapacity(0),
|
||||
elements(NULL)
|
||||
{
|
||||
_init(DEFUALT_CAPACITY, status);
|
||||
|
@ -34,6 +35,7 @@ UVector32::UVector32(UErrorCode &status) :
|
|||
UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) :
|
||||
count(0),
|
||||
capacity(0),
|
||||
maxCapacity(0),
|
||||
elements(0)
|
||||
{
|
||||
_init(initialCapacity, status);
|
||||
|
@ -46,6 +48,9 @@ void UVector32::_init(int32_t initialCapacity, UErrorCode &status) {
|
|||
if (initialCapacity < 1) {
|
||||
initialCapacity = DEFUALT_CAPACITY;
|
||||
}
|
||||
if (maxCapacity>0 && maxCapacity<initialCapacity) {
|
||||
initialCapacity = maxCapacity;
|
||||
}
|
||||
elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
|
||||
if (elements == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -187,23 +192,57 @@ int32_t UVector32::indexOf(int32_t key, int32_t startIndex) const {
|
|||
|
||||
|
||||
UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
|
||||
if (capacity < minimumCapacity) {
|
||||
int32_t newCap = capacity * 2;
|
||||
if (newCap < minimumCapacity) {
|
||||
newCap = minimumCapacity;
|
||||
}
|
||||
int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*newCap);
|
||||
if (newElems == NULL) {
|
||||
// We keep the original contents on the memory failure on realloc.
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
elements = newElems;
|
||||
capacity = newCap;
|
||||
if (capacity >= minimumCapacity) {
|
||||
return TRUE;
|
||||
}
|
||||
if (maxCapacity>0 && minimumCapacity>maxCapacity) {
|
||||
status = U_BUFFER_OVERFLOW_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
int32_t newCap = capacity * 2;
|
||||
if (newCap < minimumCapacity) {
|
||||
newCap = minimumCapacity;
|
||||
}
|
||||
if (maxCapacity > 0 && newCap > maxCapacity) {
|
||||
newCap = maxCapacity;
|
||||
}
|
||||
int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*newCap);
|
||||
if (newElems == NULL) {
|
||||
// We keep the original contents on the memory failure on realloc.
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
elements = newElems;
|
||||
capacity = newCap;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// Install an upper bound on the capacity of this vector.
//   limit   Maximum capacity, in elements (not bytes). Zero removes the bound.
//           Negative values trip the assertion and are otherwise treated as zero.
// When the buffer is already larger than a non-zero limit it is shrunk with
// realloc and count is clipped to the new capacity. A failed shrink is not an
// error; the existing, larger buffer simply stays in use.
void UVector32::setMaxCapacity(int32_t limit) {
    U_ASSERT(limit >= 0);
    maxCapacity = (limit < 0) ? 0 : limit;

    const UBool unbounded = (maxCapacity == 0);
    if (unbounded || capacity <= maxCapacity) {
        // The buffer already satisfies the new bound; nothing to trim.
        return;
    }

    // The new bound is below the current capacity: shrink the storage to fit.
    int32_t *shrunk = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*maxCapacity);
    if (shrunk != NULL) {
        elements = shrunk;
        capacity = maxCapacity;
        if (count > capacity) {
            // Elements beyond the new capacity no longer exist.
            count = capacity;
        }
    }
    // On realloc failure the old buffer is kept unchanged.
}
|
||||
|
||||
/**
|
||||
* Change the size of this vector as follows: If newSize is smaller,
|
||||
* then truncate the array, possibly deleting held elements for i >=
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Copyright (C) 1999-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -61,6 +61,8 @@ private:
|
|||
int32_t count;
|
||||
|
||||
int32_t capacity;
|
||||
|
||||
int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
|
||||
|
||||
int32_t* elements;
|
||||
|
||||
|
@ -161,6 +163,14 @@ public:
|
|||
*/
|
||||
int32_t *getBuffer() const;
|
||||
|
||||
/**
|
||||
* Set the maximum allowed buffer capacity for this vector/stack.
|
||||
* Default with no limit set is unlimited, go until malloc() fails.
|
||||
* A Limit of zero means unlimited capacity.
|
||||
* Units are vector elements (32 bits each), not bytes.
|
||||
*/
|
||||
void setMaxCapacity(int32_t limit);
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*/
|
||||
|
@ -221,7 +231,9 @@ inline void UVector32::addElement(int32_t elem, UErrorCode &status) {
|
|||
}
|
||||
|
||||
inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
|
||||
ensureCapacity(count+size, status);
|
||||
if (ensureCapacity(count+size, status) == FALSE) {
|
||||
return NULL;
|
||||
}
|
||||
int32_t *rp = elements+count;
|
||||
count += size;
|
||||
return rp;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 2002-2007 International Business Machines Corporation *
|
||||
* Copyright (C) 2002-2008 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**************************************************************************
|
||||
*/
|
||||
|
@ -30,90 +30,69 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Default limit for the size of the back track stack, to avoid system
|
||||
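// failures caused by heap exhaustion. Units are in 32 bit words, not bytes.
// NOTE(review): init2() passes this value to setStackLimit(), which divides
// its argument by sizeof(int32_t), i.e. treats it as bytes - confirm the unit.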
|
||||
// This value puts ICU's limits higher than most other regexp implementations,
|
||||
// which use recursion rather than the heap, and take more storage per
|
||||
// backtrack point.
|
||||
//
|
||||
static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000;
|
||||
|
||||
// Time limit counter constant.
|
||||
// Time limits for expression evaluation are in terms of quanta of work by
|
||||
// the engine, each of which is 10,000 state saves.
|
||||
// This constant sets the number of state saves per tick.
|
||||
static const int32_t TIMER_INITIAL_VALUE = 10000;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Constructor and Destructor
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
RegexMatcher::RegexMatcher(const RegexPattern *pat) {
|
||||
fPattern = pat;
|
||||
fPatternOwned = NULL;
|
||||
fInput = NULL;
|
||||
fTraceDebug = FALSE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(fDeferredStatus);
|
||||
fData = fSmallData;
|
||||
fWordBreakItr = NULL;
|
||||
fTransparentBounds = FALSE;
|
||||
fAnchoringBounds = TRUE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
init(fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return;
|
||||
}
|
||||
if (pat==NULL) {
|
||||
fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (pat->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
|
||||
fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
if (fStack == NULL || fData == NULL) {
|
||||
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
reset(RegexStaticSets::gStaticSets->fEmptyString);
|
||||
fPattern = pat;
|
||||
init2(RegexStaticSets::gStaticSets->fEmptyString, fDeferredStatus);
|
||||
}
|
||||
|
||||
|
||||
|
||||
RegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
|
||||
uint32_t flags, UErrorCode &status) {
|
||||
UParseError pe;
|
||||
fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
|
||||
fPattern = fPatternOwned;
|
||||
fTraceDebug = FALSE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(status);
|
||||
fData = fSmallData;
|
||||
fWordBreakItr = NULL;
|
||||
fTransparentBounds = FALSE;
|
||||
fAnchoringBounds = TRUE;
|
||||
init(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
|
||||
fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
if (fStack == NULL || fData == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
reset(input);
|
||||
UParseError pe;
|
||||
fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
|
||||
fPattern = fPatternOwned;
|
||||
init2(input, status);
|
||||
}
|
||||
|
||||
|
||||
RegexMatcher::RegexMatcher(const UnicodeString ®exp,
|
||||
uint32_t flags, UErrorCode &status) {
|
||||
UParseError pe;
|
||||
fTraceDebug = FALSE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(status);
|
||||
fData = fSmallData;
|
||||
fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
|
||||
fPattern = fPatternOwned;
|
||||
fWordBreakItr = NULL;
|
||||
fTransparentBounds = FALSE;
|
||||
fAnchoringBounds = TRUE;
|
||||
init(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
|
||||
fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
if (fStack == NULL || fData == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
reset(RegexStaticSets::gStaticSets->fEmptyString);
|
||||
UParseError pe;
|
||||
fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
|
||||
fPattern = fPatternOwned;
|
||||
init2(RegexStaticSets::gStaticSets->fEmptyString, status);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
RegexMatcher::~RegexMatcher() {
|
||||
delete fStack;
|
||||
if (fData != fSmallData) {
|
||||
|
@ -130,6 +109,79 @@ RegexMatcher::~RegexMatcher() {
|
|||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
// init() common initialization for use by all constructors.
|
||||
// Initialize all fields, get the object into a consistent state.
|
||||
// This must be done even when the initial status shows an error,
|
||||
// so that the object is initialized sufficiently well for the destructor
|
||||
// to run safely.
|
||||
//
|
||||
void RegexMatcher::init(UErrorCode &status) {
|
||||
fPattern = NULL;
|
||||
fPatternOwned = NULL;
|
||||
fInput = NULL;
|
||||
fFrameSize = 0;
|
||||
fRegionStart = 0;
|
||||
fRegionLimit = 0;
|
||||
fAnchorStart = 0;
|
||||
fAnchorLimit = 0;
|
||||
fLookStart = 0;
|
||||
fLookLimit = 0;
|
||||
fActiveStart = 0;
|
||||
fActiveLimit = 0;
|
||||
fTransparentBounds = FALSE;
|
||||
fAnchoringBounds = TRUE;
|
||||
fMatch = FALSE;
|
||||
fMatchStart = 0;
|
||||
fMatchEnd = 0;
|
||||
fLastMatchEnd = -1;
|
||||
fAppendPosition = 0;
|
||||
fHitEnd = FALSE;
|
||||
fRequireEnd = FALSE;
|
||||
fStack = NULL;
|
||||
fFrame = NULL;
|
||||
fTimeLimit = 0;
|
||||
fTime = 0;
|
||||
fTickCounter = 0;
|
||||
fStackLimit = DEFAULT_BACKTRACK_STACK_CAPACITY;
|
||||
fCallbackFn = NULL;
|
||||
fCallbackContext = NULL;
|
||||
fTraceDebug = FALSE;
|
||||
fDeferredStatus = status;
|
||||
fData = fSmallData;
|
||||
fWordBreakItr = NULL;
|
||||
|
||||
fStack = new UVector32(status);
|
||||
if (U_FAILURE(status)) {
|
||||
fDeferredStatus = status;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// init2() Common initialization for use by RegexMatcher constructors, part 2.
|
||||
// This handles the common setup to be done after the Pattern is available.
|
||||
//
|
||||
void RegexMatcher::init2(const UnicodeString &input, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
fDeferredStatus = status;
|
||||
return;
|
||||
}
|
||||
|
||||
if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
|
||||
fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
|
||||
if (fData == NULL) {
|
||||
status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
reset(input);
|
||||
setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status);
|
||||
if (U_FAILURE(status)) {
|
||||
fDeferredStatus = status;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const UChar BACKSLASH = 0x5c;
|
||||
|
@ -856,6 +908,8 @@ void RegexMatcher::resetPreserveRegion() {
|
|||
fMatch = FALSE;
|
||||
fHitEnd = FALSE;
|
||||
fRequireEnd = FALSE;
|
||||
fTime = 0;
|
||||
fTickCounter = TIMER_INITIAL_VALUE;
|
||||
resetStack();
|
||||
}
|
||||
|
||||
|
@ -1067,6 +1121,118 @@ RegexMatcher &RegexMatcher::useTransparentBounds(UBool b) {
|
|||
return *this;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// setTimeLimit
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
void RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
return;
|
||||
}
|
||||
if (limit < 0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
fTimeLimit = limit;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// getTimeLimit
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
int32_t RegexMatcher::getTimeLimit() const {
|
||||
return fTimeLimit;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// setStackLimit
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
void RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
return;
|
||||
}
|
||||
if (limit < 0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// Reset the matcher. This is needed here in case there is a current match
|
||||
// whose final stack frame (containing the match results, pointed to by fFrame)
|
||||
// would be lost by resizing to a smaller stack size.
|
||||
reset();
|
||||
|
||||
if (limit == 0) {
|
||||
// Unlimited stack expansion
|
||||
fStack->setMaxCapacity(0);
|
||||
} else {
|
||||
// Change the units of the limit from bytes to ints, and bump the size up
|
||||
// to be big enough to hold at least one stack frame for the pattern,
|
||||
// if it isn't there already.
|
||||
int32_t adjustedLimit = limit / sizeof(int32_t);
|
||||
if (adjustedLimit < fPattern->fFrameSize) {
|
||||
adjustedLimit = fPattern->fFrameSize;
|
||||
}
|
||||
fStack->setMaxCapacity(adjustedLimit);
|
||||
}
|
||||
fStackLimit = limit;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// getStackLimit
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
int32_t RegexMatcher::getStackLimit() const {
|
||||
return fStackLimit;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// setMatchCallback
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
void RegexMatcher::setMatchCallback(URegexMatchCallback callback,
|
||||
const void *context,
|
||||
UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
fCallbackFn = callback;
|
||||
fCallbackContext = context;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// getMatchCallback
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
void RegexMatcher::getMatchCallback(URegexMatchCallback &callback,
|
||||
const void *&context,
|
||||
UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
callback = fCallbackFn;
|
||||
context = fCallbackContext;
|
||||
}
|
||||
|
||||
|
||||
//================================================================================
|
||||
//
|
||||
|
@ -1187,6 +1353,31 @@ UBool RegexMatcher::isUWordBoundary(int32_t pos) {
|
|||
return returnVal;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// IncrementTime This function is called once each TIMER_INITIAL_VALUE state
|
||||
// saves. Increment the "time" counter, and call the
|
||||
// user callback function if there is one installed.
|
||||
//
|
||||
// If the match operation needs to be aborted, either for a time-out
|
||||
// or because the user callback asked for it, just set an error status.
|
||||
// The engine will pick that up and stop in its outer loop.
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
void RegexMatcher::IncrementTime(UErrorCode &status) {
|
||||
fTickCounter = TIMER_INITIAL_VALUE;
|
||||
fTime++;
|
||||
if (fCallbackFn != NULL) {
|
||||
if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) {
|
||||
status = U_REGEX_STOPPED_BY_CALLER;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (fTimeLimit > 0 && fTime >= fTimeLimit) {
|
||||
status = U_REGEX_TIME_OUT;
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// StateSave
|
||||
|
@ -1196,13 +1387,33 @@ UBool RegexMatcher::isUWordBoundary(int32_t pos) {
|
|||
// the newly created stack frame
|
||||
//
|
||||
// Note that reserveBlock() may grow the stack, resulting in the
|
||||
// whole thing being relocated in memory.
|
||||
// whole thing being relocated in memory.
|
||||
//
|
||||
// Parameters:
|
||||
// fp The top frame pointer when called. At return, a new
|
||||
// frame will be present
|
||||
// savePatIdx An index into the compiled pattern. Goes into the original
|
||||
// (not new) frame. If execution ever back-tracks out of the
|
||||
// new frame, this will be where we continue from in the pattern.
|
||||
// Return
|
||||
// The new frame pointer.
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) {
|
||||
inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status) {
|
||||
// push storage for a new frame.
|
||||
int32_t *newFP = fStack->reserveBlock(frameSize, status);
|
||||
fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack.
|
||||
int32_t *newFP = fStack->reserveBlock(fFrameSize, status);
|
||||
if (newFP == NULL) {
|
||||
// Failure on attempted stack expansion.
|
||||
// Stack function set some other error code, change it to a more
|
||||
// specific one for regular expressions.
|
||||
status = U_REGEX_STACK_OVERFLOW;
|
||||
// We need to return a writable stack frame, so just return the
|
||||
// previous frame. The match operation will stop quickly
|
||||
// because of the error status, after which the frame will never
|
||||
// be looked at again.
|
||||
return fp;
|
||||
}
|
||||
fp = (REStackFrame *)(newFP - fFrameSize); // in case of realloc of stack.
|
||||
|
||||
// New stack frame = copy of old top frame.
|
||||
int32_t *source = (int32_t *)fp;
|
||||
|
@ -1214,6 +1425,10 @@ inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatId
|
|||
}
|
||||
}
|
||||
|
||||
fTickCounter--;
|
||||
if (fTickCounter <= 0) {
|
||||
IncrementTime(status); // Re-initializes fTickCounter
|
||||
}
|
||||
fp->fPatIdx = savePatIdx;
|
||||
return (REStackFrame *)newFP;
|
||||
}
|
||||
|
@ -1262,7 +1477,6 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
}
|
||||
|
||||
// Cache frequently referenced items from the compiled pattern
|
||||
// in local variables.
|
||||
//
|
||||
int32_t *pat = fPattern->fCompiledPat->getBuffer();
|
||||
|
||||
|
@ -1271,8 +1485,8 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
|
||||
const UChar *inputBuf = fInput->getBuffer();
|
||||
|
||||
fFrameSize = fPattern->fFrameSize;
|
||||
REStackFrame *fp = resetStack();
|
||||
int32_t frameSize = fPattern->fFrameSize;
|
||||
|
||||
fp->fPatIdx = 0;
|
||||
fp->fInputIdx = startIdx;
|
||||
|
@ -1316,7 +1530,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
// Force a backtrack. In some circumstances, the pattern compiler
|
||||
// will notice that the pattern can't possibly match anything, and will
|
||||
// emit one of these at that point.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
|
||||
|
||||
|
@ -1330,7 +1544,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
} else {
|
||||
fHitEnd = TRUE;
|
||||
}
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
|
||||
|
||||
|
@ -1352,7 +1566,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
if (fp->fInputIdx + stringLen > fActiveLimit) {
|
||||
// No match. String is longer than the remaining input text.
|
||||
fHitEnd = TRUE; // TODO: See ticket 6074
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1370,7 +1584,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
}
|
||||
} else {
|
||||
// Match failed.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1380,7 +1594,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
|
||||
|
||||
case URX_STATE_SAVE:
|
||||
fp = StateSave(fp, opValue, frameSize, status);
|
||||
fp = StateSave(fp, opValue, status);
|
||||
break;
|
||||
|
||||
|
||||
|
@ -1389,7 +1603,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
// when we reach the end of the pattern.
|
||||
if (toEnd && fp->fInputIdx != fActiveLimit) {
|
||||
// The pattern matched, but not to the end of input. Try some more.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
isMatch = TRUE;
|
||||
|
@ -1401,26 +1615,26 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
// opValue+2 - the start of a capture group whose end
|
||||
// has not yet been reached (and might not ever be).
|
||||
case URX_START_CAPTURE:
|
||||
U_ASSERT(opValue >= 0 && opValue < frameSize-3);
|
||||
U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
|
||||
fp->fExtra[opValue+2] = fp->fInputIdx;
|
||||
break;
|
||||
|
||||
|
||||
case URX_END_CAPTURE:
|
||||
U_ASSERT(opValue >= 0 && opValue < frameSize-3);
|
||||
U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
|
||||
U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set.
|
||||
fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real.
|
||||
fp->fExtra[opValue+1] = fp->fInputIdx; // End position
|
||||
U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
|
||||
break;
|
||||
|
||||
|
||||
|
||||
case URX_DOLLAR: // $, test for End of line
|
||||
// or for position before new line at end of input
|
||||
if (fp->fInputIdx < fAnchorLimit-2) {
|
||||
// We are no where near the end of input. Fail.
|
||||
// This is the common case. Keep it first.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
if (fp->fInputIdx >= fAnchorLimit) {
|
||||
|
@ -1451,7 +1665,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
break; // At CR/LF at end of input. Success
|
||||
}
|
||||
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
|
||||
break;
|
||||
|
||||
|
@ -1475,7 +1689,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
}
|
||||
|
||||
// Not at end of input. Back-track out.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
|
||||
|
||||
|
@ -1499,7 +1713,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
// not at a new line. Fail.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1515,7 +1729,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
// If we are not positioned just before a new-line, the test fails; backtrack out.
|
||||
// It makes no difference where the new-line is within the input.
|
||||
if (inputBuf[fp->fInputIdx] != 0x0a) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1523,7 +1737,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
|
||||
case URX_CARET: // ^, test for start of line
|
||||
if (fp->fInputIdx != fAnchorStart) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1544,7 +1758,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
break;
|
||||
}
|
||||
// Not at the start of a line. Fail.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1561,7 +1775,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
UChar c = inputBuf[fp->fInputIdx - 1];
|
||||
if (c != 0x0a) {
|
||||
// Not at the start of a line. Back-track out.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1571,7 +1785,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
UBool success = isWordBoundary(fp->fInputIdx);
|
||||
success ^= (opValue != 0); // flip sense for \B
|
||||
if (!success) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1582,7 +1796,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
UBool success = isUWordBoundary(fp->fInputIdx);
|
||||
success ^= (opValue != 0); // flip sense for \B
|
||||
if (!success) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1592,7 +1806,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
{
|
||||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1603,7 +1817,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
if (success) {
|
||||
fp->fInputIdx = fInput->moveIndex32(fp->fInputIdx, 1);
|
||||
} else {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1611,7 +1825,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
|
||||
case URX_BACKSLASH_G: // Test for position at end of previous match
|
||||
if (!((fMatch && fp->fInputIdx==fMatchEnd) || fMatch==FALSE && fp->fInputIdx==fActiveStart)) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1625,7 +1839,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
|
|||
// Fail if at end of input
|
||||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1703,7 +1917,7 @@ GC_Done:
|
|||
|
||||
case URX_BACKSLASH_Z: // Test for end of Input
|
||||
if (fp->fInputIdx < fAnchorLimit) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
} else {
|
||||
fHitEnd = TRUE;
|
||||
fRequireEnd = TRUE;
|
||||
|
@ -1721,7 +1935,7 @@ GC_Done:
|
|||
// 1: success if input char is not in set.
|
||||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1742,7 +1956,7 @@ GC_Done:
|
|||
}
|
||||
}
|
||||
if (!success) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1754,7 +1968,7 @@ GC_Done:
|
|||
// the predefined sets (Word Characters, for example)
|
||||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1773,7 +1987,7 @@ GC_Done:
|
|||
}
|
||||
}
|
||||
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1781,7 +1995,7 @@ GC_Done:
|
|||
case URX_SETREF:
|
||||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
// There is input left. Pick up one char and test it for set membership.
|
||||
|
@ -1801,7 +2015,7 @@ GC_Done:
|
|||
}
|
||||
}
|
||||
// the character wasn't in the set. Back track out.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
|
||||
|
||||
|
@ -1811,7 +2025,7 @@ GC_Done:
|
|||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
// At end of input. Match failed. Backtrack out.
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
// There is input left. Advance over one char, unless we've hit end-of-line
|
||||
|
@ -1820,7 +2034,7 @@ GC_Done:
|
|||
if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible
|
||||
((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
|
||||
// End of line in normal mode. . does not match.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1833,7 +2047,7 @@ GC_Done:
|
|||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
// At end of input. Match failed. Backtrack out.
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
// There is input left. Advance over one char, except if we are
|
||||
|
@ -1858,7 +2072,7 @@ GC_Done:
|
|||
if (fp->fInputIdx >= fActiveLimit) {
|
||||
// At end of input. Match failed. Backtrack out.
|
||||
fHitEnd = TRUE;
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
// There is input left. Advance over one char, unless we've hit end-of-line
|
||||
|
@ -1866,7 +2080,7 @@ GC_Done:
|
|||
U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
|
||||
if (c == 0x0a) {
|
||||
// End of line in normal mode. '.' does not match the \n
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1882,8 +2096,8 @@ GC_Done:
|
|||
|
||||
case URX_JMP_SAV:
|
||||
U_ASSERT(opValue < fPattern->fCompiledPat->size());
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status); // State save to loc following current
|
||||
fp->fPatIdx = opValue; // Then JMP.
|
||||
fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current
|
||||
fp->fPatIdx = opValue; // Then JMP.
|
||||
break;
|
||||
|
||||
case URX_JMP_SAV_X:
|
||||
|
@ -1896,12 +2110,12 @@ GC_Done:
|
|||
int32_t stoOp = pat[opValue-1];
|
||||
U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
|
||||
int32_t frameLoc = URX_VAL(stoOp);
|
||||
U_ASSERT(frameLoc >= 0 && frameLoc < frameSize);
|
||||
U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
|
||||
int32_t prevInputIdx = fp->fExtra[frameLoc];
|
||||
U_ASSERT(prevInputIdx <= fp->fInputIdx);
|
||||
if (prevInputIdx < fp->fInputIdx) {
|
||||
// The match did make progress. Repeat the loop.
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status); // State save to loc following current
|
||||
fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current
|
||||
fp->fPatIdx = opValue;
|
||||
fp->fExtra[frameLoc] = fp->fInputIdx;
|
||||
}
|
||||
|
@ -1912,7 +2126,7 @@ GC_Done:
|
|||
|
||||
case URX_CTR_INIT:
|
||||
{
|
||||
U_ASSERT(opValue >= 0 && opValue < frameSize-2);
|
||||
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
|
||||
fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
|
||||
|
||||
// Pick up the three extra operands that CTR_INIT has, and
|
||||
|
@ -1927,10 +2141,10 @@ GC_Done:
|
|||
U_ASSERT(loopLoc>fp->fPatIdx);
|
||||
|
||||
if (minCount == 0) {
|
||||
fp = StateSave(fp, loopLoc+1, frameSize, status);
|
||||
fp = StateSave(fp, loopLoc+1, status);
|
||||
}
|
||||
if (maxCount == 0) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1953,7 +2167,7 @@ GC_Done:
|
|||
break;
|
||||
}
|
||||
if (*pCounter >= minCount) {
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx, status);
|
||||
}
|
||||
fp->fPatIdx = opValue + 4; // Loop back.
|
||||
}
|
||||
|
@ -1962,7 +2176,7 @@ GC_Done:
|
|||
case URX_CTR_INIT_NG:
|
||||
{
|
||||
// Initialize a non-greedy loop
|
||||
U_ASSERT(opValue >= 0 && opValue < frameSize-2);
|
||||
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
|
||||
fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
|
||||
|
||||
// Pick up the three extra operands that CTR_INIT has, and
|
||||
|
@ -1978,7 +2192,7 @@ GC_Done:
|
|||
|
||||
if (minCount == 0) {
|
||||
if (maxCount != 0) {
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx, status);
|
||||
}
|
||||
fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block
|
||||
}
|
||||
|
@ -2017,7 +2231,7 @@ GC_Done:
|
|||
// Fall into the following pattern, but first do
|
||||
// a state save to the top of the loop, so that a failure
|
||||
// in the following pattern will try another iteration of the loop.
|
||||
fp = StateSave(fp, opValue + 4, frameSize, status);
|
||||
fp = StateSave(fp, opValue + 4, status);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -2032,12 +2246,12 @@ GC_Done:
|
|||
U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
|
||||
int32_t newStackSize = fData[opValue];
|
||||
U_ASSERT(newStackSize <= fStack->size());
|
||||
int32_t *newFP = fStack->getBuffer() + newStackSize - frameSize;
|
||||
int32_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
|
||||
if (newFP == (int32_t *)fp) {
|
||||
break;
|
||||
}
|
||||
int32_t i;
|
||||
for (i=0; i<frameSize; i++) {
|
||||
for (i=0; i<fFrameSize; i++) {
|
||||
newFP[i] = ((int32_t *)fp)[i];
|
||||
}
|
||||
fp = (REStackFrame *)newFP;
|
||||
|
@ -2048,14 +2262,14 @@ GC_Done:
|
|||
case URX_BACKREF:
|
||||
case URX_BACKREF_I:
|
||||
{
|
||||
U_ASSERT(opValue < frameSize);
|
||||
U_ASSERT(opValue < fFrameSize);
|
||||
int32_t groupStartIdx = fp->fExtra[opValue];
|
||||
int32_t groupEndIdx = fp->fExtra[opValue+1];
|
||||
U_ASSERT(groupStartIdx <= groupEndIdx);
|
||||
int32_t len = groupEndIdx-groupStartIdx;
|
||||
if (groupStartIdx < 0) {
|
||||
// This capture group has not participated in the match thus far,
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize); // FAIL, no match.
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match.
|
||||
}
|
||||
|
||||
if (len == 0) {
|
||||
|
@ -2085,14 +2299,14 @@ GC_Done:
|
|||
if (haveMatch) {
|
||||
fp->fInputIdx += len; // Match. Advance current input position.
|
||||
} else {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize); // FAIL, no match.
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match.
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case URX_STO_INP_LOC:
|
||||
{
|
||||
U_ASSERT(opValue >= 0 && opValue < frameSize);
|
||||
U_ASSERT(opValue >= 0 && opValue < fFrameSize);
|
||||
fp->fExtra[opValue] = fp->fInputIdx;
|
||||
}
|
||||
break;
|
||||
|
@ -2102,13 +2316,13 @@ GC_Done:
|
|||
int32_t instrOperandLoc = fp->fPatIdx;
|
||||
fp->fPatIdx += 1;
|
||||
int32_t dataLoc = URX_VAL(pat[instrOperandLoc]);
|
||||
U_ASSERT(dataLoc >= 0 && dataLoc < frameSize);
|
||||
U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
|
||||
int32_t savedInputIdx = fp->fExtra[dataLoc];
|
||||
U_ASSERT(savedInputIdx <= fp->fInputIdx);
|
||||
if (savedInputIdx < fp->fInputIdx) {
|
||||
fp->fPatIdx = opValue; // JMP
|
||||
} else {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize); // FAIL, no progress in loop.
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop.
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -2137,9 +2351,9 @@ GC_Done:
|
|||
// Copy the current top frame back to the new (cut back) top frame.
|
||||
// This makes the capture groups from within the look-ahead
|
||||
// expression available.
|
||||
int32_t *newFP = fStack->getBuffer() + newStackSize - frameSize;
|
||||
int32_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
|
||||
int32_t i;
|
||||
for (i=0; i<frameSize; i++) {
|
||||
for (i=0; i<fFrameSize; i++) {
|
||||
newFP[i] = ((int32_t *)fp)[i];
|
||||
}
|
||||
fp = (REStackFrame *)newFP;
|
||||
|
@ -2164,7 +2378,7 @@ GC_Done:
|
|||
} else {
|
||||
fHitEnd = TRUE;
|
||||
}
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
|
||||
case URX_STRING_I:
|
||||
|
@ -2195,7 +2409,7 @@ GC_Done:
|
|||
fHitEnd = TRUE; // See ticket 6074
|
||||
}
|
||||
// No match. Back up matching to a saved state
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -2249,7 +2463,7 @@ GC_Done:
|
|||
// We have tried all potential match starting points without
|
||||
// getting a match. Backtrack out, and out of the
|
||||
// Look Behind altogether.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
int32_t restoreInputLen = fData[opValue+3];
|
||||
U_ASSERT(restoreInputLen >= fActiveLimit);
|
||||
U_ASSERT(restoreInputLen <= fInput->length());
|
||||
|
@ -2259,7 +2473,7 @@ GC_Done:
|
|||
|
||||
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
|
||||
// (successful match will fall off the end of the loop.)
|
||||
fp = StateSave(fp, fp->fPatIdx-3, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx-3, status);
|
||||
fp->fInputIdx = *lbStartIdx;
|
||||
}
|
||||
break;
|
||||
|
@ -2274,7 +2488,7 @@ GC_Done:
|
|||
// FAIL out of here, which will take us back to the LB_CONT, which
|
||||
// will retry the match starting at another position or fail
|
||||
// the look-behind altogether, whichever is appropriate.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2333,7 +2547,7 @@ GC_Done:
|
|||
|
||||
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
|
||||
// (successful match will cause a FAIL out of the loop altogether.)
|
||||
fp = StateSave(fp, fp->fPatIdx-4, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx-4, status);
|
||||
fp->fInputIdx = *lbStartIdx;
|
||||
}
|
||||
break;
|
||||
|
@ -2348,7 +2562,7 @@ GC_Done:
|
|||
// FAIL out of here, which will take us back to the LB_CONT, which
|
||||
// will retry the match starting at another position or succeed
|
||||
// the look-behind altogether, whichever is appropriate.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2372,7 +2586,7 @@ GC_Done:
|
|||
|
||||
// FAIL, which will take control back to someplace
|
||||
// prior to entering the look-behind test.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -2423,14 +2637,14 @@ GC_Done:
|
|||
int32_t loopcOp = pat[fp->fPatIdx];
|
||||
U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
|
||||
int32_t stackLoc = URX_VAL(loopcOp);
|
||||
U_ASSERT(stackLoc >= 0 && stackLoc < frameSize);
|
||||
U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
|
||||
fp->fExtra[stackLoc] = fp->fInputIdx;
|
||||
fp->fInputIdx = ix;
|
||||
|
||||
// Save State to the URX_LOOP_C op that follows this one,
|
||||
// so that match failures in the following code will return to there.
|
||||
// Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx, status);
|
||||
fp->fPatIdx++;
|
||||
}
|
||||
break;
|
||||
|
@ -2486,14 +2700,14 @@ GC_Done:
|
|||
int32_t loopcOp = pat[fp->fPatIdx];
|
||||
U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
|
||||
int32_t stackLoc = URX_VAL(loopcOp);
|
||||
U_ASSERT(stackLoc >= 0 && stackLoc < frameSize);
|
||||
U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
|
||||
fp->fExtra[stackLoc] = fp->fInputIdx;
|
||||
fp->fInputIdx = ix;
|
||||
|
||||
// Save State to the URX_LOOP_C op that follows this one,
|
||||
// so that match failures in the following code will return to there.
|
||||
// Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
|
||||
fp = StateSave(fp, fp->fPatIdx, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx, status);
|
||||
fp->fPatIdx++;
|
||||
}
|
||||
break;
|
||||
|
@ -2501,7 +2715,7 @@ GC_Done:
|
|||
|
||||
case URX_LOOP_C:
|
||||
{
|
||||
U_ASSERT(opValue>=0 && opValue<frameSize);
|
||||
U_ASSERT(opValue>=0 && opValue<fFrameSize);
|
||||
int32_t terminalIdx = fp->fExtra[opValue];
|
||||
U_ASSERT(terminalIdx <= fp->fInputIdx);
|
||||
if (terminalIdx == fp->fInputIdx) {
|
||||
|
@ -2528,7 +2742,7 @@ GC_Done:
|
|||
}
|
||||
|
||||
|
||||
fp = StateSave(fp, fp->fPatIdx-1, frameSize, status);
|
||||
fp = StateSave(fp, fp->fPatIdx-1, status);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -2541,6 +2755,7 @@ GC_Done:
|
|||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
isMatch = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1003,6 +1003,102 @@ public:
|
|||
int32_t destCapacity,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set a processing time limit for match operations with this Matcher.
|
||||
*
|
||||
* Some patterns, when matching certain strings, can run in exponential time.
|
||||
* For practical purposes, the match operation may appear to be in an
|
||||
* infinite loop.
|
||||
* When a limit is set a match operation will fail with an error if the
|
||||
* limit is exceeded.
|
||||
* <p>
|
||||
* The units of the limit are steps of the match engine.
|
||||
* Correspondence with actual processor time will depend on the speed
|
||||
* of the processor and the details of the specific pattern, but will
|
||||
* typically be on the order of milliseconds.
|
||||
* <p>
|
||||
* By default, the matching time is not limited.
|
||||
* <p>
|
||||
*
|
||||
* @param limit The limit value, or 0 for no limit.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual void setTimeLimit(int32_t limit, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Get the time limit, if any, for match operations made with this Matcher.
|
||||
*
|
||||
* @return the maximum allowed time for a match, in units of processing steps.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual int32_t getTimeLimit() const;
|
||||
|
||||
/**
|
||||
* Set the amount of heap storage available for use by the match backtracking stack.
|
||||
* The matcher is also reset, discarding any results from previous matches.
|
||||
* <p>
|
||||
* ICU uses a backtracking regular expression engine, with the backtrack stack
|
||||
* maintained on the heap. This function sets the limit to the amount of memory
|
||||
* that can be used for this purpose. A backtracking stack overflow will
|
||||
* result in an error from the match operation that caused it.
|
||||
* <p>
|
||||
* A limit is desirable because a malicious or poorly designed pattern can use
|
||||
* excessive memory, potentially crashing the process. A limit is enabled
|
||||
* by default.
|
||||
* <p>
|
||||
* @param limit The maximum size, in bytes, of the matching backtrack stack.
|
||||
* A value of zero means no limit.
|
||||
* The limit must be greater than or equal to zero.
|
||||
*
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
*
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual void setStackLimit(int32_t limit, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Get the size of the heap storage available for use by the back tracking stack.
|
||||
*
|
||||
* @return the maximum backtracking stack size, in bytes, or zero if the
|
||||
* stack size is unlimited.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual int32_t getStackLimit() const;
|
||||
|
||||
|
||||
/**
|
||||
* Set a callback function for use with this Matcher.
|
||||
* During matching operations the function will be called periodically,
|
||||
* giving the application the opportunity to terminate a long-running
|
||||
* match.
|
||||
*
|
||||
* @param callback A pointer to the user-supplied callback function.
|
||||
* @param context User context pointer. The value supplied at the
|
||||
* time the callback function is set will be saved
|
||||
* and passed to the callback each time that it is called.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual void setMatchCallback(URegexMatchCallback callback,
|
||||
const void *context,
|
||||
UErrorCode &status);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Get the callback function for this Matcher.
|
||||
*
|
||||
* @param callback Out parameter, receives a pointer to the user-supplied
|
||||
* callback function.
|
||||
* @param context Out parameter, receives the user context pointer that
|
||||
* was set when setMatchCallback() was called.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
virtual void getMatchCallback(URegexMatchCallback &callback,
|
||||
const void *&context,
|
||||
UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -1030,10 +1126,13 @@ public:
|
|||
private:
|
||||
// Constructors and other object boilerplate are private.
|
||||
// Instances of RegexMatcher can not be assigned, copied, cloned, etc.
|
||||
RegexMatcher(); // default constructor not implemented
|
||||
RegexMatcher(); // default constructor not implemented
|
||||
RegexMatcher(const RegexPattern *pat);
|
||||
RegexMatcher(const RegexMatcher &other);
|
||||
RegexMatcher &operator =(const RegexMatcher &rhs);
|
||||
void init(UErrorCode &status); // Common initialization
|
||||
void init2(const UnicodeString &s, UErrorCode &e); // Common initialization, part 2.
|
||||
|
||||
friend class RegexPattern;
|
||||
friend class RegexCImpl;
|
||||
public:
|
||||
|
@ -1050,8 +1149,8 @@ private:
|
|||
UBool isWordBoundary(int32_t pos); // perform Perl-like \b test
|
||||
UBool isUWordBoundary(int32_t pos); // perform RBBI based \b test
|
||||
REStackFrame *resetStack();
|
||||
inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
|
||||
int32_t frameSize, UErrorCode &status);
|
||||
inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
|
||||
void IncrementTime(UErrorCode &status);
|
||||
|
||||
|
||||
const RegexPattern *fPattern;
|
||||
|
@ -1059,6 +1158,7 @@ private:
|
|||
// should delete it when through.
|
||||
|
||||
const UnicodeString *fInput; // The text being matched. Is never NULL.
|
||||
int32_t fFrameSize; // The size of a frame in the backtrack stack.
|
||||
|
||||
int32_t fRegionStart; // Start of the input region, default = 0.
|
||||
int32_t fRegionLimit; // End of input region, default to input.length.
|
||||
|
@ -1101,9 +1201,25 @@ private:
|
|||
int32_t *fData; // Data area for use by the compiled pattern.
|
||||
int32_t fSmallData[8]; // Use this for data if it's enough.
|
||||
|
||||
int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
|
||||
// match engine run. Zero for unlimited.
|
||||
|
||||
int32_t fTime; // Match time, accumulates while matching.
|
||||
int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
|
||||
// Kept separately from fTime to keep as much
|
||||
// code as possible out of the inline
|
||||
// StateSave function.
|
||||
|
||||
int32_t fStackLimit; // Maximum memory size to use for the backtrack
|
||||
// stack, in bytes. Zero for unlimited.
|
||||
|
||||
URegexMatchCallback fCallbackFn; // Pointer to match progress callback funct.
|
||||
// NULL if there is no callback.
|
||||
const void *fCallbackContext; // User Context ptr for callback function.
|
||||
|
||||
UBool fTraceDebug; // Set true for debug tracing of match engine.
|
||||
|
||||
UErrorCode fDeferredStatus; // Save error state if that cannot be immediately
|
||||
UErrorCode fDeferredStatus; // Save error state that cannot be immediately
|
||||
// reported, or that permanently disables this matcher.
|
||||
|
||||
RuleBasedBreakIterator *fWordBreakItr;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2004-2007, International Business Machines
|
||||
* Copyright (C) 2004-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: regex.h
|
||||
|
@ -843,5 +843,145 @@ uregex_split( URegularExpression *regexp,
|
|||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Set a processing time limit for match operations with this URegularExpression.
|
||||
*
|
||||
* Some patterns, when matching certain strings, can run in exponential time.
|
||||
* For practical purposes, the match operation may appear to be in an
|
||||
* infinite loop.
|
||||
* When a limit is set a match operation will fail with an error if the
|
||||
* limit is exceeded.
|
||||
* <p>
|
||||
* The units of the limit are steps of the match engine.
|
||||
* Correspondence with actual processor time will depend on the speed
|
||||
* of the processor and the details of the specific pattern, but will
|
||||
* typically be on the order of milliseconds.
|
||||
* <p>
|
||||
* By default, the matching time is not limited.
|
||||
* <p>
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param limit The limit value, or 0 for no limit.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setTimeLimit(URegularExpression *regexp,
|
||||
int32_t limit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get the time limit for matches with this URegularExpression.
|
||||
* A return value of zero indicates that there is no limit.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return the maximum allowed time for a match, in units of processing steps.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uregex_getTimeLimit(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Set the amount of heap storage available for use by the match backtracking stack.
|
||||
* <p>
|
||||
* ICU uses a backtracking regular expression engine, with the backtrack stack
|
||||
* maintained on the heap. This function sets the limit to the amount of memory
|
||||
* that can be used for this purpose. A backtracking stack overflow will
|
||||
* result in an error from the match operation that caused it.
|
||||
* <p>
|
||||
* A limit is desirable because a malicious or poorly designed pattern can use
|
||||
* excessive memory, potentially crashing the process. A limit is enabled
|
||||
* by default.
|
||||
* <p>
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param limit The maximum size, in bytes, of the matching backtrack stack.
|
||||
* A value of zero means no limit.
|
||||
* The limit must be greater than or equal to zero.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
*
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setStackLimit(URegularExpression *regexp,
|
||||
int32_t limit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get the size of the heap storage available for use by the back tracking stack.
|
||||
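*
* @param regexp The compiled regular expression.
* @param status A reference to a UErrorCode to receive any errors.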
|
||||
* @return the maximum backtracking stack size, in bytes, or zero if the
|
||||
* stack size is unlimited.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uregex_getStackLimit(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Function pointer for a regular expression matching callback function.
|
||||
* When set, a callback function will be called periodically during matching
|
||||
* operations. If the callback function returns FALSE, the matching
|
||||
* operation will be terminated early.
|
||||
*
|
||||
* Note: the callback function must not call other functions on this
|
||||
* URegularExpression.
|
||||
*
|
||||
* @param context context pointer. The callback function will be invoked
|
||||
* with the context specified at the time that
|
||||
* uregex_setMatchCallback() is called.
|
||||
* @param steps the accumulated processing time, in match steps,
|
||||
* for this matching operation.
|
||||
* @return TRUE to continue the matching operation.
|
||||
* FALSE to terminate the matching operation.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
typedef UBool (U_EXPORT2 *URegexMatchCallback) (
|
||||
const void *context,
|
||||
int32_t steps);
|
||||
|
||||
/**
|
||||
* Set a callback function for this URegularExpression.
|
||||
* During matching operations the function will be called periodically,
|
||||
* giving the application the opportunity to terminate a long-running
|
||||
* match.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param callback A pointer to the user-supplied callback function.
|
||||
* @param context User context pointer. The value supplied at the
|
||||
* time the callback function is set will be saved
|
||||
* and passed to the callback each time that it is called.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setMatchCallback(URegularExpression *regexp,
|
||||
URegexMatchCallback callback,
|
||||
const void *context,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Get the callback function for this URegularExpression.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param callback Out parameter, receives a pointer to the user-supplied
|
||||
* callback function.
|
||||
* @param context Out parameter, receives the user context pointer that
|
||||
* was set when uregex_setMatchCallback() was called.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @draft ICU 4.0
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_getMatchCallback(const URegularExpression *regexp,
|
||||
URegexMatchCallback *callback,
|
||||
const void **context,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||
#endif /* UREGEX_H */
|
||||
|
|
|
@ -74,7 +74,6 @@ static UBool validateRE(const URegularExpression *re, UErrorCode *status, UBool
|
|||
return FALSE;
|
||||
}
|
||||
if (re == NULL || re->fMagic != REXP_MAGIC) {
|
||||
// U_ASSERT(FALSE);
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -630,6 +629,103 @@ uregex_requireEnd(const URegularExpression *regexp,
|
|||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_setTimeLimit
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: forwards the time limit to the underlying RegexMatcher.
// Does nothing beyond what validateRE() reports when the handle is rejected.
U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status) {
    if (!validateRE(regexp, status)) {
        return;
    }
    regexp->fMatcher->setTimeLimit(limit, *status);
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_getTimeLimit
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: returns the matcher's time limit, or 0 when validateRE()
// rejects the handle.
U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status) {
    if (!validateRE(regexp, status)) {
        return 0;
    }
    return regexp->fMatcher->getTimeLimit();
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_setStackLimit
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: forwards the backtrack stack limit to the underlying
// RegexMatcher once the handle has passed validateRE().
U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status) {
    if (!validateRE(regexp, status)) {
        return;
    }
    regexp->fMatcher->setStackLimit(limit, *status);
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_getStackLimit
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: returns the matcher's backtrack stack limit, or 0 when
// validateRE() rejects the handle.
U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status) {
    if (!validateRE(regexp, status)) {
        return 0;
    }
    return regexp->fMatcher->getStackLimit();
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_setMatchCallback
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: installs the callback function and its context pointer on
// the underlying RegexMatcher once the handle has passed validateRE().
U_CAPI void U_EXPORT2
uregex_setMatchCallback(URegularExpression  *regexp,
                        URegexMatchCallback  callback,
                        const void          *context,
                        UErrorCode          *status) {
    if (!validateRE(regexp, status)) {
        return;
    }
    regexp->fMatcher->setMatchCallback(callback, context, *status);
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_getMatchCallback
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// C API wrapper: retrieves the callback function and context pointer
// currently set on the underlying RegexMatcher.
//
//   regexp    The compiled regular expression.
//   callback  Out parameter, receives the callback function pointer.
//   context   Out parameter, receives the user context pointer.
//   status    Receives any errors. Set to U_ILLEGAL_ARGUMENT_ERROR when
//             either out parameter is NULL.
U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression *regexp,
                        URegexMatchCallback      *callback,
                        const void              **context,
                        UErrorCode               *status) {
    if (!validateRE(regexp, status)) {
        return;
    }
    // Both out-parameters are dereferenced below; report a NULL argument
    // as an error rather than dereferencing it.
    if (callback == NULL || context == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    regexp->fMatcher->getMatchCallback(*callback, *context, *status);
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_replaceAll
|
||||
|
|
|
@ -100,7 +100,31 @@ void addURegexTest(TestNode** root)
|
|||
addTest(root, &TestBug4315, "regex/TestBug4315");
|
||||
}
|
||||
|
||||
/*
|
||||
* Call back function and context struct used for testing
|
||||
* regular expression user callbacks. This test is mostly the same as
|
||||
* the corresponding C++ test in intltest.
|
||||
*/
|
||||
/* State shared with TestCallbackFn, which receives it through its context pointer. */
typedef struct callBackContext {
|
||||
int32_t maxCalls;   /* TestCallbackFn returns FALSE once numCalls reaches this value. */
|
||||
int32_t numCalls;   /* Incremented on every invocation of TestCallbackFn. */
|
||||
int32_t lastSteps;  /* 'steps' value seen on the previous invocation; the next call is expected to pass lastSteps+1. */
|
||||
} callBackContext;
|
||||
|
||||
static UBool U_EXPORT2 U_CALLCONV
|
||||
TestCallbackFn(const void *context, int32_t steps) {
|
||||
callBackContext *info = (callBackContext *)context;
|
||||
if (info->lastSteps+1 != steps) {
|
||||
log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
|
||||
}
|
||||
info->lastSteps = steps;
|
||||
info->numCalls++;
|
||||
return (info->numCalls < info->maxCalls);
|
||||
}
|
||||
|
||||
/*
|
||||
* Regular Expression C API Tests
|
||||
*/
|
||||
static void TestRegexCAPI(void) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
URegularExpression *re;
|
||||
|
@ -1144,8 +1168,72 @@ static void TestRegexCAPI(void) {
|
|||
uregex_close(re);
|
||||
}
|
||||
|
||||
/*
|
||||
* set/getTimeLimit
|
||||
*/
|
||||
TEST_SETUP("abc$", "abcdef", 0);
|
||||
TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
|
||||
uregex_setTimeLimit(re, 1000, &status);
|
||||
TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_setTimeLimit(re, -1, &status);
|
||||
TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
|
||||
status = U_ZERO_ERROR;
|
||||
TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
|
||||
TEST_TEARDOWN;
|
||||
|
||||
/*
|
||||
* set/get Stack Limit
|
||||
*/
|
||||
TEST_SETUP("abc$", "abcdef", 0);
|
||||
TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
|
||||
uregex_setStackLimit(re, 40000, &status);
|
||||
TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_setStackLimit(re, -1, &status);
|
||||
TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
|
||||
status = U_ZERO_ERROR;
|
||||
TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
|
||||
TEST_TEARDOWN;
|
||||
|
||||
|
||||
/*
|
||||
* Get/Set callback functions
|
||||
* This test is copied from intltest regex/Callbacks
|
||||
* The pattern and test data will run long enough to cause the callback
|
||||
* to be invoked. The nested '+' operators give exponential time
|
||||
* behavior with increasing string length.
|
||||
*/
|
||||
TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
|
||||
callBackContext cbInfo = {4, 0, 0};
|
||||
const void *pContext = &cbInfo;
|
||||
URegexMatchCallback returnedFn = &TestCallbackFn;
|
||||
|
||||
/* Getting the callback fn when it hasn't been set must return NULL */
|
||||
uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(returnedFn == NULL);
|
||||
TEST_ASSERT(pContext == NULL);
|
||||
|
||||
/* Set the callback and do a match. */
|
||||
/* The callback function should record that it has been called. */
|
||||
uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(cbInfo.numCalls == 0);
|
||||
TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(cbInfo.numCalls > 0);
|
||||
|
||||
/* Getting the callback should return the values that were set above. */
|
||||
uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
|
||||
TEST_ASSERT(returnedFn == &TestCallbackFn);
|
||||
TEST_ASSERT(pContext == &cbInfo);
|
||||
|
||||
TEST_TEARDOWN;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void TestBug4315(void) {
|
||||
UErrorCode theICUError = U_ZERO_ERROR;
|
||||
URegularExpression *theRegEx;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2002-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 2002-2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -66,7 +66,9 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
|||
case 6: name = "PerlTests";
|
||||
if (exec) PerlTests();
|
||||
break;
|
||||
|
||||
case 7: name = "Callbacks";
|
||||
if (exec) Callbacks();
|
||||
break;
|
||||
|
||||
default: name = "";
|
||||
break; //needed to end loop
|
||||
|
@ -837,6 +839,90 @@ void RegexTest::API_Match() {
|
|||
}
|
||||
#endif
|
||||
|
||||
//
|
||||
// Time Outs.
|
||||
// Note: These tests will need to be changed when the regexp engine is
|
||||
// able to detect and cut short the exponential time behavior on
|
||||
// this type of match.
|
||||
//
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Enough 'a's in the string to cause the match to time out.
|
||||
// (Each additional 'a' doubles the time)
|
||||
UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");
|
||||
RegexMatcher matcher("(a+)+b", testString, 0, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(matcher.getTimeLimit() == 0);
|
||||
matcher.setTimeLimit(100, status);
|
||||
REGEX_ASSERT(matcher.getTimeLimit() == 100);
|
||||
REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
||||
REGEX_ASSERT(status == U_REGEX_TIME_OUT);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Few enough 'a's to slip in under the time limit.
|
||||
UnicodeString testString("aaaaaaaaaaaaaaaaaa");
|
||||
RegexMatcher matcher("(a+)+b", testString, 0, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.setTimeLimit(100, status);
|
||||
REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
||||
REGEX_CHECK_STATUS;
|
||||
}
|
||||
|
||||
//
|
||||
// Stack Limits
|
||||
//
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'
|
||||
|
||||
// Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
|
||||
// of the '+', and makes the stack frames larger.
|
||||
RegexMatcher matcher("(A)+A$", testString, 0, status);
|
||||
|
||||
// With the default stack, this match should fail to run
|
||||
REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
||||
REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
|
||||
|
||||
// With unlimited stack, it should run
|
||||
status = U_ZERO_ERROR;
|
||||
matcher.setStackLimit(0, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(matcher.lookingAt(status) == TRUE);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(matcher.getStackLimit() == 0);
|
||||
|
||||
// With a limited stack, the match should fail
|
||||
status = U_ZERO_ERROR;
|
||||
matcher.setStackLimit(10000, status);
|
||||
REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
||||
REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
|
||||
REGEX_ASSERT(matcher.getStackLimit() == 10000);
|
||||
}
|
||||
|
||||
// A pattern that doesn't save state should work with
|
||||
// a minimal sized stack
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString testString = "abc";
|
||||
RegexMatcher matcher("abc", testString, 0, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.setStackLimit(30, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(matcher.matches(status) == TRUE);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(matcher.getStackLimit() == 30);
|
||||
|
||||
// Negative stack sizes should fail
|
||||
status = U_ZERO_ERROR;
|
||||
matcher.setStackLimit(1000, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.setStackLimit(-1, status);
|
||||
REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
|
||||
REGEX_ASSERT(matcher.getStackLimit() == 1000);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -2299,6 +2385,98 @@ void RegexTest::PerlTests() {
|
|||
}
|
||||
|
||||
|
||||
//
|
||||
// Callbacks() Test the callback function.
|
||||
// When set, callbacks occur periodically during matching operations,
|
||||
// giving the application code the ability to abort the operation
|
||||
// before its normal completion.
|
||||
//
|
||||
|
||||
struct callBackContext {
|
||||
RegexTest *test;
|
||||
int32_t maxCalls;
|
||||
int32_t numCalls;
|
||||
int32_t lastSteps;
|
||||
void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;};
|
||||
};
|
||||
|
||||
U_CDECL_BEGIN
|
||||
// Match callback used by RegexTest::Callbacks().
//
//   context  points at the callBackContext owned by the running test.
//   steps    step counter supplied by the caller; each invocation is
//            expected to be exactly one greater than the previous one.
//
// Reports an error through the owning test when the step counter is out of
// sequence, records the call, and returns TRUE while fewer than maxCalls
// calls have been made.
static UBool U_CALLCONV
testCallBackFn(const void *context, int32_t steps) {
    callBackContext *cb = (callBackContext *)context;
    const int32_t expectedSteps = cb->lastSteps+1;

    if (steps != expectedSteps) {
        cb->test->errln("incorrect steps in callback. Expected %d, got %d\n", expectedSteps, steps);
    }

    cb->lastSteps = steps;
    cb->numCalls += 1;
    return (cb->numCalls < cb->maxCalls);
}
|
||||
U_CDECL_END
|
||||
|
||||
void RegexTest::Callbacks() {
|
||||
{
|
||||
// Getter returns NULLs if no callback has been set
|
||||
|
||||
// The variables that the getter will fill in.
|
||||
// Init to non-null values so that the action of the getter can be seen.
|
||||
const void *returnedContext = &returnedContext;
|
||||
URegexMatchCallback returnedFn = &testCallBackFn;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexMatcher matcher("x", 0, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.getMatchCallback(returnedFn, returnedContext, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(returnedFn == NULL);
|
||||
REGEX_ASSERT(returnedContext == NULL);
|
||||
}
|
||||
|
||||
{
|
||||
// Set and Get work
|
||||
callBackContext cbInfo = {this, 0, 0, 0};
|
||||
const void *returnedContext;
|
||||
URegexMatchCallback returnedFn;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexMatcher matcher("((.)+\\2)+x", 0, status); // A pattern that can run long.
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.getMatchCallback(returnedFn, returnedContext, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(returnedFn == &testCallBackFn);
|
||||
REGEX_ASSERT(returnedContext == &cbInfo);
|
||||
|
||||
// A short-running match shouldn't invoke the callback
|
||||
status = U_ZERO_ERROR;
|
||||
cbInfo.reset(1);
|
||||
UnicodeString s = "xxx";
|
||||
matcher.reset(s);
|
||||
REGEX_ASSERT(matcher.matches(status));
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(cbInfo.numCalls == 0);
|
||||
|
||||
// A medium-length match that runs long enough to invoke the
|
||||
// callback, but not so long that the callback aborts it.
|
||||
status = U_ZERO_ERROR;
|
||||
cbInfo.reset(4);
|
||||
s = "aaaaaaaaaaaaaaaaaaab";
|
||||
matcher.reset(s);
|
||||
REGEX_ASSERT(matcher.matches(status)==FALSE);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(cbInfo.numCalls > 0);
|
||||
|
||||
// A longer running match that the callback function will abort.
|
||||
status = U_ZERO_ERROR;
|
||||
cbInfo.reset(4);
|
||||
s = "aaaaaaaaaaaaaaaaaaaaaaab";
|
||||
matcher.reset(s);
|
||||
REGEX_ASSERT(matcher.matches(status)==FALSE);
|
||||
REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
|
||||
REGEX_ASSERT(cbInfo.numCalls == 4);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2002-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 2002-2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -30,6 +30,7 @@ public:
|
|||
virtual void Extended();
|
||||
virtual void Errors();
|
||||
virtual void PerlTests();
|
||||
virtual void Callbacks();
|
||||
|
||||
// The following functions are internal to the regexp tests.
|
||||
virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line);
|
||||
|
|
Loading…
Add table
Reference in a new issue