diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp index 92bad256de3..61c84388de9 100644 --- a/icu4c/source/i18n/rematch.cpp +++ b/icu4c/source/i18n/rematch.cpp @@ -216,6 +216,8 @@ void RegexMatcher::init(UErrorCode &status) { fStackLimit = DEFAULT_BACKTRACK_STACK_CAPACITY; fCallbackFn = NULL; fCallbackContext = NULL; + fFindProgressCallbackFn = NULL; + fFindProgressCallbackContext = NULL; fTraceDebug = FALSE; fDeferredStatus = status; fData = fSmallData; @@ -548,7 +550,6 @@ int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const { } - //-------------------------------------------------------------------------------- // // find() @@ -641,6 +642,8 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } U_ASSERT(FALSE); @@ -687,6 +690,8 @@ UBool RegexMatcher::find() { return FALSE; } startPos = pos; + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } U_ASSERT(FALSE); @@ -718,6 +723,8 @@ UBool RegexMatcher::find() { return FALSE; } startPos = pos; + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } U_ASSERT(FALSE); @@ -764,6 +771,8 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } else { for (;;) { @@ -792,6 +801,8 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. 
+ if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } } @@ -925,6 +936,8 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } U_ASSERT(FALSE); @@ -964,6 +977,8 @@ UBool RegexMatcher::findUsingChunk() { fHitEnd = TRUE; return FALSE; } + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } U_ASSERT(FALSE); @@ -991,6 +1006,8 @@ UBool RegexMatcher::findUsingChunk() { fHitEnd = TRUE; return FALSE; } + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } U_ASSERT(FALSE); @@ -1030,6 +1047,8 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. + if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } else { for (;;) { @@ -1056,6 +1075,8 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. 
+ if (ReportFindProgress(startPos, fDeferredStatus) == FALSE) + return FALSE; } } } @@ -1889,7 +1910,6 @@ RegexMatcher &RegexMatcher::reset(UText *input) { return *this; }*/ - RegexMatcher &RegexMatcher::reset(int32_t position, UErrorCode &status) { if (U_FAILURE(status)) { return *this; @@ -2330,6 +2350,38 @@ void RegexMatcher::getMatchCallback(URegexMatchCallback *&callback, } +//-------------------------------------------------------------------------------- +// +// setFindProgressCallback +// +//-------------------------------------------------------------------------------- +void RegexMatcher::setFindProgressCallback(URegexFindProgressCallback *callback, + const void *context, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + fFindProgressCallbackFn = callback; + fFindProgressCallbackContext = context; +} + + +//-------------------------------------------------------------------------------- +// +// getFindProgressCallback +// +//-------------------------------------------------------------------------------- +void RegexMatcher::getFindProgressCallback(URegexFindProgressCallback *&callback, + const void *&context, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + callback = fFindProgressCallbackFn; + context = fFindProgressCallbackContext; +} + + //================================================================================ // // Code following this point in this file is the internal @@ -2517,6 +2569,29 @@ void RegexMatcher::IncrementTime(UErrorCode &status) { } } +//-------------------------------------------------------------------------------- +// +// ReportFindProgress This function is called once for each advance in the target +// string from the find() function, and calls the user progress callback +// function if there is one installed. 
+// +// NOTE: +// +// If the find operation needs to be aborted because the user +// callback asked for it, status is set to U_ZERO_ERROR (U_REGEX_STOPPED_BY_CALLER is not reported) +// and FALSE is returned; the find loop that called this function then returns FALSE. 
+// +//-------------------------------------------------------------------------------- +UBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) { + if (fFindProgressCallbackFn != NULL) { + if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) { + status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/; + return FALSE; + } + } + return TRUE; +} + //-------------------------------------------------------------------------------- // // StateSave diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index 672f7ea5e01..56f8de6a186 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -1548,6 +1548,40 @@ public: UErrorCode &status); + /** + * Set a progress callback function for use with find operations on this Matcher. + * During find operations, the callback will be invoked after each return from a + * match attempt, giving the application the opportunity to terminate a long-running + * find operation. + * + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. + * @draft ICU 4.6 + */ + virtual void setFindProgressCallback(URegexFindProgressCallback *callback, + const void *context, + UErrorCode &status); + + + /** + * Get the find progress callback function for this Matcher. + * + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when setFindProgressCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. 
+ * @draft ICU 4.6 + */ + virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, + const void *&context, + UErrorCode &status); + + + /** * setTrace Debug function, enable/disable tracing of the matching engine. * For internal ICU development use only. DO NO USE!!!! @@ -1598,6 +1632,7 @@ private: REStackFrame *resetStack(); inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); void IncrementTime(UErrorCode &status); + UBool ReportFindProgress(int64_t matchIndex, UErrorCode &status); int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; @@ -1673,6 +1708,11 @@ private: // NULL if there is no callback. const void *fCallbackContext; // User Context ptr for callback function. + URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to match progress callback funct. + // NULL if there is no callback. + const void *fFindProgressCallbackContext; // User Context ptr for callback function. + + UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility. UBool fTraceDebug; // Set true for debug tracing of match engine. diff --git a/icu4c/source/i18n/unicode/uregex.h b/icu4c/source/i18n/unicode/uregex.h index 0e8787de46a..8cc527fcf4c 100644 --- a/icu4c/source/i18n/unicode/uregex.h +++ b/icu4c/source/i18n/unicode/uregex.h @@ -1256,6 +1256,67 @@ uregex_getMatchCallback(const URegularExpression *regexp, UErrorCode *status); +/** + * Function pointer for a regular expression find/findNext callback function. + * When set, a callback function will be called during a find operation after each + * attempt at a match. If the call back function returns FALSE, the find + * operation will be terminated early. + * + * Note: the callback function must not call other functions on this + * URegularExpression + * + * @param context context pointer. 
The callback function will be invoked + * with the context specified at the time that + * uregex_setFindProgressCallback() is called. + * @param matchIndex the next index at which a match will be attempted for this + * find operation. If this callback interrupts the search, this is the + * index at which a find/findNext operation may be re-initiated. + * @return TRUE to continue the matching operation. + * FALSE to terminate the matching operation. + * @draft ICU 4.6 + */ +U_CDECL_BEGIN +typedef UBool U_CALLCONV URegexFindProgressCallback ( + const void *context, + int64_t matchIndex); +U_CDECL_END + +/** + * During find operations, this callback will be invoked after each return from a + * match attempt, specifying the next index at which a match operation is about to be attempted, + * giving the application the opportunity to terminate a long-running find operation. + * + * @param regexp The compiled regular expression. + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. + * @draft ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_setFindProgressCallback(URegularExpression *regexp, + URegexFindProgressCallback *callback, + const void *context, + UErrorCode *status); + + +/** + * Get the find progress callback function for this URegularExpression. + * + * @param regexp The compiled regular expression. + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when uregex_setFindProgressCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. 
+ * @draft ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_getFindProgressCallback(const URegularExpression *regexp, + URegexFindProgressCallback **callback, + const void **context, + UErrorCode *status); #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ #endif /* UREGEX_H */ diff --git a/icu4c/source/i18n/uregex.cpp b/icu4c/source/i18n/uregex.cpp index 37c567d6c13..1d5c65b5c6e 100644 --- a/icu4c/source/i18n/uregex.cpp +++ b/icu4c/source/i18n/uregex.cpp @@ -997,6 +997,40 @@ uregex_getMatchCallback(const URegularExpression *regexp2, } +//------------------------------------------------------------------------------ +// +// uregex_setFindProgressCallback +// +//------------------------------------------------------------------------------ +U_CAPI void U_EXPORT2 +uregex_setFindProgressCallback(URegularExpression *regexp2, + URegexFindProgressCallback *callback, + const void *context, + UErrorCode *status) { + RegularExpression *regexp = (RegularExpression*)regexp2; + if (validateRE(regexp, status)) { + regexp->fMatcher->setFindProgressCallback(callback, context, *status); + } +} + + +//------------------------------------------------------------------------------ +// +// uregex_getFindProgressCallback +// +//------------------------------------------------------------------------------ +U_CAPI void U_EXPORT2 +uregex_getFindProgressCallback(const URegularExpression *regexp2, + URegexFindProgressCallback **callback, + const void **context, + UErrorCode *status) { + RegularExpression *regexp = (RegularExpression*)regexp2; + if (validateRE(regexp, status)) { + regexp->fMatcher->getFindProgressCallback(*callback, *context, *status); + } +} + + //------------------------------------------------------------------------------ // // uregex_replaceAll diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index 98748658744..1548f250ff4 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -77,30 
+77,33 
@@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch case 7: name = "Callbacks"; if (exec) Callbacks(); break; - case 8: name = "Bug 6149"; + case 8: name = "FindProgressCallbacks"; + if (exec) FindProgressCallbacks(); + break; + case 9: name = "Bug 6149"; if (exec) Bug6149(); break; - case 9: name = "UTextBasic"; + case 10: name = "UTextBasic"; if (exec) UTextBasic(); break; - case 10: name = "API_Match_UTF8"; + case 11: name = "API_Match_UTF8"; if (exec) API_Match_UTF8(); break; - case 11: name = "API_Replace_UTF8"; + case 12: name = "API_Replace_UTF8"; if (exec) API_Replace_UTF8(); break; - case 12: name = "API_Pattern_UTF8"; + case 13: name = "API_Pattern_UTF8"; if (exec) API_Pattern_UTF8(); break; - case 13: name = "PerlTestsUTF8"; + case 14: name = "PerlTestsUTF8"; if (exec) PerlTestsUTF8(); break; - case 14: name = "PreAllocatedUTextCAPI"; + case 15: name = "PreAllocatedUTextCAPI"; if (exec) PreAllocatedUTextCAPI(); break; - case 15: name = "Bug 7651"; - if (exec) Bug7651(); - break; + case 16: name = "Bug 7651"; + if (exec) Bug7651(); + break; default: name = ""; break; //needed to end loop @@ -4411,6 +4414,114 @@ void RegexTest::Callbacks() { } +// +// FindProgressCallbacks() Test the find "progress" callback function. +// When set, the find progress callback will be invoked during a find operation +// after each return from a match attempt, giving the application the opportunity +// to terminate a long-running find operation before its normal completion. 
+// + +struct progressCallBackContext { + RegexTest *test; + int64_t lastIndex; + int32_t maxCalls; + int32_t numCalls; + void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; +}; + +U_CDECL_BEGIN +static UBool U_CALLCONV +testProgressCallBackFn(const void *context, int64_t matchIndex) { + progressCallBackContext *info = (progressCallBackContext *)context; + info->numCalls++; + info->lastIndex = matchIndex; +// info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls); + return (info->numCalls < info->maxCalls); +} +U_CDECL_END + +void RegexTest::FindProgressCallbacks() { + { + // Getter returns NULLs if no callback has been set + + // The variables that the getter will fill in. + // Init to non-null values so that the action of the getter can be seen. + const void *returnedContext = &returnedContext; + URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; + + UErrorCode status = U_ZERO_ERROR; + RegexMatcher matcher("x", 0, status); + REGEX_CHECK_STATUS; + matcher.getFindProgressCallback(returnedFn, returnedContext, status); + REGEX_CHECK_STATUS; + REGEX_ASSERT(returnedFn == NULL); + REGEX_ASSERT(returnedContext == NULL); + } + + { + // Set and Get work + progressCallBackContext cbInfo = {this, 0, 0, 0}; + const void *returnedContext; + URegexFindProgressCallback *returnedFn; + UErrorCode status = U_ZERO_ERROR; + RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. + REGEX_CHECK_STATUS; + matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); + REGEX_CHECK_STATUS; + matcher.getFindProgressCallback(returnedFn, returnedContext, status); + REGEX_CHECK_STATUS; + REGEX_ASSERT(returnedFn == testProgressCallBackFn); + REGEX_ASSERT(returnedContext == &cbInfo); + + // A short-running match should NOT invoke the callback. 
+ status = U_ZERO_ERROR; + cbInfo.reset(100); + UnicodeString s = "abxxx"; + matcher.reset(s); +#if 0 + matcher.setTrace(TRUE); +#endif + REGEX_ASSERT(matcher.find(0, status)); + REGEX_CHECK_STATUS; + REGEX_ASSERT(cbInfo.numCalls == 0); + + // A medium running match that causes matcher.find() to invoke our callback for each index. + status = U_ZERO_ERROR; + s = "aaaaaaaaaaaaaaaaaaab"; + cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string + matcher.reset(s); + REGEX_ASSERT(matcher.find(0, status)==FALSE); + REGEX_CHECK_STATUS; + REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); + + // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. + status = U_ZERO_ERROR; + UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; + cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string + matcher.reset(s1); + REGEX_ASSERT(matcher.find(0, status)==FALSE); + REGEX_CHECK_STATUS; + REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); + +#if 0 + // Now a match that will succeed, but after an interruption + status = U_ZERO_ERROR; + UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; + cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string + matcher.reset(s2); + REGEX_ASSERT(matcher.find(0, status)==FALSE); + REGEX_CHECK_STATUS; + // Now retry the match from where left off + cbInfo.maxCalls = 100; // No callback limit + REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); + REGEX_CHECK_STATUS; +#endif + } + + +} + + //--------------------------------------------------------------------------- // // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable @@ -4619,6 +4730,7 @@ void RegexTest::PreAllocatedUTextCAPI () { void RegexTest::Bug7651() { UnicodeString pattern1("((?