ICU-6656 URegex_replaceAll(), fix incorrect size return value when output buffer is too small

X-SVN-Rev: 25221
This commit is contained in:
Andy Heninger 2009-01-08 06:55:22 +00:00
parent 2264a8a9ab
commit 04c57ff523
2 changed files with 78 additions and 37 deletions

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: regex.cpp
@ -749,12 +749,27 @@ uregex_replaceAll(URegularExpression *regexp,
}
int32_t len = 0;
uregex_reset(regexp, 0, status);
while (uregex_findNext(regexp, status)) {
len += uregex_appendReplacement(regexp, replacementText, replacementLength,
// Note: Separate error code variables for findNext() and appendReplacement()
// are used so that destination buffer overflow errors
// in appendReplacement won't stop findNext() from working.
// appendReplacement() and appendTail() special case incoming buffer
// overflow errors, continuing to return the correct length.
UErrorCode findStatus = *status;
while (uregex_findNext(regexp, &findStatus)) {
len += uregex_appendReplacement(regexp, replacementText, replacementLength,
&destBuf, &destCapacity, status);
}
len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
if (U_FAILURE(findStatus)) {
// If anything went wrong with the findNext(), make that error trump
// whatever may have happened with the append() operations.
// Errors in findNext() are not expected.
*status = findStatus;
}
return len;
}
@ -868,7 +883,7 @@ int32_t RegexCImpl::appendReplacement(URegularExpression *regexp,
// A series of appendReplacements, appendTail need to correctly preflight
// the buffer size when an overflow happens somewhere in the middle.
UBool pendingBufferOverflow = FALSE;
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity == 0) {
if (*status == U_BUFFER_OVERFLOW_ERROR && *destCapacity == 0) {
pendingBufferOverflow = TRUE;
*status = U_ZERO_ERROR;
}
@ -1078,19 +1093,11 @@ int32_t RegexCImpl::appendTail(URegularExpression *regexp,
UErrorCode *status)
{
if (destCapacity == NULL || destBuf == NULL ||
*destBuf == NULL && *destCapacity > 0 ||
*destCapacity < 0)
{
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
// If we come in with a buffer overflow error, don't suppress the operation.
// A series of appendReplacements, appendTail need to correctly preflight
// the buffer size when an overflow happens somewhere in the middle.
UBool pendingBufferOverflow = FALSE;
if (*status == U_BUFFER_OVERFLOW_ERROR && *destCapacity == 0) {
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
pendingBufferOverflow = TRUE;
*status = U_ZERO_ERROR;
}
@ -1098,6 +1105,15 @@ int32_t RegexCImpl::appendTail(URegularExpression *regexp,
if (validateRE(regexp, status) == FALSE) {
return 0;
}
if (destCapacity == NULL || destBuf == NULL ||
*destBuf == NULL && *destCapacity > 0 ||
*destCapacity < 0)
{
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
RegexMatcher *m = regexp->fMatcher;
int32_t srcIdx;

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2004-2008, International Business Machines Corporation and
* Copyright (c) 2004-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -807,64 +807,71 @@ static void TestRegexCAPI(void) {
* replaceAll()
*/
{
UChar text1[80];
UChar text2[80];
UChar replText[80];
UChar text1[80]; /* "Replace xaax x1x x...x." */
UChar text2[80]; /* "No match Here" */
UChar replText[80]; /* "<$1>" */
UChar replText2[80]; /* "<<$1>>" */
const char * pattern = "x(.*?)x";
const char * expectedResult = "Replace <aa> <1> <...>.";
const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
UChar buf[80];
int32_t resultSz;
int32_t resultSize;
int32_t expectedResultSize;
int32_t expectedResultSize2;
int32_t i;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
expectedResultSize = u_strlen(text1);
u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
expectedResultSize = strlen(expectedResult);
expectedResultSize2 = strlen(expectedResult2);
status = U_ZERO_ERROR;
re = uregex_openC("x(.*?)x", 0, NULL, &status);
re = uregex_openC(pattern, 0, NULL, &status);
TEST_ASSERT_SUCCESS(status);
/* Normal case, with match */
uregex_setText(re, text1, -1, &status);
resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
TEST_ASSERT_STRING(expectedResult, buf, TRUE);
TEST_ASSERT(resultSize == expectedResultSize);
/* No match. Text should copy to output with no changes. */
status = U_ZERO_ERROR;
uregex_setText(re, text2, -1, &status);
resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("No match here.", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
TEST_ASSERT(resultSize == u_strlen(text2));
/* Match, output just fills buffer, no termination warning. */
status = U_ZERO_ERROR;
uregex_setText(re, text1, -1, &status);
memset(buf, -1, sizeof(buf));
resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
TEST_ASSERT_STRING(expectedResult, buf, FALSE);
TEST_ASSERT(resultSize == expectedResultSize);
TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
/* Do the replaceAll again, without first resetting anything.
* Should give the same results.
*/
status = U_ZERO_ERROR;
memset(buf, -1, sizeof(buf));
resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
/* NULL buffer, zero buffer length */
status = U_ZERO_ERROR;
resultSz = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
/* Buffer too small. Try every size, which will tickle edge cases
* in uregex_appendReplacement (used by replaceAll) */
@ -872,15 +879,33 @@ static void TestRegexCAPI(void) {
char expected[80];
status = U_ZERO_ERROR;
memset(buf, -1, sizeof(buf));
resultSz = uregex_replaceAll(re, replText, -1, buf, i, &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
strcpy(expected, "Replace <aa> <1> <...>.");
strcpy(expected, expectedResult);
expected[i] = 0;
TEST_ASSERT_STRING(expected, buf, FALSE);
TEST_ASSERT(resultSz == expectedResultSize);
TEST_ASSERT(resultSize == expectedResultSize);
TEST_ASSERT(buf[i] == (UChar)0xffff);
}
/* Buffer too small. Same as previous test, except this time the replacement
* text is longer than the match capture group, making the length of the complete
* replacement longer than the original string.
*/
for (i=0; i<expectedResultSize2; i++) {
char expected[80];
status = U_ZERO_ERROR;
memset(buf, -1, sizeof(buf));
resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
strcpy(expected, expectedResult2);
expected[i] = 0;
TEST_ASSERT_STRING(expected, buf, FALSE);
TEST_ASSERT(resultSize == expectedResultSize2);
TEST_ASSERT(buf[i] == (UChar)0xffff);
}
uregex_close(re);
}