mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-6656 URegex_replaceAll(), fix incorrect size return value when output buffer is too small
X-SVN-Rev: 25221
This commit is contained in:
parent
2264a8a9ab
commit
04c57ff523
2 changed files with 78 additions and 37 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2008, International Business Machines
|
||||
* Copyright (C) 2004-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: regex.cpp
|
||||
|
@ -749,12 +749,27 @@ uregex_replaceAll(URegularExpression *regexp,
|
|||
}
|
||||
|
||||
int32_t len = 0;
|
||||
|
||||
uregex_reset(regexp, 0, status);
|
||||
while (uregex_findNext(regexp, status)) {
|
||||
len += uregex_appendReplacement(regexp, replacementText, replacementLength,
|
||||
|
||||
// Note: Separate error code variables for findNext() and appendReplacement()
|
||||
// are used so that destination buffer overflow errors
|
||||
// in appendReplacement won't stop findNext() from working.
|
||||
// appendReplacement() and appendTail() special case incoming buffer
|
||||
// overflow errors, continuing to return the correct length.
|
||||
UErrorCode findStatus = *status;
|
||||
while (uregex_findNext(regexp, &findStatus)) {
|
||||
len += uregex_appendReplacement(regexp, replacementText, replacementLength,
|
||||
&destBuf, &destCapacity, status);
|
||||
}
|
||||
len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
|
||||
|
||||
if (U_FAILURE(findStatus)) {
|
||||
// If anything went wrong with the findNext(), make that error trump
|
||||
// whatever may have happened with the append() operations.
|
||||
// Errors in findNext() are not expected.
|
||||
*status = findStatus;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
@ -868,7 +883,7 @@ int32_t RegexCImpl::appendReplacement(URegularExpression *regexp,
|
|||
// A series of appendReplacements, appendTail need to correctly preflight
|
||||
// the buffer size when an overflow happens somewhere in the middle.
|
||||
UBool pendingBufferOverflow = FALSE;
|
||||
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity == 0) {
|
||||
if (*status == U_BUFFER_OVERFLOW_ERROR && *destCapacity == 0) {
|
||||
pendingBufferOverflow = TRUE;
|
||||
*status = U_ZERO_ERROR;
|
||||
}
|
||||
|
@ -1078,19 +1093,11 @@ int32_t RegexCImpl::appendTail(URegularExpression *regexp,
|
|||
UErrorCode *status)
|
||||
{
|
||||
|
||||
if (destCapacity == NULL || destBuf == NULL ||
|
||||
*destBuf == NULL && *destCapacity > 0 ||
|
||||
*destCapacity < 0)
|
||||
{
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// If we come in with a buffer overflow error, don't suppress the operation.
|
||||
// A series of appendReplacements, appendTail need to correctly preflight
|
||||
// the buffer size when an overflow happens somewhere in the middle.
|
||||
UBool pendingBufferOverflow = FALSE;
|
||||
if (*status == U_BUFFER_OVERFLOW_ERROR && *destCapacity == 0) {
|
||||
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
|
||||
pendingBufferOverflow = TRUE;
|
||||
*status = U_ZERO_ERROR;
|
||||
}
|
||||
|
@ -1098,6 +1105,15 @@ int32_t RegexCImpl::appendTail(URegularExpression *regexp,
|
|||
if (validateRE(regexp, status) == FALSE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (destCapacity == NULL || destBuf == NULL ||
|
||||
*destBuf == NULL && *destCapacity > 0 ||
|
||||
*destCapacity < 0)
|
||||
{
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
RegexMatcher *m = regexp->fMatcher;
|
||||
|
||||
int32_t srcIdx;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2004-2008, International Business Machines Corporation and
|
||||
* Copyright (c) 2004-2009, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -807,64 +807,71 @@ static void TestRegexCAPI(void) {
|
|||
* replaceAll()
|
||||
*/
|
||||
{
|
||||
UChar text1[80];
|
||||
UChar text2[80];
|
||||
UChar replText[80];
|
||||
UChar text1[80]; /* "Replace xaax x1x x...x." */
|
||||
UChar text2[80]; /* "No match here." */
|
||||
UChar replText[80]; /* "<$1>" */
|
||||
UChar replText2[80]; /* "<<$1>>" */
|
||||
const char * pattern = "x(.*?)x";
|
||||
const char * expectedResult = "Replace <aa> <1> <...>.";
|
||||
const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
|
||||
UChar buf[80];
|
||||
int32_t resultSz;
|
||||
int32_t resultSize;
|
||||
int32_t expectedResultSize;
|
||||
int32_t expectedResultSize2;
|
||||
int32_t i;
|
||||
|
||||
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
|
||||
u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
|
||||
u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
|
||||
expectedResultSize = u_strlen(text1);
|
||||
u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
|
||||
expectedResultSize = strlen(expectedResult);
|
||||
expectedResultSize2 = strlen(expectedResult2);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_openC("x(.*?)x", 0, NULL, &status);
|
||||
re = uregex_openC(pattern, 0, NULL, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Normal case, with match */
|
||||
uregex_setText(re, text1, -1, &status);
|
||||
resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, TRUE);
|
||||
TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
|
||||
TEST_ASSERT_STRING(expectedResult, buf, TRUE);
|
||||
TEST_ASSERT(resultSize == expectedResultSize);
|
||||
|
||||
/* No match. Text should copy to output with no changes. */
|
||||
status = U_ZERO_ERROR;
|
||||
uregex_setText(re, text2, -1, &status);
|
||||
resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_STRING("No match here.", buf, TRUE);
|
||||
TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
|
||||
TEST_ASSERT(resultSize == u_strlen(text2));
|
||||
|
||||
/* Match, output just fills buffer, no termination warning. */
|
||||
status = U_ZERO_ERROR;
|
||||
uregex_setText(re, text1, -1, &status);
|
||||
memset(buf, -1, sizeof(buf));
|
||||
resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
|
||||
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
|
||||
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
|
||||
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
|
||||
TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
|
||||
TEST_ASSERT_STRING(expectedResult, buf, FALSE);
|
||||
TEST_ASSERT(resultSize == expectedResultSize);
|
||||
TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
|
||||
|
||||
/* Do the replaceAll again, without first resetting anything.
|
||||
* Should give the same results.
|
||||
*/
|
||||
status = U_ZERO_ERROR;
|
||||
memset(buf, -1, sizeof(buf));
|
||||
resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
|
||||
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
|
||||
TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
|
||||
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
|
||||
TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
|
||||
TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
|
||||
TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
|
||||
|
||||
/* NULL buffer, zero buffer length */
|
||||
status = U_ZERO_ERROR;
|
||||
resultSz = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
|
||||
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
|
||||
TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
|
||||
TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
|
||||
|
||||
/* Buffer too small. Try every size, which will tickle edge cases
|
||||
* in uregex_appendReplacement (used by replaceAll) */
|
||||
|
@ -872,15 +879,33 @@ static void TestRegexCAPI(void) {
|
|||
char expected[80];
|
||||
status = U_ZERO_ERROR;
|
||||
memset(buf, -1, sizeof(buf));
|
||||
resultSz = uregex_replaceAll(re, replText, -1, buf, i, &status);
|
||||
resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
|
||||
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
|
||||
strcpy(expected, "Replace <aa> <1> <...>.");
|
||||
strcpy(expected, expectedResult);
|
||||
expected[i] = 0;
|
||||
TEST_ASSERT_STRING(expected, buf, FALSE);
|
||||
TEST_ASSERT(resultSz == expectedResultSize);
|
||||
TEST_ASSERT(resultSize == expectedResultSize);
|
||||
TEST_ASSERT(buf[i] == (UChar)0xffff);
|
||||
}
|
||||
|
||||
/* Buffer too small. Same as previous test, except this time the replacement
|
||||
* text is longer than the match capture group, making the length of the complete
|
||||
* replacement longer than the original string.
|
||||
*/
|
||||
for (i=0; i<expectedResultSize2; i++) {
|
||||
char expected[80];
|
||||
status = U_ZERO_ERROR;
|
||||
memset(buf, -1, sizeof(buf));
|
||||
resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
|
||||
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
|
||||
strcpy(expected, expectedResult2);
|
||||
expected[i] = 0;
|
||||
TEST_ASSERT_STRING(expected, buf, FALSE);
|
||||
TEST_ASSERT(resultSize == expectedResultSize2);
|
||||
TEST_ASSERT(buf[i] == (UChar)0xffff);
|
||||
}
|
||||
|
||||
|
||||
uregex_close(re);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue