ICU-11469 Regular Expressions, remove old tech preview functions.

X-SVN-Rev: 36953
This commit is contained in:
Andy Heninger 2015-01-14 00:03:29 +00:00
parent 069313c959
commit 22c8c94d14
6 changed files with 226 additions and 260 deletions

View file

@ -1,6 +1,6 @@
/*
**************************************************************************
* Copyright (C) 2002-2014 International Business Machines Corporation *
* Copyright (C) 2002-2015 International Business Machines Corporation *
* and others. All rights reserved. *
**************************************************************************
*/
@ -1175,97 +1175,32 @@ UText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UE
UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
UnicodeString result;
if (U_FAILURE(status)) {
int64_t groupStart = start64(groupNum, status);
int64_t groupEnd = end64(groupNum, status);
if (U_FAILURE(status) || groupStart == -1 || groupStart == groupEnd) {
return result;
}
UText resultText = UTEXT_INITIALIZER;
utext_openUnicodeString(&resultText, &result, &status);
group(groupNum, &resultText, status);
utext_close(&resultText);
// Get the group length using a utext_extract preflight.
// UText is actually pretty efficient at this when underlying encoding is UTF-16.
int32_t length = utext_extract(fInputText, groupStart, groupEnd, NULL, 0, &status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
return result;
}
status = U_ZERO_ERROR;
UChar *buf = result.getBuffer(length);
if (buf == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
} else {
int32_t extractLength = utext_extract(fInputText, groupStart, groupEnd, buf, length, &status);
result.releaseBuffer(extractLength);
U_ASSERT(length == extractLength);
}
return result;
}
// Return deep (mutable) clone
// Technology Preview (as an API), but note that the UnicodeString API is implemented
// using this function.
// Copy the text of capture group groupNum from the previous match into a UText.
//
// groupNum  Capture group number; 0 selects the whole match.
// dest      UText whose entire contents are replaced with the group text.
//           If NULL, a new UText is created (via utext_openUChars / utext_clone) and returned.
// status    Set to fDeferredStatus if that is a failure, U_REGEX_INVALID_STATE if there is
//           no current match, U_INDEX_OUTOFBOUNDS_ERROR for a bad group number, or
//           U_MEMORY_ALLOCATION_ERROR if the temporary buffer cannot be allocated.
// Returns dest when one was supplied; otherwise the newly created UText. On the early
// error returns dest is returned unchanged, which is NULL if the caller passed NULL.
UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
if (U_FAILURE(status)) {
return dest;
}
// Validate matcher state and the group number; the first failing check sets status.
if (U_FAILURE(fDeferredStatus)) {
status = fDeferredStatus;
} else if (fMatch == FALSE) {
status = U_REGEX_INVALID_STATE;
} else if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
status = U_INDEX_OUTOFBOUNDS_ERROR;
}
if (U_FAILURE(status)) {
return dest;
}
// s and e receive the start and end positions of the requested group in the input text.
int64_t s, e;
if (groupNum == 0) {
s = fMatchStart;
e = fMatchEnd;
} else {
// fGroupMap is indexed from zero, group numbers from one. The mapped value is the
// offset in the frame's fExtra array of the group's start; the end is the next slot.
int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
U_ASSERT(groupOffset < fPattern->fFrameSize);
U_ASSERT(groupOffset >= 0);
s = fFrame->fExtra[groupOffset];
e = fFrame->fExtra[groupOffset+1];
}
if (s < 0) {
// A capture group wasn't part of the match
// Produce empty text: clear dest, or open a new zero-length UText.
if (dest) {
utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
return dest;
} else {
return utext_openUChars(NULL, NULL, 0, &status);
}
}
U_ASSERT(s <= e);
if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
// The input is directly addressable through chunkContents, so the group text
// can be taken straight from that buffer without an intermediate copy.
U_ASSERT(e <= fInputLength);
if (dest) {
utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status);
} else {
// Wrap the range in a stack UText, then clone it so the result does not
// alias the matcher's input (presumably a deep clone - the TRUE argument; confirm
// against the utext_clone documentation).
UText groupText = UTEXT_INITIALIZER;
utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status);
dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
utext_close(&groupText);
}
} else {
// General case: extract the group into a temporary UChar buffer first.
int32_t len16;
if (UTEXT_USES_U16(fInputText)) {
len16 = (int32_t)(e-s);
} else {
// Preflight with a zero-capacity buffer to obtain the length in UChars.
// The preflight's error code goes to a scratch variable so that status is left untouched.
UErrorCode lengthStatus = U_ZERO_ERROR;
len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
}
// One extra UChar is allocated beyond the text length (room for a terminator).
UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
if (groupChars == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return dest;
}
utext_extract(fInputText, s, e, groupChars, len16+1, &status);
if (dest) {
utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status);
} else {
UText groupText = UTEXT_INITIALIZER;
utext_openUChars(&groupText, groupChars, len16, &status);
dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
utext_close(&groupText);
}
// The temporary buffer is released on both paths once its contents have been
// handed to utext_replace or utext_clone.
uprv_free(groupChars);
}
return dest;
}
//--------------------------------------------------------------------------------
//
@ -2001,6 +1936,67 @@ void RegexMatcher::setTrace(UBool state) {
/**
 * UText, replace entire contents of the destination UText with a substring of the source UText.
 *
 * @param src    The source UText
 * @param dest   The destination UText. Must be writable.
 *               May be NULL, in which case a new UText will be allocated.
 * @param start  Start index of source substring.
 * @param limit  Limit index of source substring.
 * @param status An error code. If it already indicates a failure on entry,
 *               nothing is done and dest is returned as-is.
 * @return       dest, when the caller supplied one (on success and on failure).
 *               Otherwise a newly opened UText holding the substring; when the
 *               substring is non-empty that UText is marked as owning its
 *               buffer. NULL if no dest was supplied and the extraction or an
 *               allocation failed.
 */
static UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int64_t limit, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return dest;
    }
    if (start == limit) {
        // Empty substring: clear the destination, or open a zero-length UText.
        if (dest) {
            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, status);
            return dest;
        } else {
            return utext_openUChars(NULL, NULL, 0, status);
        }
    }

    // Preflight with a zero-capacity buffer to learn the substring length in UChars.
    // U_BUFFER_OVERFLOW_ERROR is the expected outcome here; any other failure is real.
    int32_t length = utext_extract(src, start, limit, NULL, 0, status);
    if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) {
        return dest;
    }
    *status = U_ZERO_ERROR;

    MaybeStackArray<UChar, 40> buffer;
    if (length >= buffer.getCapacity()) {
        UChar *newBuf = buffer.resize(length+1);   // Leave space for terminating Nul.
        if (newBuf == NULL) {
            // Stop here. The buffer has not grown, so continuing would hand
            // utext_extract() a capacity of length+1 that the storage cannot back.
            // dest is NULL when the caller supplied none, so this matches the
            // function's other failure returns.
            *status = U_MEMORY_ALLOCATION_ERROR;
            return dest;
        }
    }
    utext_extract(src, start, limit, buffer.getAlias(), length+1, status);
    if (dest) {
        utext_replace(dest, 0, utext_nativeLength(dest), buffer.getAlias(), length, status);
        return dest;
    }

    // Caller did not provide a preexisting UText.
    // Open a new one, and have it adopt the text buffer storage.
    if (U_FAILURE(*status)) {
        return NULL;
    }
    int32_t ownedLength = 0;
    UChar *ownedBuf = buffer.orphanOrClone(length+1, ownedLength);
    if (ownedBuf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    UText *result = utext_openUChars(NULL, ownedBuf, length, status);
    if (U_FAILURE(*status)) {
        // The UText never took over the buffer, so it must be released here.
        uprv_free(ownedBuf);
        return NULL;
    }
    // Flag the buffer as owned by the UText, transferring responsibility for it to result.
    result->providerProperties |= (1 << UTEXT_PROVIDER_OWNS_TEXT);
    return result;
}
//---------------------------------------------------------------------
//
// split
@ -2167,7 +2163,8 @@ int32_t RegexMatcher::split(UText *input,
break;
}
i++;
dest[i] = group(groupNum, dest[i], status);
dest[i] = utext_extract_replace(fInputText, dest[i],
start64(groupNum, status), end64(groupNum, status), &status);
}
if (nextOutputStringStart == fActiveLimit) {

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2014, International Business Machines
* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: regex.h
@ -896,24 +896,6 @@ public:
*/
virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
/**
* Returns a string containing the text captured by the given group
* during the previous match operation. Group(0) is the entire match.
*
* @param groupNum the capture group number
* @param dest A mutable UText in which the matching text is placed.
* If NULL, a new UText will be created (which may not be mutable).
* @param status A reference to a UErrorCode to receive any errors.
* Possible errors are U_REGEX_INVALID_STATE if no match
* has been attempted or the last match failed.
* @return A string containing the matched input text. If a pre-allocated UText
* was provided, it will always be used and returned.
*
* @internal ICU 4.4 technology preview
*/
virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const;
/**
* Returns the index in the input string of the start of the text matched
* during the previous match operation.

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2004-2013, International Business Machines
* Copyright (C) 2004-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uregex.h
@ -659,31 +659,6 @@ uregex_groupUText(URegularExpression *regexp,
int64_t *groupLength,
UErrorCode *status);
#ifndef U_HIDE_INTERNAL_API
/** Extract the string for the specified matching expression or subexpression.
* Group #0 is the complete string of matched text.
* Group #1 is the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group to extract. Group 0 is the complete
* match. The value of this parameter must be
* less than or equal to the number of capture groups in
* the pattern.
* @param dest Mutable UText to receive the matching string data.
* If NULL, a new UText will be created (which may not be mutable).
* @param status A reference to a UErrorCode to receive any errors.
* @return The matching string data. If a pre-allocated UText was provided,
* it will always be used and returned.
*
* @internal ICU 4.4 technology preview
*/
U_INTERNAL UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression *regexp,
int32_t groupNum,
UText *dest,
UErrorCode *status);
#endif /* U_HIDE_INTERNAL_API */
/**
* Returns the index in the input string of the start of the text matched by the
* specified capture group during the previous match operation. Return -1 if

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2004-2014, International Business Machines
* Copyright (C) 2004-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uregex.cpp
@ -647,7 +647,7 @@ uregex_group(URegularExpression *regexp2,
if (destCapacity == 0 || regexp->fText != NULL) {
// If preflighting or if we already have the text as UChars,
// this is a little cheaper than going through uregex_groupUTextDeep()
// this is a little cheaper than extracting from the UText
//
// Pick up the range of characters from the matcher
@ -680,14 +680,18 @@ uregex_group(URegularExpression *regexp2,
}
return fullLength;
} else {
int32_t result = 0;
UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
if (U_SUCCESS(*status)) {
result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
int64_t start = regexp->fMatcher->start64(groupNum, *status);
int64_t limit = regexp->fMatcher->end64(groupNum, *status);
if (U_FAILURE(*status)) {
return 0;
}
utext_close(groupText);
return result;
// Note edge cases:
// Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
// Zero Length Match: start == end.
int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
return length;
}
}
@ -711,49 +715,6 @@ uregex_groupUText(URegularExpression *regexp2,
return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
}
//------------------------------------------------------------------------------
//
// uregex_groupUTextDeep
//
//------------------------------------------------------------------------------
// Copy the text of capture group groupNum from the most recent match into a UText.
//
// regexp2   The compiled regular expression (cast to the internal RegularExpression).
// groupNum  Capture group number, passed through to the matcher.
// dest      UText whose contents are replaced with the group text; if NULL a new
//           UText is created and returned.
// status    Receives errors from validateRE() and from the matcher calls.
// Returns dest when one was supplied. Otherwise a newly created UText, which is an
// empty one on the validation / start / end failure paths handled here.
U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression *regexp2,
int32_t groupNum,
UText *dest,
UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2;
if (validateRE(regexp, TRUE, status) == FALSE) {
// Even on failure the caller gets a usable UText back. A scratch status is used
// so that opening the empty text does not disturb the error already in *status.
UErrorCode emptyTextStatus = U_ZERO_ERROR;
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
}
if (regexp->fText != NULL) {
//
// Pick up the range of characters from the matcher
// and use our already-extracted characters
//
int32_t startIx = regexp->fMatcher->start(groupNum, *status);
int32_t endIx = regexp->fMatcher->end (groupNum, *status);
if (U_FAILURE(*status)) {
UErrorCode emptyTextStatus = U_ZERO_ERROR;
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
}
// NOTE(review): if start() reports -1 for a group that did not participate in
// the match, &regexp->fText[startIx] points one element before the buffer (with a
// zero length). Confirm whether that case can reach this point.
if (dest) {
utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
} else {
// Wrap the range in a stack UText and clone it, so the returned UText is
// separate from the temporary wrapper that is closed below.
UText groupText = UTEXT_INITIALIZER;
utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
utext_close(&groupText);
}
return dest;
} else {
// No UChar copy of the input is held; delegate to the matcher's UText-based group().
return regexp->fMatcher->group(groupNum, dest, *status);
}
}
//------------------------------------------------------------------------------
//
// uregex_start

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2004-2014, International Business Machines Corporation and
* Copyright (c) 2004-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -1754,16 +1754,14 @@ static void TestUTextAPI(void) {
}
/*
* group()
* groupUText()
*/
{
UChar text1[80];
UText *actual;
UBool result;
const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
int64_t groupLen = 0;
UChar groupBuf[20];
u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
@ -1775,58 +1773,38 @@ static void TestUTextAPI(void) {
result = uregex_find(re, 0, &status);
TEST_ASSERT(result==TRUE);
/* Capture Group 0, the full match. Should succeed. */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 0, NULL, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
utext_close(actual);
/* Capture Group 0 with shallow clone API. Should succeed. */
status = U_ZERO_ERROR;
{
int64_t group_len;
int32_t len16;
UErrorCode shallowStatus = U_ZERO_ERROR;
int64_t nativeIndex;
UChar *groupChars;
UText groupText = UTEXT_INITIALIZER;
actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
TEST_ASSERT_SUCCESS(status);
actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
nativeIndex = utext_getNativeIndex(actual);
/* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
/* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
len16 = (int32_t)group_len;
groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
utext_close(&groupText);
free(groupChars);
}
TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
utext_close(actual);
/* Capture group #1. Should succeed. */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 1, NULL, &status);
actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_UTEXT(str_interior, actual);
TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
/* (within the string text1) */
TEST_ASSERT(10 == groupLen); /* length of " interior " */
utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
utext_close(actual);
/* Capture group out of range. Error. */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 2, NULL, &status);
actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
TEST_ASSERT(utext_nativeLength(actual) == 0);
utext_close(actual);
uregex_close(re);
}
/*

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2002-2014, International Business Machines Corporation and
* Copyright (c) 2002-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -38,6 +38,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "cmemory.h"
#include "cstring.h"
#include "uinvchar.h"
@ -239,7 +240,12 @@ if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}
#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};}
// expected: const char * , restricted to invariant characters.
// actual: const UnicodeString &
#define REGEX_ASSERT_UNISTR(expected, actual) { \
if (UnicodeString(expected, -1, US_INV) != (actual)) { \
errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n", \
__FILE__, __LINE__, expected, extractToAssertBuf(actual));};}
static UBool testUTextEqual(UText *uta, UText *utb) {
@ -2050,47 +2056,72 @@ void RegexTest::API_Match_UTF8() {
utext_close(&destText);
utext_openUnicodeString(&destText, &dest, &status);
result = matcher->group(0, NULL, status);
int64_t length;
result = matcher->group(0, NULL, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
utext_close(result);
result = matcher->group(0, &destText, status);
result = matcher->group(0, &destText, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &destText);
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
REGEX_ASSERT(utext_getNativeIndex(result) == 0);
REGEX_ASSERT(length == 10);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
result = matcher->group(1, NULL, status);
// Capture Group 1 == "234567"
result = matcher->group(1, NULL, length, status);
REGEX_CHECK_STATUS;
const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
REGEX_ASSERT(utext_getNativeIndex(result) == 2);
REGEX_ASSERT(length == 6);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
result = matcher->group(1, &destText, status);
result = matcher->group(1, &destText, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &destText);
REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
result = matcher->group(2, NULL, status);
REGEX_CHECK_STATUS;
const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
REGEX_ASSERT_UTEXT_UTF8(str_45, result);
REGEX_ASSERT(utext_getNativeIndex(result) == 2);
REGEX_ASSERT(length == 6);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
result = matcher->group(2, &destText, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &destText);
REGEX_ASSERT_UTEXT_UTF8(str_45, result);
result = matcher->group(3, NULL, status);
// Capture Group 2 == "45"
result = matcher->group(2, NULL, length, status);
REGEX_CHECK_STATUS;
const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
REGEX_ASSERT_UTEXT_UTF8(str_89, result);
REGEX_ASSERT(utext_getNativeIndex(result) == 4);
REGEX_ASSERT(length == 2);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
result = matcher->group(3, &destText, status);
result = matcher->group(2, &destText, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &destText);
REGEX_ASSERT_UTEXT_UTF8(str_89, result);
REGEX_ASSERT(utext_getNativeIndex(result) == 4);
REGEX_ASSERT(length == 2);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
// Capture Group 3 == "89"
result = matcher->group(3, NULL, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(utext_getNativeIndex(result) == 8);
REGEX_ASSERT(length == 2);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
result = matcher->group(3, &destText, length, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &destText);
REGEX_ASSERT(utext_getNativeIndex(result) == 8);
REGEX_ASSERT(length == 2);
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
utext_close(result);
// Capture Group number out of range.
status = U_ZERO_ERROR;
REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
status = U_ZERO_ERROR;
REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
status = U_ZERO_ERROR;
matcher->reset();
REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
@ -3068,6 +3099,37 @@ void RegexTest::API_Pattern_UTF8() {
delete pat1;
//
// split of a UText based string, with library allocating output UTexts.
//
{
status = U_ZERO_ERROR;
RegexMatcher matcher(UnicodeString("(:)"), 0, status);
UnicodeString stringToSplit("first:second:third");
UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &status);
REGEX_CHECK_STATUS;
UText *splits[10] = {NULL};
int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(splits), status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(numFields == 5);
REGEX_ASSERT_UTEXT_INVARIANT("first", splits[0]);
REGEX_ASSERT_UTEXT_INVARIANT(":", splits[1]);
REGEX_ASSERT_UTEXT_INVARIANT("second", splits[2]);
REGEX_ASSERT_UTEXT_INVARIANT(":", splits[3]);
REGEX_ASSERT_UTEXT_INVARIANT("third", splits[4]);
REGEX_ASSERT(splits[5] == NULL);
for (int i=0; i<UPRV_LENGTHOF(splits); i++) {
if (splits[i]) {
utext_close(splits[i]);
splits[i] = NULL;
}
}
utext_close(textToSplit);
}
//
// RegexPattern::pattern() and patternText()
//
@ -3079,7 +3141,7 @@ void RegexTest::API_Pattern_UTF8() {
regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
pat1 = RegexPattern::compile(&re1, pe, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
REGEX_ASSERT_UNISTR("(Hello, world)*", pat1->pattern());
REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
delete pat1;
@ -4995,7 +5057,11 @@ void RegexTest::PreAllocatedUTextCAPI () {
UChar text1[80];
UText *actual;
UBool result;
u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
int64_t length = 0;
u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
// 012345678901234567890123456789012345678901234567
// 0 1 2 3 4
status = U_ZERO_ERROR;
re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@ -5005,26 +5071,29 @@ void RegexTest::PreAllocatedUTextCAPI () {
result = uregex_find(re, 0, &status);
REGEX_ASSERT(result==TRUE);
/* Capture Group 0, the full match. Should succeed. */
/* Capture Group 0, the full match. Should succeed. "abc interior def" */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
actual = uregex_groupUText(re, 0, &bufferText, &length, &status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(actual == &bufferText);
REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
REGEX_ASSERT(utext_getNativeIndex(actual) == 6);
REGEX_ASSERT(length == 16);
REGEX_ASSERT(utext_nativeLength(actual) == 47);
/* Capture group #1. Should succeed. */
/* Capture group #1. Should succeed, matching " interior ". */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
actual = uregex_groupUText(re, 1, &bufferText, &length, &status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(actual == &bufferText);
REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
REGEX_ASSERT(utext_getNativeIndex(actual) == 9); // position of " interior "
REGEX_ASSERT(length == 10);
REGEX_ASSERT(utext_nativeLength(actual) == 47);
/* Capture group out of range. Error. */
status = U_ZERO_ERROR;
actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
actual = uregex_groupUText(re, 2, &bufferText, &length, &status);
REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
REGEX_ASSERT(actual == &bufferText);
uregex_close(re);
}
@ -5037,10 +5106,12 @@ void RegexTest::PreAllocatedUTextCAPI () {
UChar text2[80];
UText replText = UTEXT_INITIALIZER;
UText *result;
status = U_ZERO_ERROR;
utext_openUnicodeString(&bufferText, &buffer, &status);
status = U_ZERO_ERROR;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)/2);
regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
re = uregex_openC("x(.*?)x", 0, NULL, &status);
@ -5048,7 +5119,9 @@ void RegexTest::PreAllocatedUTextCAPI () {
/* Normal case, with match */
uregex_setText(re, text1, -1, &status);
REGEX_CHECK_STATUS;
utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
REGEX_CHECK_STATUS;
result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(result == &bufferText);