mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 07:22:11 +00:00
ICU-11469 Regular Expressions, remove old tech preview functions.
X-SVN-Rev: 36953
This commit is contained in:
parent
069313c959
commit
22c8c94d14
6 changed files with 226 additions and 260 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 2002-2014 International Business Machines Corporation *
|
||||
* Copyright (C) 2002-2015 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**************************************************************************
|
||||
*/
|
||||
|
@ -1175,97 +1175,32 @@ UText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UE
|
|||
|
||||
UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
|
||||
UnicodeString result;
|
||||
if (U_FAILURE(status)) {
|
||||
int64_t groupStart = start64(groupNum, status);
|
||||
int64_t groupEnd = end64(groupNum, status);
|
||||
if (U_FAILURE(status) || groupStart == -1 || groupStart == groupEnd) {
|
||||
return result;
|
||||
}
|
||||
UText resultText = UTEXT_INITIALIZER;
|
||||
utext_openUnicodeString(&resultText, &result, &status);
|
||||
group(groupNum, &resultText, status);
|
||||
utext_close(&resultText);
|
||||
|
||||
// Get the group length using a utext_extract preflight.
|
||||
// UText is actually pretty efficient at this when underlying encoding is UTF-16.
|
||||
int32_t length = utext_extract(fInputText, groupStart, groupEnd, NULL, 0, &status);
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
return result;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
UChar *buf = result.getBuffer(length);
|
||||
if (buf == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
int32_t extractLength = utext_extract(fInputText, groupStart, groupEnd, buf, length, &status);
|
||||
result.releaseBuffer(extractLength);
|
||||
U_ASSERT(length == extractLength);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// Return deep (mutable) clone
|
||||
// Technology Preview (as an API), but note that the UnicodeString API is implemented
|
||||
// using this function.
|
||||
UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return dest;
|
||||
}
|
||||
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
} else if (fMatch == FALSE) {
|
||||
status = U_REGEX_INVALID_STATE;
|
||||
} else if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return dest;
|
||||
}
|
||||
|
||||
int64_t s, e;
|
||||
if (groupNum == 0) {
|
||||
s = fMatchStart;
|
||||
e = fMatchEnd;
|
||||
} else {
|
||||
int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
|
||||
U_ASSERT(groupOffset < fPattern->fFrameSize);
|
||||
U_ASSERT(groupOffset >= 0);
|
||||
s = fFrame->fExtra[groupOffset];
|
||||
e = fFrame->fExtra[groupOffset+1];
|
||||
}
|
||||
|
||||
if (s < 0) {
|
||||
// A capture group wasn't part of the match
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
|
||||
return dest;
|
||||
} else {
|
||||
return utext_openUChars(NULL, NULL, 0, &status);
|
||||
}
|
||||
}
|
||||
U_ASSERT(s <= e);
|
||||
|
||||
if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
|
||||
U_ASSERT(e <= fInputLength);
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status);
|
||||
} else {
|
||||
UText groupText = UTEXT_INITIALIZER;
|
||||
utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status);
|
||||
dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
|
||||
utext_close(&groupText);
|
||||
}
|
||||
} else {
|
||||
int32_t len16;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
len16 = (int32_t)(e-s);
|
||||
} else {
|
||||
UErrorCode lengthStatus = U_ZERO_ERROR;
|
||||
len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
|
||||
}
|
||||
UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
|
||||
if (groupChars == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return dest;
|
||||
}
|
||||
utext_extract(fInputText, s, e, groupChars, len16+1, &status);
|
||||
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status);
|
||||
} else {
|
||||
UText groupText = UTEXT_INITIALIZER;
|
||||
utext_openUChars(&groupText, groupChars, len16, &status);
|
||||
dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
|
||||
utext_close(&groupText);
|
||||
}
|
||||
|
||||
uprv_free(groupChars);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -2001,6 +1936,67 @@ void RegexMatcher::setTrace(UBool state) {
|
|||
|
||||
|
||||
|
||||
/**
|
||||
* UText, replace entire contents of the destination UText with a substring of the source UText.
|
||||
*
|
||||
* @param src The source UText
|
||||
* @param dest The destination UText. Must be writable.
|
||||
* May be NULL, in which case a new UText will be allocated.
|
||||
* @param start Start index of source substring.
|
||||
* @param limit Limit index of source substring.
|
||||
* @param status An error code.
|
||||
*/
|
||||
static UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int64_t limit, UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return dest;
|
||||
}
|
||||
if (start == limit) {
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, status);
|
||||
return dest;
|
||||
} else {
|
||||
return utext_openUChars(NULL, NULL, 0, status);
|
||||
}
|
||||
}
|
||||
int32_t length = utext_extract(src, start, limit, NULL, 0, status);
|
||||
if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) {
|
||||
return dest;
|
||||
}
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeStackArray<UChar, 40> buffer;
|
||||
if (length >= buffer.getCapacity()) {
|
||||
UChar *newBuf = buffer.resize(length+1); // Leave space for terminating Nul.
|
||||
if (newBuf == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
utext_extract(src, start, limit, buffer.getAlias(), length+1, status);
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), buffer.getAlias(), length, status);
|
||||
return dest;
|
||||
}
|
||||
|
||||
// Caller did not provide a prexisting UText.
|
||||
// Open a new one, and have it adopt the text buffer storage.
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
int32_t ownedLength = 0;
|
||||
UChar *ownedBuf = buffer.orphanOrClone(length+1, ownedLength);
|
||||
if (ownedBuf == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
UText *result = utext_openUChars(NULL, ownedBuf, length, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
uprv_free(ownedBuf);
|
||||
return NULL;
|
||||
}
|
||||
result->providerProperties |= (1 << UTEXT_PROVIDER_OWNS_TEXT);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
//
|
||||
// split
|
||||
|
@ -2167,7 +2163,8 @@ int32_t RegexMatcher::split(UText *input,
|
|||
break;
|
||||
}
|
||||
i++;
|
||||
dest[i] = group(groupNum, dest[i], status);
|
||||
dest[i] = utext_extract_replace(fInputText, dest[i],
|
||||
start64(groupNum, status), end64(groupNum, status), &status);
|
||||
}
|
||||
|
||||
if (nextOutputStringStart == fActiveLimit) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2014, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: regex.h
|
||||
|
@ -896,24 +896,6 @@ public:
|
|||
*/
|
||||
virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Returns a string containing the text captured by the given group
|
||||
* during the previous match operation. Group(0) is the entire match.
|
||||
*
|
||||
* @param groupNum the capture group number
|
||||
* @param dest A mutable UText in which the matching text is placed.
|
||||
* If NULL, a new UText will be created (which may not be mutable).
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* Possible errors are U_REGEX_INVALID_STATE if no match
|
||||
* has been attempted or the last match failed.
|
||||
* @return A string containing the matched input text. If a pre-allocated UText
|
||||
* was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
*/
|
||||
virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the start of the text matched
|
||||
* during the previous match operation.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2004-2013, International Business Machines
|
||||
* Copyright (C) 2004-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uregex.h
|
||||
|
@ -659,31 +659,6 @@ uregex_groupUText(URegularExpression *regexp,
|
|||
int64_t *groupLength,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Extract the string for the specified matching expression or subexpression.
|
||||
* Group #0 is the complete string of matched text.
|
||||
* Group #1 is the text matched by the first set of capturing parentheses.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param groupNum The capture group to extract. Group 0 is the complete
|
||||
* match. The value of this parameter must be
|
||||
* less than or equal to the number of capture groups in
|
||||
* the pattern.
|
||||
* @param dest Mutable UText to receive the matching string data.
|
||||
* If NULL, a new UText will be created (which may not be mutable).
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The matching string data. If a pre-allocated UText was provided,
|
||||
* it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
uregex_groupUTextDeep(URegularExpression *regexp,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
UErrorCode *status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the start of the text matched by the
|
||||
* specified capture group during the previous match operation. Return -1 if
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2014, International Business Machines
|
||||
* Copyright (C) 2004-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uregex.cpp
|
||||
|
@ -647,7 +647,7 @@ uregex_group(URegularExpression *regexp2,
|
|||
|
||||
if (destCapacity == 0 || regexp->fText != NULL) {
|
||||
// If preflighting or if we already have the text as UChars,
|
||||
// this is a little cheaper than going through uregex_groupUTextDeep()
|
||||
// this is a little cheaper than extracting from the UText
|
||||
|
||||
//
|
||||
// Pick up the range of characters from the matcher
|
||||
|
@ -680,14 +680,18 @@ uregex_group(URegularExpression *regexp2,
|
|||
}
|
||||
return fullLength;
|
||||
} else {
|
||||
int32_t result = 0;
|
||||
UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
|
||||
int64_t start = regexp->fMatcher->start64(groupNum, *status);
|
||||
int64_t limit = regexp->fMatcher->end64(groupNum, *status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
utext_close(groupText);
|
||||
return result;
|
||||
// Note edge cases:
|
||||
// Group didn't match: start == end == -1. UText trims both indexes to 0, which gives a zero-length result.
|
||||
// Zero Length Match: start == end.
|
||||
int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
|
||||
return length;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -711,49 +715,6 @@ uregex_groupUText(URegularExpression *regexp2,
|
|||
return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_groupUTextDeep
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
U_CAPI UText * U_EXPORT2
|
||||
uregex_groupUTextDeep(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, TRUE, status) == FALSE) {
|
||||
UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
||||
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
||||
}
|
||||
|
||||
if (regexp->fText != NULL) {
|
||||
//
|
||||
// Pick up the range of characters from the matcher
|
||||
// and use our already-extracted characters
|
||||
//
|
||||
int32_t startIx = regexp->fMatcher->start(groupNum, *status);
|
||||
int32_t endIx = regexp->fMatcher->end (groupNum, *status);
|
||||
if (U_FAILURE(*status)) {
|
||||
UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
||||
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
||||
}
|
||||
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), ®exp->fText[startIx], endIx - startIx, status);
|
||||
} else {
|
||||
UText groupText = UTEXT_INITIALIZER;
|
||||
utext_openUChars(&groupText, ®exp->fText[startIx], endIx - startIx, status);
|
||||
dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
|
||||
utext_close(&groupText);
|
||||
}
|
||||
|
||||
return dest;
|
||||
} else {
|
||||
return regexp->fMatcher->group(groupNum, dest, *status);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_start
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2004-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 2004-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -1754,16 +1754,14 @@ static void TestUTextAPI(void) {
|
|||
}
|
||||
|
||||
/*
|
||||
* group()
|
||||
* groupUText()
|
||||
*/
|
||||
{
|
||||
UChar text1[80];
|
||||
UText *actual;
|
||||
UBool result;
|
||||
|
||||
const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
|
||||
const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
|
||||
|
||||
int64_t groupLen = 0;
|
||||
UChar groupBuf[20];
|
||||
|
||||
u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
|
||||
|
||||
|
@ -1775,58 +1773,38 @@ static void TestUTextAPI(void) {
|
|||
result = uregex_find(re, 0, &status);
|
||||
TEST_ASSERT(result==TRUE);
|
||||
|
||||
/* Capture Group 0, the full match. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 0, NULL, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture Group 0 with shallow clone API. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
{
|
||||
int64_t group_len;
|
||||
int32_t len16;
|
||||
UErrorCode shallowStatus = U_ZERO_ERROR;
|
||||
int64_t nativeIndex;
|
||||
UChar *groupChars;
|
||||
UText groupText = UTEXT_INITIALIZER;
|
||||
actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
|
||||
TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
|
||||
utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
|
||||
|
||||
nativeIndex = utext_getNativeIndex(actual);
|
||||
/* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
|
||||
/* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
|
||||
len16 = (int32_t)group_len;
|
||||
|
||||
groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
|
||||
utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
|
||||
|
||||
utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
|
||||
|
||||
TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
|
||||
utext_close(&groupText);
|
||||
free(groupChars);
|
||||
}
|
||||
TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture group #1. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 1, NULL, &status);
|
||||
|
||||
actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_UTEXT(str_interior, actual);
|
||||
TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
|
||||
/* (within the string text1) */
|
||||
TEST_ASSERT(10 == groupLen); /* length of " interior " */
|
||||
utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
|
||||
TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
|
||||
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture group out of range. Error. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 2, NULL, &status);
|
||||
actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
|
||||
TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
TEST_ASSERT(utext_nativeLength(actual) == 0);
|
||||
utext_close(actual);
|
||||
|
||||
uregex_close(re);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2002-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 2002-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -38,6 +38,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uinvchar.h"
|
||||
|
||||
|
@ -239,7 +240,12 @@ if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
|
|||
#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
|
||||
errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}
|
||||
|
||||
#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};}
|
||||
// expected: const char * , restricted to invariant characters.
|
||||
// actual: const UnicodeString &
|
||||
#define REGEX_ASSERT_UNISTR(expected, actual) { \
|
||||
if (UnicodeString(expected, -1, US_INV) != (actual)) { \
|
||||
errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n", \
|
||||
__FILE__, __LINE__, expected, extractToAssertBuf(actual));};}
|
||||
|
||||
|
||||
static UBool testUTextEqual(UText *uta, UText *utb) {
|
||||
|
@ -2050,47 +2056,72 @@ void RegexTest::API_Match_UTF8() {
|
|||
utext_close(&destText);
|
||||
utext_openUnicodeString(&destText, &dest, &status);
|
||||
|
||||
result = matcher->group(0, NULL, status);
|
||||
int64_t length;
|
||||
result = matcher->group(0, NULL, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
||||
utext_close(result);
|
||||
result = matcher->group(0, &destText, status);
|
||||
result = matcher->group(0, &destText, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 0);
|
||||
REGEX_ASSERT(length == 10);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
|
||||
result = matcher->group(1, NULL, status);
|
||||
// Capture Group 1 == "234567"
|
||||
result = matcher->group(1, NULL, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 2);
|
||||
REGEX_ASSERT(length == 6);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
result = matcher->group(1, &destText, status);
|
||||
|
||||
result = matcher->group(1, &destText, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
|
||||
|
||||
result = matcher->group(2, NULL, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_45, result);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 2);
|
||||
REGEX_ASSERT(length == 6);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
result = matcher->group(2, &destText, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_45, result);
|
||||
|
||||
result = matcher->group(3, NULL, status);
|
||||
// Capture Group 2 == "45"
|
||||
result = matcher->group(2, NULL, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_89, result);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 4);
|
||||
REGEX_ASSERT(length == 2);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
result = matcher->group(3, &destText, status);
|
||||
|
||||
result = matcher->group(2, &destText, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_89, result);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 4);
|
||||
REGEX_ASSERT(length == 2);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
|
||||
// Capture Group 3 == "89"
|
||||
result = matcher->group(3, NULL, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 8);
|
||||
REGEX_ASSERT(length == 2);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
|
||||
result = matcher->group(3, &destText, length, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT(utext_getNativeIndex(result) == 8);
|
||||
REGEX_ASSERT(length == 2);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
|
||||
utext_close(result);
|
||||
|
||||
// Capture Group number out of range.
|
||||
status = U_ZERO_ERROR;
|
||||
REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
status = U_ZERO_ERROR;
|
||||
REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
status = U_ZERO_ERROR;
|
||||
matcher->reset();
|
||||
REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
|
||||
|
||||
|
@ -3068,6 +3099,37 @@ void RegexTest::API_Pattern_UTF8() {
|
|||
delete pat1;
|
||||
|
||||
|
||||
//
|
||||
// split of a UText based string, with library allocating output UTexts.
|
||||
//
|
||||
{
|
||||
status = U_ZERO_ERROR;
|
||||
RegexMatcher matcher(UnicodeString("(:)"), 0, status);
|
||||
UnicodeString stringToSplit("first:second:third");
|
||||
UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
|
||||
UText *splits[10] = {NULL};
|
||||
int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(splits), status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(numFields == 5);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("first", splits[0]);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT(":", splits[1]);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("second", splits[2]);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT(":", splits[3]);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("third", splits[4]);
|
||||
REGEX_ASSERT(splits[5] == NULL);
|
||||
|
||||
for (int i=0; i<UPRV_LENGTHOF(splits); i++) {
|
||||
if (splits[i]) {
|
||||
utext_close(splits[i]);
|
||||
splits[i] = NULL;
|
||||
}
|
||||
}
|
||||
utext_close(textToSplit);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// RegexPattern::pattern() and patternText()
|
||||
//
|
||||
|
@ -3079,7 +3141,7 @@ void RegexTest::API_Pattern_UTF8() {
|
|||
regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
|
||||
pat1 = RegexPattern::compile(&re1, pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
|
||||
REGEX_ASSERT_UNISTR("(Hello, world)*", pat1->pattern());
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
|
||||
delete pat1;
|
||||
|
||||
|
@ -4995,7 +5057,11 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
|||
UChar text1[80];
|
||||
UText *actual;
|
||||
UBool result;
|
||||
u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
|
||||
int64_t length = 0;
|
||||
|
||||
u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
|
||||
// 012345678901234567890123456789012345678901234567
|
||||
// 0 1 2 3 4
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_openC("abc(.*?)def", 0, NULL, &status);
|
||||
|
@ -5005,26 +5071,29 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
|||
result = uregex_find(re, 0, &status);
|
||||
REGEX_ASSERT(result==TRUE);
|
||||
|
||||
/* Capture Group 0, the full match. Should succeed. */
|
||||
/* Capture Group 0, the full match. Should succeed. "abc interior def" */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
|
||||
actual = uregex_groupUText(re, 0, &bufferText, &length, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
|
||||
REGEX_ASSERT(utext_getNativeIndex(actual) == 6);
|
||||
REGEX_ASSERT(length == 16);
|
||||
REGEX_ASSERT(utext_nativeLength(actual) == 47);
|
||||
|
||||
/* Capture group #1. Should succeed. */
|
||||
/* Capture group #1. Should succeed, matching " interior ". */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
|
||||
actual = uregex_groupUText(re, 1, &bufferText, &length, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
|
||||
REGEX_ASSERT(utext_getNativeIndex(actual) == 9); // position of " interior "
|
||||
REGEX_ASSERT(length == 10);
|
||||
REGEX_ASSERT(utext_nativeLength(actual) == 47);
|
||||
|
||||
/* Capture group out of range. Error. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
|
||||
actual = uregex_groupUText(re, 2, &bufferText, &length, &status);
|
||||
REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
|
||||
uregex_close(re);
|
||||
|
||||
}
|
||||
|
@ -5037,10 +5106,12 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
|||
UChar text2[80];
|
||||
UText replText = UTEXT_INITIALIZER;
|
||||
UText *result;
|
||||
status = U_ZERO_ERROR;
|
||||
utext_openUnicodeString(&bufferText, &buffer, &status);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
|
||||
u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
|
||||
u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
|
||||
u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)/2);
|
||||
regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
|
||||
|
||||
re = uregex_openC("x(.*?)x", 0, NULL, &status);
|
||||
|
@ -5048,7 +5119,9 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
|||
|
||||
/* Normal case, with match */
|
||||
uregex_setText(re, text1, -1, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &bufferText);
|
||||
|
|
Loading…
Add table
Reference in a new issue