mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-7813 64bit regex API, 7675: UText-based Regex to use native indexes, 7764: Improved UText-regex API error handling, 7855: UText regex group API returns shallow clone, 7851: Set region and start position, 7763: Inline regex progress callback function.
X-SVN-Rev: 28647
This commit is contained in:
parent
efa8bfba9e
commit
751473d25a
7 changed files with 681 additions and 319 deletions
|
@ -471,7 +471,7 @@ UnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
|
|||
utext_openUnicodeString(&resultText, &dest, &status);
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
appendTail(&resultText);
|
||||
appendTail(&resultText, status);
|
||||
utext_close(&resultText);
|
||||
}
|
||||
|
||||
|
@ -481,9 +481,25 @@ UnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
|
|||
//
|
||||
// appendTail, UText mode
|
||||
//
|
||||
UText *RegexMatcher::appendTail(UText *dest) {
|
||||
UText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) {
|
||||
UBool bailOut = FALSE;
|
||||
if (U_FAILURE(status)) {
|
||||
bailOut = TRUE;
|
||||
}
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
bailOut = TRUE;
|
||||
}
|
||||
|
||||
if (bailOut) {
|
||||
// dest must not be NULL
|
||||
if (dest) {
|
||||
utext_replace(dest, utext_nativeLength(dest), utext_nativeLength(dest), NULL, 0, &status);
|
||||
return dest;
|
||||
}
|
||||
}
|
||||
|
||||
if (fInputLength > fAppendPosition) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
|
||||
int64_t destLen = utext_nativeLength(dest);
|
||||
utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
|
||||
|
@ -522,9 +538,11 @@ int32_t RegexMatcher::end(UErrorCode &err) const {
|
|||
return end(0, err);
|
||||
}
|
||||
|
||||
//
//   end64(status)    64-bit variant of end(status).
//                    Reports the end position of the entire match by
//                    delegating to the per-group overload with group 0.
//
int64_t RegexMatcher::end64(UErrorCode &err) const {
    const int32_t wholeMatchGroup = 0;
    return end64(wholeMatchGroup, err);
}
|
||||
|
||||
|
||||
int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
|
||||
int64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const {
|
||||
if (U_FAILURE(err)) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -548,13 +566,11 @@ int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
|
|||
e = fFrame->fExtra[groupOffset + 1];
|
||||
}
|
||||
|
||||
if (e == -1 || UTEXT_USES_U16(fInputText)) {
|
||||
return (int32_t)e;
|
||||
} else {
|
||||
// !!!: Would like a better way to do this!
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
return utext_extract(fInputText, 0, e, NULL, 0, &status);
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
|
||||
return (int32_t)end64(group, err);
|
||||
}
|
||||
|
||||
|
||||
|
@ -650,7 +666,7 @@ UBool RegexMatcher::find() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testStartLimit the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
U_ASSERT(FALSE);
|
||||
|
@ -698,7 +714,7 @@ UBool RegexMatcher::find() {
|
|||
return FALSE;
|
||||
}
|
||||
startPos = pos;
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -731,7 +747,7 @@ UBool RegexMatcher::find() {
|
|||
return FALSE;
|
||||
}
|
||||
startPos = pos;
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -779,7 +795,7 @@ UBool RegexMatcher::find() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testStartLimit the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
} else {
|
||||
|
@ -809,7 +825,7 @@ UBool RegexMatcher::find() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testStartLimit the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -825,7 +841,7 @@ UBool RegexMatcher::find() {
|
|||
|
||||
|
||||
|
||||
UBool RegexMatcher::find(int32_t start, UErrorCode &status) {
|
||||
UBool RegexMatcher::find(int64_t start, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -840,25 +856,8 @@ UBool RegexMatcher::find(int32_t start, UErrorCode &status) {
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
UBool couldFindStart = TRUE;
|
||||
int64_t nativeStart;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativeStart = start;
|
||||
} else {
|
||||
UTEXT_SETNATIVEINDEX(fInputText, 0);
|
||||
int32_t i = 0;
|
||||
while (i < start) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindStart = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativeStart = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
if (!couldFindStart || nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
int64_t nativeStart = start;
|
||||
if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -944,7 +943,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testLen the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
U_ASSERT(FALSE);
|
||||
|
@ -985,7 +984,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||
fHitEnd = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -1014,7 +1013,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||
fHitEnd = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -1055,7 +1054,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testLen the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
} else {
|
||||
|
@ -1083,7 +1082,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||
// Note that it's perfectly OK for a pattern to have a zero-length
|
||||
// match at the end of a string, so we must make sure that the loop
|
||||
// runs with startPos == testLen the last time through.
|
||||
if (ReportFindProgress(startPos, fDeferredStatus) == FALSE)
|
||||
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -1108,11 +1107,59 @@ UnicodeString RegexMatcher::group(UErrorCode &status) const {
|
|||
return group(0, status);
|
||||
}
|
||||
|
||||
UText *RegexMatcher::group(UText *dest, MatcherDestIsUTextFlag /*flag*/, UErrorCode &status) const {
|
||||
return group(0, dest, status);
|
||||
// Return immutable shallow clone
|
||||
UText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const {
|
||||
return group(0, dest, group_len, status);
|
||||
}
|
||||
|
||||
// Return immutable shallow clone
|
||||
UText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const {
|
||||
group_len = 0;
|
||||
UBool bailOut = FALSE;
|
||||
if (U_FAILURE(status)) {
|
||||
return dest;
|
||||
}
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
bailOut = TRUE;
|
||||
}
|
||||
if (fMatch == FALSE) {
|
||||
status = U_REGEX_INVALID_STATE;
|
||||
bailOut = TRUE;
|
||||
}
|
||||
if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
bailOut = TRUE;
|
||||
}
|
||||
|
||||
if (bailOut) {
|
||||
return (dest) ? dest : utext_openUChars(NULL, NULL, 0, &status);
|
||||
}
|
||||
|
||||
int64_t s, e;
|
||||
if (groupNum == 0) {
|
||||
s = fMatchStart;
|
||||
e = fMatchEnd;
|
||||
} else {
|
||||
int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
|
||||
U_ASSERT(groupOffset < fPattern->fFrameSize);
|
||||
U_ASSERT(groupOffset >= 0);
|
||||
s = fFrame->fExtra[groupOffset];
|
||||
e = fFrame->fExtra[groupOffset+1];
|
||||
}
|
||||
|
||||
if (s < 0) {
|
||||
// A capture group wasn't part of the match
|
||||
return utext_clone(dest, fInputText, FALSE, TRUE, &status);
|
||||
}
|
||||
U_ASSERT(s <= e);
|
||||
group_len = e - s;
|
||||
|
||||
dest = utext_clone(dest, fInputText, FALSE, TRUE, &status);
|
||||
if (dest)
|
||||
UTEXT_SETNATIVEINDEX(dest, s);
|
||||
return dest;
|
||||
}
|
||||
|
||||
UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
|
||||
UnicodeString result;
|
||||
|
@ -1127,6 +1174,9 @@ UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
|
|||
}
|
||||
|
||||
|
||||
// Return deep (mutable) clone
|
||||
// Technology Preview (as an API), but note that the UnicodeString API is implemented
|
||||
// using this function.
|
||||
UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
|
||||
UBool bailOut = FALSE;
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -1372,8 +1422,25 @@ UText *RegexMatcher::inputText() const {
|
|||
// getInput() -- like inputText(), but makes a clone or copies into another UText
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
UText *RegexMatcher::getInput (UText *dest) const {
|
||||
UErrorCode status = U_ZERO_ERROR; // ignored
|
||||
UText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const {
|
||||
UBool bailOut = FALSE;
|
||||
if (U_FAILURE(status)) {
|
||||
return dest;
|
||||
}
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
bailOut = TRUE;
|
||||
}
|
||||
|
||||
if (bailOut) {
|
||||
if (dest) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
|
||||
return dest;
|
||||
} else {
|
||||
return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
|
||||
}
|
||||
}
|
||||
|
||||
if (dest) {
|
||||
if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
|
||||
utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status);
|
||||
|
@ -1462,7 +1529,7 @@ UBool RegexMatcher::lookingAt(UErrorCode &status) {
|
|||
}
|
||||
|
||||
|
||||
UBool RegexMatcher::lookingAt(int32_t start, UErrorCode &status) {
|
||||
UBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1485,24 +1552,8 @@ UBool RegexMatcher::lookingAt(int32_t start, UErrorCode &status) {
|
|||
}
|
||||
|
||||
int64_t nativeStart;
|
||||
UBool couldFindStart = TRUE;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativeStart = start;
|
||||
} else {
|
||||
UTEXT_SETNATIVEINDEX(fInputText, 0);
|
||||
int32_t i = 0;
|
||||
while (i < start) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindStart = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativeStart = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
if (!couldFindStart || nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
nativeStart = start;
|
||||
if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1550,7 +1601,7 @@ UBool RegexMatcher::matches(UErrorCode &status) {
|
|||
}
|
||||
|
||||
|
||||
UBool RegexMatcher::matches(int32_t start, UErrorCode &status) {
|
||||
UBool RegexMatcher::matches(int64_t start, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1573,24 +1624,8 @@ UBool RegexMatcher::matches(int32_t start, UErrorCode &status) {
|
|||
}
|
||||
|
||||
int64_t nativeStart;
|
||||
UBool couldFindStart = TRUE;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativeStart = start;
|
||||
} else {
|
||||
UTEXT_SETNATIVEINDEX(fInputText, 0);
|
||||
int32_t i = 0;
|
||||
while (i < start) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindStart = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativeStart = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
if (!couldFindStart || nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
nativeStart = start;
|
||||
if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1621,65 +1656,38 @@ const RegexPattern &RegexMatcher::pattern() const {
|
|||
// region
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
RegexMatcher &RegexMatcher::region(int32_t start, int32_t limit, UErrorCode &status) {
|
||||
RegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
if (start>limit || start<0 || limit<0) {
|
||||
if (regionStart>regionLimit || regionStart<0 || regionLimit<0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
int64_t nativeStart;
|
||||
int32_t i = 0;
|
||||
UBool couldFindStart = TRUE;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativeStart = start;
|
||||
couldFindStart = (nativeStart <= fInputLength);
|
||||
} else {
|
||||
UTEXT_SETNATIVEINDEX(fInputText, 0);
|
||||
while (i < start) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindStart = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativeStart = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
int64_t nativeLimit = nativeStart;
|
||||
|
||||
if (!couldFindStart) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
UBool couldFindLimit = TRUE;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativeLimit = limit;
|
||||
couldFindLimit = (nativeLimit <= fInputLength);
|
||||
} else {
|
||||
while (i < limit) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindLimit = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativeLimit = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
if (!couldFindLimit) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
int64_t nativeStart = regionStart;
|
||||
int64_t nativeLimit = regionLimit;
|
||||
if (nativeStart > fInputLength || nativeLimit > fInputLength) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
this->reset();
|
||||
if (startIndex == -1)
|
||||
this->reset();
|
||||
else
|
||||
resetPreserveRegion();
|
||||
|
||||
fRegionStart = nativeStart;
|
||||
fRegionLimit = nativeLimit;
|
||||
fActiveStart = nativeStart;
|
||||
fActiveLimit = nativeLimit;
|
||||
|
||||
if (startIndex != -1) {
|
||||
if (startIndex < fActiveStart || startIndex > fActiveLimit) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
fMatchEnd = startIndex;
|
||||
}
|
||||
|
||||
if (!fTransparentBounds) {
|
||||
fLookStart = nativeStart;
|
||||
fLookLimit = nativeLimit;
|
||||
|
@ -1691,7 +1699,9 @@ RegexMatcher &RegexMatcher::region(int32_t start, int32_t limit, UErrorCode &sta
|
|||
return *this;
|
||||
}
|
||||
|
||||
|
||||
//
//   region(start, limit, status)
//        Sets the region without supplying a start index: forwards to the
//        four-argument overload with -1 as the startIndex argument.
//
RegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) {
    const int64_t noStartIndex = -1;
    return region(start, limit, noStartIndex, status);
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -1699,15 +1709,12 @@ RegexMatcher &RegexMatcher::region(int32_t start, int32_t limit, UErrorCode &sta
|
|||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
int32_t RegexMatcher::regionEnd() const {
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
return (int32_t)fRegionLimit;
|
||||
} else {
|
||||
// !!!: Would like a better way to do this!
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
return utext_extract(fInputText, 0, fRegionLimit, NULL, 0, &status);
|
||||
}
|
||||
return (int32_t)fRegionLimit;
|
||||
}
|
||||
|
||||
int64_t RegexMatcher::regionEnd64() const {
|
||||
return fRegionLimit;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -1715,13 +1722,11 @@ int32_t RegexMatcher::regionEnd() const {
|
|||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
int32_t RegexMatcher::regionStart() const {
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
return (int32_t)fRegionStart;
|
||||
} else {
|
||||
// !!!: Would like a better way to do this!
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
return utext_extract(fInputText, 0, fRegionStart, NULL, 0, &status);
|
||||
}
|
||||
return (int32_t)fRegionStart;
|
||||
}
|
||||
|
||||
int64_t RegexMatcher::regionStart64() const {
|
||||
return fRegionStart;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1779,7 +1784,7 @@ UText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &sta
|
|||
break;
|
||||
}
|
||||
}
|
||||
appendTail(dest);
|
||||
appendTail(dest, status);
|
||||
}
|
||||
|
||||
return dest;
|
||||
|
@ -1821,7 +1826,7 @@ UText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &s
|
|||
|
||||
reset();
|
||||
if (!find()) {
|
||||
return getInput(dest);
|
||||
return getInput(dest, status);
|
||||
}
|
||||
|
||||
if (dest == NULL) {
|
||||
|
@ -1834,7 +1839,7 @@ UText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &s
|
|||
}
|
||||
|
||||
appendReplacement(dest, replacement, status);
|
||||
appendTail(dest);
|
||||
appendTail(dest, status);
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
@ -1936,7 +1941,7 @@ RegexMatcher &RegexMatcher::reset(UText *input) {
|
|||
return *this;
|
||||
}*/
|
||||
|
||||
RegexMatcher &RegexMatcher::reset(int32_t position, UErrorCode &status) {
|
||||
RegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
|
@ -1948,24 +1953,8 @@ RegexMatcher &RegexMatcher::reset(int32_t position, UErrorCode &status) {
|
|||
}
|
||||
|
||||
int64_t nativePos;
|
||||
UBool couldFindStart = TRUE;
|
||||
if (UTEXT_USES_U16(fInputText)) {
|
||||
nativePos = position;
|
||||
} else {
|
||||
UTEXT_SETNATIVEINDEX(fInputText, 0);
|
||||
int32_t i = 0;
|
||||
while (i < position) {
|
||||
UChar32 c = UTEXT_NEXT32(fInputText);
|
||||
if (c != U_SENTINEL) {
|
||||
i += U16_LENGTH(c);
|
||||
} else {
|
||||
couldFindStart = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nativePos = UTEXT_GETNATIVEINDEX(fInputText);
|
||||
}
|
||||
if (!couldFindStart || nativePos < fActiveStart || nativePos >= fActiveLimit) {
|
||||
nativePos = position;
|
||||
if (nativePos < fActiveStart || nativePos >= fActiveLimit) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return *this;
|
||||
}
|
||||
|
@ -2224,15 +2213,17 @@ int32_t RegexMatcher::start(UErrorCode &status) const {
|
|||
return start(0, status);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
//   start64(status)    64-bit variant of start(status).
//                      Reports the start position of the entire match by
//                      delegating to the per-group overload with group 0.
//
int64_t RegexMatcher::start64(UErrorCode &status) const {
    const int32_t wholeMatchGroup = 0;
    return start64(wholeMatchGroup, status);
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// start(int32_t group, UErrorCode &status)
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
int32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
|
||||
|
||||
int64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -2258,16 +2249,13 @@ int32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
|
|||
s = fFrame->fExtra[groupOffset];
|
||||
}
|
||||
|
||||
if (s == -1 || UTEXT_USES_U16(fInputText)) {
|
||||
return (int32_t)s;
|
||||
} else {
|
||||
// !!!: Would like a better way to do this!
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
return utext_extract(fInputText, 0, s, NULL, 0, &status);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
int32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
|
||||
return (int32_t)start64(group, status);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
@ -581,11 +581,13 @@ UnicodeString RegexPattern::pattern() const {
|
|||
// patternText
|
||||
//
|
||||
//---------------------------------------------------------------------
|
||||
UText *RegexPattern::patternText() const {
|
||||
UText *RegexPattern::patternText(UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {return NULL;}
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
if (fPattern != NULL) {
|
||||
return fPattern;
|
||||
} else {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexStaticSets::initGlobals(&status);
|
||||
return RegexStaticSets::gStaticSets->fEmptyText;
|
||||
}
|
||||
|
|
|
@ -214,7 +214,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return A regexPattern object for the compiled pattern.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
static RegexPattern * U_EXPORT2 compile( UText *regex,
|
||||
UParseError &pe,
|
||||
|
@ -274,7 +274,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return A regexPattern object for the compiled pattern.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
static RegexPattern * U_EXPORT2 compile( UText *regex,
|
||||
uint32_t flags,
|
||||
|
@ -331,7 +331,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return A regexPattern object for the compiled pattern.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
static RegexPattern * U_EXPORT2 compile( UText *regex,
|
||||
uint32_t flags,
|
||||
|
@ -368,7 +368,7 @@ public:
|
|||
|
||||
/**
|
||||
* Flag to disambiguate RegexPattern::matcher signature
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
enum PatternIsUTextFlag { PATTERN_IS_UTEXT };
|
||||
|
||||
|
@ -389,7 +389,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return A RegexMatcher object for this pattern and input.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual RegexMatcher *matcher(UText *input,
|
||||
PatternIsUTextFlag flag,
|
||||
|
@ -460,7 +460,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return True if the regular expression exactly matches the full input string.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
static UBool U_EXPORT2 matches(UText *regex,
|
||||
UText *input,
|
||||
|
@ -487,9 +487,9 @@ public:
|
|||
* UText, and that UText was modified, the returned UText may no longer reflect the RegexPattern
|
||||
* object.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual UText *patternText() const;
|
||||
virtual UText *patternText(UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -546,7 +546,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The number of fields into which the input string was split.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual int32_t split(UText *input,
|
||||
UText *dest[],
|
||||
|
@ -677,7 +677,7 @@ public:
|
|||
* @see UREGEX_CASE_INSENSITIVE
|
||||
* @param status Any errors are reported by setting this UErrorCode variable.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
|
||||
|
||||
|
@ -724,7 +724,7 @@ public:
|
|||
* @see UREGEX_CASE_INSENSITIVE
|
||||
* @param status Any errors are reported by setting this UErrorCode variable.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
RegexMatcher(UText *regexp, UText *input,
|
||||
uint32_t flags, UErrorCode &status);
|
||||
|
@ -770,12 +770,12 @@ public:
|
|||
* at the specified startIndex, and extending to the end of the input.
|
||||
* The input region is reset to include the entire input string.
|
||||
* A successful match must extend to the end of the input.
|
||||
* @param startIndex The input string index at which to begin matching.
|
||||
* @param startIndex The input string (native) index at which to begin matching.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return TRUE if there is a match
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual UBool matches(int32_t startIndex, UErrorCode &status);
|
||||
virtual UBool matches(int64_t startIndex, UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -802,12 +802,12 @@ public:
|
|||
* <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
|
||||
* <code>end()</code>, and <code>group()</code> functions.</p>
|
||||
*
|
||||
* @param startIndex The input string index at which to begin matching.
|
||||
* @param startIndex The input string (native) index at which to begin matching.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return TRUE if there is a match.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
|
||||
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -829,12 +829,12 @@ public:
|
|||
* Resets this RegexMatcher and then attempts to find the next substring of the
|
||||
* input string that matches the pattern, starting at the specified index.
|
||||
*
|
||||
* @param start the position in the input string to begin the search
|
||||
* @param start The (native) index in the input string to begin the search.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return TRUE if a match is found.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UBool find(int32_t start, UErrorCode &status);
|
||||
virtual UBool find(int64_t start, UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -849,30 +849,6 @@ public:
|
|||
virtual UnicodeString group(UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Flag to disambiguate RegexMatcher::group signature
|
||||
* @internal ICU 4.4 technology preview
|
||||
*/
|
||||
enum MatcherDestIsUTextFlag { MATCHER_DEST_IS_UTEXT };
|
||||
|
||||
/**
|
||||
* Returns a string containing the text matched by the previous match.
|
||||
* If the pattern can match an empty string, an empty string may be returned.
|
||||
* @param dest A mutable UText in which the matching text is placed.
|
||||
* If NULL, a new UText will be created (which may not be mutable).
|
||||
* @param flag Must be RegexMatcher::MATCHER_DEST_IS_UTEXT; used to
|
||||
* disambiguate method signature.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* Possible errors are U_REGEX_INVALID_STATE if no match
|
||||
* has been attempted or the last match failed.
|
||||
* @return A string containing the matched input text. If a pre-allocated UText
|
||||
* was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
*/
|
||||
virtual UText *group(UText *dest, MatcherDestIsUTextFlag flag, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns a string containing the text captured by the given group
|
||||
* during the previous match operation. Group(0) is the entire match.
|
||||
|
@ -888,6 +864,31 @@ public:
|
|||
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of capturing groups in this matcher's pattern.
|
||||
* @return the number of capture groups
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t groupCount() const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns a shallow clone of the entire live input string with the UText current native index
|
||||
* set to the beginning of the requested group.
|
||||
* Note that copying the entire input string may cause significant performance and memory issues.
|
||||
* @param dest The UText into which the input should be copied, or NULL to create a new UText
|
||||
* @param group_len A reference to receive the length of the desired capture group
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* Possible errors are U_REGEX_INVALID_STATE if no match
|
||||
* has been attempted or the last match failed and
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
|
||||
* @return dest if non-NULL, a shallow copy of the input text otherwise
|
||||
*
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
|
||||
virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Returns a string containing the text captured by the given group
|
||||
* during the previous match operation. Group(0) is the entire match.
|
||||
|
@ -906,23 +907,20 @@ public:
|
|||
virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of capturing groups in this matcher's pattern.
|
||||
* @return the number of capture groups
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t groupCount() const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the start of the text matched
|
||||
* during the previous match operation.
|
||||
* @param status a reference to a UErrorCode to receive any errors.
|
||||
* @return The position in the input string of the start of the last match.
|
||||
* @return The (native) position in the input string of the start of the last match.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t start(UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t start64(UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the start of the text matched by the
|
||||
|
@ -934,11 +932,16 @@ public:
|
|||
* errors are U_REGEX_INVALID_STATE if no match has been
|
||||
* attempted or the last match failed, and
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
|
||||
* @return the start position of substring matched by the specified group.
|
||||
* @return the (native) start position of substring matched by the specified group.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t start(int32_t group, UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t start64(int32_t group, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the first character following the
|
||||
|
@ -947,10 +950,18 @@ public:
|
|||
* errors are U_REGEX_INVALID_STATE if no match has been
|
||||
* attempted or the last match failed.
|
||||
* @return the index of the last character matched, plus one.
|
||||
* The index value returned is a native index, corresponding to
|
||||
* code units for the underlying encoding type, for example,
|
||||
* a byte index for UTF8.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t end(UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t end64(UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the character following the
|
||||
|
@ -963,10 +974,18 @@ public:
|
|||
* @return the index of the first character following the text
|
||||
* captured by the specified group during the previous match operation.
|
||||
* Return -1 if the capture group exists in the pattern but was not part of the match.
|
||||
* The index value returned is a native index, corresponding to
|
||||
* code units for the underlying encoding type, for example,
|
||||
* a byte index for UTF8.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t end(int32_t group, UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t end64(int32_t group, UErrorCode &status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Resets this matcher. The effect is to remove any memory of previous matches,
|
||||
|
@ -983,7 +1002,7 @@ public:
|
|||
* Resets this matcher, and set the current input position.
|
||||
* The effect is to remove any memory of previous matches,
|
||||
* and to cause subsequent find() operations to begin at
|
||||
* the specified position in the input string.
|
||||
* the specified (native) position in the input string.
|
||||
* <p>
|
||||
* The matcher's region is reset to its default, which is the entire
|
||||
* input string.
|
||||
|
@ -994,7 +1013,7 @@ public:
|
|||
* @return this RegexMatcher.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
|
||||
virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -1028,7 +1047,7 @@ public:
|
|||
* until after regexp operations on it are done.
|
||||
* @return this RegexMatcher.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual RegexMatcher &reset(UText *input);
|
||||
|
||||
|
@ -1064,7 +1083,7 @@ public:
|
|||
* a UnicodeString.
|
||||
* @return the input text
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual UText *inputText() const;
|
||||
|
||||
|
@ -1075,9 +1094,9 @@ public:
|
|||
* @param dest The UText into which the input should be copied, or NULL to create a new UText
|
||||
* @return dest if non-NULL, a shallow copy of the input text otherwise
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual UText *getInput(UText *dest) const;
|
||||
virtual UText *getInput(UText *dest, UErrorCode &status) const;
|
||||
|
||||
|
||||
/** Sets the limits of this matcher's region.
|
||||
|
@ -1093,35 +1112,55 @@ public:
|
|||
* The function will fail if start is greater than limit, or if either index
|
||||
* is less than zero or greater than the length of the string being matched.
|
||||
*
|
||||
* @param start The index to begin searches at.
|
||||
* @param start The (native) index to begin searches at.
|
||||
* @param limit The index to end searches at (exclusive).
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual RegexMatcher &region(int32_t start, int32_t limit, UErrorCode &status);
|
||||
virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Identical to region(start, limit, status) but also allows a start position without
|
||||
* resetting the region state.
|
||||
* @param startIndex The (native) index within the region bounds at which to begin searches.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* If startIndex is not within the specified region bounds,
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR is returned.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Reports the start index of this matcher's region. The searches this matcher
|
||||
* conducts are limited to finding matches within regionStart (inclusive) and
|
||||
* regionEnd (exclusive).
|
||||
*
|
||||
* @return The starting index of this matcher's region.
|
||||
* @return The starting (native) index of this matcher's region.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual int32_t regionStart() const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t regionStart64() const;
|
||||
|
||||
|
||||
/**
|
||||
* Reports the end (limit) index (exclusive) of this matcher's region. The searches
|
||||
* this matcher conducts are limited to finding matches within regionStart
|
||||
* (inclusive) and regionEnd (exclusive).
|
||||
*
|
||||
* @return The ending point of this matcher's region.
|
||||
* @return The ending point (native) of this matcher's region.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual int32_t regionEnd() const;
|
||||
|
||||
/**
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual int64_t regionEnd64() const;
|
||||
|
||||
/**
|
||||
* Queries the transparency of region bounds for this matcher.
|
||||
* See useTransparentBounds for a description of transparent and opaque bounds.
|
||||
|
@ -1249,7 +1288,7 @@ public:
|
|||
* @return a string containing the results of the find and replace.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
|
||||
|
||||
|
@ -1299,7 +1338,7 @@ public:
|
|||
* @return a string containing the results of the find and replace.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
|
||||
|
||||
|
@ -1360,7 +1399,7 @@ public:
|
|||
*
|
||||
* @return this RegexMatcher
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual RegexMatcher &appendReplacement(UText *dest,
|
||||
UText *replacement, UErrorCode &status);
|
||||
|
@ -1389,9 +1428,9 @@ public:
|
|||
* Must not be NULL.
|
||||
* @return the destination string.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
virtual UText *appendTail(UText *dest);
|
||||
virtual UText *appendTail(UText *dest, UErrorCode &status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -1444,7 +1483,7 @@ public:
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The number of fields into which the input string was split.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
virtual int32_t split(UText *input,
|
||||
UText *dest[],
|
||||
|
@ -1581,6 +1620,15 @@ public:
|
|||
UErrorCode &status);
|
||||
|
||||
|
||||
/**
 * Inline form of the ReportFindProgress() check, used to avoid a function
 * call where testing for the presence of a callback is enough.
 *
 * Evaluates to true when a find-progress callback is installed
 * (fFindProgressCallbackFn != NULL) and ReportFindProgress(pos, status)
 * returns FALSE. The && short-circuits, so ReportFindProgress() is not
 * called at all when no callback is set.
 *
 * The expansion refers to fFindProgressCallbackFn and ReportFindProgress
 * directly, so both names must be visible wherever the macro is used.
 *
 * The whole replacement list is parenthesized so that it remains a single
 * operand inside a larger expression, e.g. !REGEXFINDPROGRESS_INTERRUPT(p, s).
 *
 * @param pos     forwarded unchanged to ReportFindProgress()
 * @param status  forwarded unchanged to ReportFindProgress()
 *
 * @draft ICU 4.6
 */
#define REGEXFINDPROGRESS_INTERRUPT(pos, status) \
    ((fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE))
|
||||
|
||||
|
||||
/**
|
||||
* setTrace Debug function, enable/disable tracing of the matching engine.
|
||||
|
|
|
@ -158,9 +158,9 @@ uregex_open( const UChar *pattern,
|
|||
* information is not wanted, pass NULL for this parameter.
|
||||
* @param status Receives error detected by this function.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL URegularExpression * U_EXPORT2
|
||||
U_DRAFT URegularExpression * U_EXPORT2
|
||||
uregex_openUText(UText *pattern,
|
||||
uint32_t flags,
|
||||
UParseError *pe,
|
||||
|
@ -280,9 +280,9 @@ uregex_pattern(const URegularExpression *regexp,
|
|||
* @return the pattern text. The storage for the text is owned by the regular expression
|
||||
* object, and must not be altered or deleted.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_patternUText(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
|
@ -341,9 +341,9 @@ uregex_setText(URegularExpression *regexp,
|
|||
* @param text The subject text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setUText(URegularExpression *regexp,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
@ -388,9 +388,9 @@ uregex_getText(URegularExpression *regexp,
|
|||
* @return The subject text currently associated with this regular expression.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_getUText(URegularExpression *regexp,
|
||||
UText *dest,
|
||||
UErrorCode *status);
|
||||
|
@ -409,7 +409,7 @@ uregex_getUText(URegularExpression *regexp,
|
|||
* Matcher.matches() in Java
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param startIndex The input string index at which to begin matching, or -1
|
||||
* @param startIndex The input string (native) index at which to begin matching, or -1
|
||||
* to match the input Region.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return TRUE if there is a match
|
||||
|
@ -420,6 +420,15 @@ uregex_matches(URegularExpression *regexp,
|
|||
int32_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_matches.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uregex_matches64(URegularExpression *regexp,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Attempts to match the input string, starting from the specified index, against the pattern.
|
||||
* The match may be of any length, and is not required to extend to the end
|
||||
|
@ -437,7 +446,7 @@ uregex_matches(URegularExpression *regexp,
|
|||
* and <code>uregexp_group()</code> functions.</p>
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param startIndex The input string index at which to begin matching, or
|
||||
* @param startIndex The input string (native) index at which to begin matching, or
|
||||
* -1 to match the Input Region
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return TRUE if there is a match.
|
||||
|
@ -448,6 +457,15 @@ uregex_lookingAt(URegularExpression *regexp,
|
|||
int32_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_lookingAt.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uregex_lookingAt64(URegularExpression *regexp,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Find the first matching substring of the input string that matches the pattern.
|
||||
* If startIndex is >= zero the search for a match begins at the specified index,
|
||||
|
@ -461,7 +479,7 @@ uregex_lookingAt(URegularExpression *regexp,
|
|||
* <code>uregex_group()</code> will provide more information regarding the match.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param startIndex The position in the input string to begin the search, or
|
||||
* @param startIndex The position (native) in the input string to begin the search, or
|
||||
* -1 to search within the Input Region.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return TRUE if a match is found.
|
||||
|
@ -472,6 +490,15 @@ uregex_find(URegularExpression *regexp,
|
|||
int32_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_find.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uregex_find64(URegularExpression *regexp,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Find the next pattern match in the input string. Begin searching
|
||||
* the input at the location following the end of the previous match,
|
||||
|
@ -523,6 +550,37 @@ uregex_group(URegularExpression *regexp,
|
|||
int32_t destCapacity,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/** Returns a shallow immutable clone of the entire input string. The returned UText current native index
|
||||
* is set to the beginning of the requested capture group. The capture group length is also
|
||||
* returned via groupLength.
|
||||
* Group #0 is the complete string of matched text.
|
||||
* Group #1 is the text matched by the first set of capturing parentheses.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param groupNum The capture group to extract. Group 0 is the complete
|
||||
* match. The value of this parameter must be
|
||||
* less than or equal to the number of capture groups in
|
||||
* the pattern.
|
||||
* @param dest A mutable UText in which to store the current input.
|
||||
* If NULL, a new UText will be created as an immutable shallow clone
|
||||
* of the entire input string.
|
||||
* @param groupLength The group length of the desired capture group.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The subject text currently associated with this regular expression.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
|
||||
*
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_groupUText(URegularExpression *regexp,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
int64_t *groupLength,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/** Extract the string for the specified matching expression or subexpression.
|
||||
* Group #0 is the complete string of matched text.
|
||||
* Group #1 is the text matched by the first set of capturing parentheses.
|
||||
|
@ -541,12 +599,11 @@ uregex_group(URegularExpression *regexp,
|
|||
* @internal ICU 4.4 technology preview
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
uregex_groupUText(URegularExpression *regexp,
|
||||
uregex_groupUTextDeep(URegularExpression *regexp,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the start of the text matched by the
|
||||
* specified capture group during the previous match operation. Return -1 if
|
||||
|
@ -557,7 +614,7 @@ uregex_groupUText(URegularExpression *regexp,
|
|||
* @param regexp The compiled regular expression.
|
||||
* @param groupNum The capture group number
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return the starting position in the input of the text matched
|
||||
* @return the starting (native) position in the input of the text matched
|
||||
* by the specified group.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
|
@ -566,6 +623,15 @@ uregex_start(URegularExpression *regexp,
|
|||
int32_t groupNum,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_start.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT int64_t U_EXPORT2
|
||||
uregex_start64(URegularExpression *regexp,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the index in the input string of the position following the end
|
||||
* of the text matched by the specified capture group.
|
||||
|
@ -576,7 +642,7 @@ uregex_start(URegularExpression *regexp,
|
|||
* @param regexp The compiled regular expression.
|
||||
* @param groupNum The capture group number
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return the index of the position following the last matched character.
|
||||
* @return the (native) index of the position following the last matched character.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
|
@ -584,6 +650,15 @@ uregex_end(URegularExpression *regexp,
|
|||
int32_t groupNum,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_end.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT int64_t U_EXPORT2
|
||||
uregex_end64(URegularExpression *regexp,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Reset any saved state from the previous match. Has the effect of
|
||||
* causing uregex_findNext to begin at the specified index, and causing
|
||||
|
@ -592,7 +667,7 @@ uregex_end(URegularExpression *regexp,
|
|||
* match region that may have been set.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param index The position in the text at which a
|
||||
* @param index The position (native) in the text at which a
|
||||
* uregex_findNext() should begin searching.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @stable ICU 3.0
|
||||
|
@ -602,7 +677,15 @@ uregex_reset(URegularExpression *regexp,
|
|||
int32_t index,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_reset.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_reset64(URegularExpression *regexp,
|
||||
int64_t index,
|
||||
UErrorCode *status);
|
||||
|
||||
/** Sets the limits of the matching region for this URegularExpression.
|
||||
* The region is the part of the input string that will be considered when matching.
|
||||
* Invoking this method resets any saved state from the previous match,
|
||||
|
@ -617,8 +700,8 @@ uregex_reset(URegularExpression *regexp,
|
|||
* is less than zero or greater than the length of the string being matched.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param regionStart The index to begin searches at.
|
||||
* @param regionLimit The index to end searches at (exclusive).
|
||||
* @param regionStart The (native) index to begin searches at.
|
||||
* @param regionLimit The (native) index to end searches at (exclusive).
|
||||
* @param status A pointer to a UErrorCode to receive any errors.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
|
@ -628,20 +711,48 @@ uregex_setRegion(URegularExpression *regexp,
|
|||
int32_t regionLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_setRegion.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setRegion64(URegularExpression *regexp,
|
||||
int64_t regionStart,
|
||||
int64_t regionLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Variation on uregex_setRegion that also takes a start index, setting the region
|
||||
* without resetting the position for subsequent matches.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_setRegionAndStart(URegularExpression *regexp,
|
||||
int64_t regionStart,
|
||||
int64_t regionLimit,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Reports the start index of the matching region. Any matches found are limited to
|
||||
* the region bounded by regionStart (inclusive) and regionEnd (exclusive).
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param status A pointer to a UErrorCode to receive any errors.
|
||||
* @return The starting index of this matcher's region.
|
||||
* @return The starting (native) index of this matcher's region.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
uregex_regionStart(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_regionStart.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT int64_t U_EXPORT2
|
||||
uregex_regionStart64(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Reports the end index (exclusive) of the matching region for this URegularExpression.
|
||||
|
@ -650,13 +761,21 @@ uregex_regionStart(const URegularExpression *regexp,
|
|||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param status A pointer to a UErrorCode to receive any errors.
|
||||
* @return The ending point of this matcher's region.
|
||||
* @return The ending point (native) of this matcher's region.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
uregex_regionEnd(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* 64bit version of uregex_regionEnd.
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT int64_t U_EXPORT2
|
||||
uregex_regionEnd64(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Queries the transparency of region bounds for this URegularExpression.
|
||||
* See useTransparentBounds for a description of transparent and opaque bounds.
|
||||
|
@ -813,9 +932,9 @@ uregex_replaceAll(URegularExpression *regexp,
|
|||
* @return A UText containing the results of the find and replace.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_replaceAllUText(URegularExpression *regexp,
|
||||
UText *replacement,
|
||||
UText *dest,
|
||||
|
@ -872,9 +991,9 @@ uregex_replaceFirst(URegularExpression *regexp,
|
|||
* @return A UText containing the results of the find and replace.
|
||||
* If a pre-allocated UText was provided, it will always be used and returned.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_replaceFirstUText(URegularExpression *regexp,
|
||||
UText *replacement,
|
||||
UText *dest,
|
||||
|
@ -956,9 +1075,9 @@ uregex_appendReplacement(URegularExpression *regexp,
|
|||
* @param dest A mutable UText that will receive the result. Must not be NULL.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
U_DRAFT void U_EXPORT2
|
||||
uregex_appendReplacementUText(URegularExpression *regexp,
|
||||
UText *replacementText,
|
||||
UText *dest,
|
||||
|
@ -1009,11 +1128,12 @@ uregex_appendTail(URegularExpression *regexp,
|
|||
* @param dest A mutable UText that will receive the result. Must not be NULL.
|
||||
* @return The destination UText.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_INTERNAL UText * U_EXPORT2
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
uregex_appendTailUText(URegularExpression *regexp,
|
||||
UText *dest);
|
||||
UText *dest,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
|
||||
|
@ -1105,9 +1225,9 @@ uregex_split( URegularExpression *regexp,
|
|||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The number of fields into which the input string was split.
|
||||
*
|
||||
* @internal ICU 4.4 technology preview
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uregex_splitUText(URegularExpression *regexp,
|
||||
UText *destFields[],
|
||||
int32_t destFieldsCapacity,
|
||||
|
@ -1257,10 +1377,21 @@ uregex_getMatchCallback(const URegularExpression *regexp,
|
|||
|
||||
|
||||
/**
|
||||
* Function pointer for a regular expression find/findNext callback function.
|
||||
* When set, a callback function will be called during a find operation after each
|
||||
* attempt at a match. If the call back function returns FALSE, the find
|
||||
* operation will be terminated early.
|
||||
* Function pointer for a regular expression find callback function.
|
||||
*
|
||||
* When set, a callback function will be called during a find operation
|
||||
* and for operations that depend on find, such as findNext, split and some replace
|
||||
* operations like replaceFirst.
|
||||
* The callback will usually be called after each attempt at a match, but this is not a
|
||||
* guarantee that the callback will be invoked at each character. For finds where the
|
||||
* match engine is invoked at each character, this may be close to true, but less likely
|
||||
* for more optimized loops where the pattern is known to only start, and the match
|
||||
* engine invoked, at certain characters.
|
||||
* When invoked, this callback will specify the index at which a match operation is about
|
||||
* to be attempted, giving the application the opportunity to terminate a long-running
|
||||
* find operation.
|
||||
*
|
||||
* If the callback function returns FALSE, the find operation will be terminated early.
|
||||
*
|
||||
* Note: the callback function must not call other functions on this
|
||||
* URegularExpression
|
||||
|
@ -1282,9 +1413,7 @@ typedef UBool U_CALLCONV URegexFindProgressCallback (
|
|||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* During find operations, this callback will be invoked after each return from a
|
||||
* match attempt, specifying the next index at which a match operation is about to be attempted,
|
||||
* giving the application the opportunity to terminate a long-running find operation.
|
||||
* Set the find progress callback function for this URegularExpression.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param callback A pointer to the user-supplied callback function.
|
||||
|
@ -1302,7 +1431,7 @@ uregex_setFindProgressCallback(URegularExpression *regexp,
|
|||
|
||||
|
||||
/**
|
||||
* Get the callback function for this URegularExpression.
|
||||
* Get the find progress callback function for this URegularExpression.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param callback Out parameter, receives a pointer to the user-supplied
|
||||
|
|
|
@ -341,8 +341,7 @@ U_CAPI UText * U_EXPORT2
|
|||
uregex_patternUText(const URegularExpression *regexp2,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
(void)status;
|
||||
return regexp->fPat->patternText();
|
||||
return regexp->fPat->patternText(*status);
|
||||
}
|
||||
|
||||
|
||||
|
@ -479,7 +478,7 @@ uregex_getUText(URegularExpression *regexp2,
|
|||
if (validateRE(regexp, status, FALSE) == FALSE) {
|
||||
return dest;
|
||||
}
|
||||
return regexp->fMatcher->getInput(dest);
|
||||
return regexp->fMatcher->getInput(dest, *status);
|
||||
}
|
||||
|
||||
|
||||
|
@ -490,8 +489,15 @@ uregex_getUText(URegularExpression *regexp2,
|
|||
//------------------------------------------------------------------------------
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uregex_matches(URegularExpression *regexp2,
|
||||
int32_t startIndex,
|
||||
UErrorCode *status) {
|
||||
int32_t startIndex,
|
||||
UErrorCode *status) {
|
||||
return uregex_matches64( regexp2, (int64_t)startIndex, status);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uregex_matches64(URegularExpression *regexp2,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
UBool result = FALSE;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
|
@ -506,7 +512,6 @@ uregex_matches(URegularExpression *regexp2,
|
|||
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_lookingAt
|
||||
|
@ -516,6 +521,13 @@ U_CAPI UBool U_EXPORT2
|
|||
uregex_lookingAt(URegularExpression *regexp2,
|
||||
int32_t startIndex,
|
||||
UErrorCode *status) {
|
||||
return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uregex_lookingAt64(URegularExpression *regexp2,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
UBool result = FALSE;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
|
@ -540,6 +552,13 @@ U_CAPI UBool U_EXPORT2
|
|||
uregex_find(URegularExpression *regexp2,
|
||||
int32_t startIndex,
|
||||
UErrorCode *status) {
|
||||
return uregex_find64( regexp2, (int64_t)startIndex, status);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uregex_find64(URegularExpression *regexp2,
|
||||
int64_t startIndex,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
UBool result = FALSE;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
|
@ -554,6 +573,7 @@ uregex_find(URegularExpression *regexp2,
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_findNext
|
||||
|
@ -609,7 +629,7 @@ uregex_group(URegularExpression *regexp2,
|
|||
|
||||
if (destCapacity == 0 || regexp->fText != NULL) {
|
||||
// If preflighting or if we already have the text as UChars,
|
||||
// this is a little cheaper than going through uregex_groupUText()
|
||||
// this is a little cheaper than going through uregex_groupUTextDeep()
|
||||
|
||||
//
|
||||
// Pick up the range of characters from the matcher
|
||||
|
@ -642,7 +662,7 @@ uregex_group(URegularExpression *regexp2,
|
|||
}
|
||||
return fullLength;
|
||||
} else {
|
||||
UText *groupText = uregex_groupUText(regexp2, groupNum, NULL, status);
|
||||
UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
|
||||
int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
|
||||
utext_close(groupText);
|
||||
return result;
|
||||
|
@ -657,6 +677,26 @@ uregex_group(URegularExpression *regexp2,
|
|||
//------------------------------------------------------------------------------
|
||||
// uregex_groupUText: C API entry point that validates the regexp handle and
// then forwards to fMatcher->group(), passing *groupLength and *status through
// so the matcher can fill them in.
// Returns whatever fMatcher->group() returns on the normal path.
U_CAPI UText * U_EXPORT2
|
||||
uregex_groupUText(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
int64_t *groupLength,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
// Validation failed: hand back the caller's dest untouched, or a newly opened
// empty UText when no dest was supplied. *groupLength is not written on this path.
if (validateRE(regexp, status) == FALSE) {
|
||||
// Separate local error code, so opening the placeholder UText does not write to *status.
UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
||||
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
||||
}
|
||||
|
||||
// groupLength and status are dereferenced here without a NULL check.
return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_groupUTextDeep
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
U_CAPI UText * U_EXPORT2
|
||||
uregex_groupUTextDeep(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UText *dest,
|
||||
UErrorCode *status) {
|
||||
|
@ -693,7 +733,6 @@ uregex_groupUText(URegularExpression *regexp2,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_start
|
||||
|
@ -703,6 +742,13 @@ U_CAPI int32_t U_EXPORT2
|
|||
uregex_start(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status) {
|
||||
return (int32_t)uregex_start64( regexp2, groupNum, status);
|
||||
}
|
||||
|
||||
U_CAPI int64_t U_EXPORT2
|
||||
uregex_start64(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return 0;
|
||||
|
@ -711,7 +757,6 @@ uregex_start(URegularExpression *regexp2,
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_end
|
||||
|
@ -721,6 +766,13 @@ U_CAPI int32_t U_EXPORT2
|
|||
uregex_end(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status) {
|
||||
return (int32_t)uregex_end64( regexp2, groupNum, status);
|
||||
}
|
||||
|
||||
U_CAPI int64_t U_EXPORT2
|
||||
uregex_end64(URegularExpression *regexp2,
|
||||
int32_t groupNum,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return 0;
|
||||
|
@ -738,6 +790,13 @@ U_CAPI void U_EXPORT2
|
|||
uregex_reset(URegularExpression *regexp2,
|
||||
int32_t index,
|
||||
UErrorCode *status) {
|
||||
uregex_reset64( regexp2, (int64_t)index, status);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uregex_reset64(URegularExpression *regexp2,
|
||||
int64_t index,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return;
|
||||
|
@ -756,6 +815,14 @@ uregex_setRegion(URegularExpression *regexp2,
|
|||
int32_t regionStart,
|
||||
int32_t regionLimit,
|
||||
UErrorCode *status) {
|
||||
uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uregex_setRegion64(URegularExpression *regexp2,
|
||||
int64_t regionStart,
|
||||
int64_t regionLimit,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return;
|
||||
|
@ -764,6 +831,24 @@ uregex_setRegion(URegularExpression *regexp2,
|
|||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_setRegionAndStart
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
// Validates the regular expression handle and, if it is usable, forwards the
// region bounds and the start index to the underlying matcher's region().
// When validation fails nothing else is done.
U_DRAFT void U_EXPORT2
uregex_setRegionAndStart(URegularExpression *regexp2,
                 int64_t             regionStart,
                 int64_t             regionLimit,
                 int64_t             startIndex,
                 UErrorCode          *status) {
    RegularExpression *re = (RegularExpression*)regexp2;
    if (validateRE(re, status)) {
        re->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_regionStart
|
||||
|
@ -772,6 +857,12 @@ uregex_setRegion(URegularExpression *regexp2,
|
|||
// 32-bit form of uregex_regionStart64(): forwards the call and narrows the
// 64-bit result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionStart(const  URegularExpression   *regexp2,
                          UErrorCode           *status) {
    const int64_t start64 = uregex_regionStart64(regexp2, status);
    return static_cast<int32_t>(start64);
}
|
||||
|
||||
U_CAPI int64_t U_EXPORT2
|
||||
uregex_regionStart64(const URegularExpression *regexp2,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return 0;
|
||||
|
@ -788,6 +879,12 @@ uregex_regionStart(const URegularExpression *regexp2,
|
|||
// 32-bit form of uregex_regionEnd64(): forwards the call and narrows the
// 64-bit result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const  URegularExpression   *regexp2,
                        UErrorCode           *status) {
    const int64_t end64 = uregex_regionEnd64(regexp2, status);
    return static_cast<int32_t>(end64);
}
|
||||
|
||||
U_CAPI int64_t U_EXPORT2
|
||||
uregex_regionEnd64(const URegularExpression *regexp2,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
if (validateRE(regexp, status) == FALSE) {
|
||||
return 0;
|
||||
|
@ -1602,9 +1699,10 @@ uregex_appendTail(URegularExpression *regexp2,
|
|||
//
|
||||
U_CAPI UText * U_EXPORT2
|
||||
uregex_appendTailUText(URegularExpression *regexp2,
|
||||
UText *dest) {
|
||||
UText *dest,
|
||||
UErrorCode *status) {
|
||||
RegularExpression *regexp = (RegularExpression*)regexp2;
|
||||
return regexp->fMatcher->appendTail(dest);
|
||||
return regexp->fMatcher->appendTail(dest, *status);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1731,21 +1731,47 @@ static void TestUTextAPI(void) {
|
|||
|
||||
/* Capture Group 0, the full match. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 0, NULL, &status);
|
||||
actual = uregex_groupUTextDeep(re, 0, NULL, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture Group 0 with shallow clone API. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
{
|
||||
int64_t group_len;
|
||||
int32_t len16;
|
||||
UErrorCode shallowStatus = U_ZERO_ERROR;
|
||||
int64_t nativeIndex;
|
||||
|
||||
actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
nativeIndex = utext_getNativeIndex(actual);
|
||||
/* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
|
||||
/* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
|
||||
len16 = group_len;
|
||||
|
||||
UChar *groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
|
||||
utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
|
||||
UText groupText = UTEXT_INITIALIZER;
|
||||
utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
|
||||
|
||||
TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
|
||||
utext_close(&groupText);
|
||||
}
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture group #1. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 1, NULL, &status);
|
||||
actual = uregex_groupUTextDeep(re, 1, NULL, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT_UTEXT(str_interior, actual);
|
||||
utext_close(actual);
|
||||
|
||||
/* Capture group out of range. Error. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 2, NULL, &status);
|
||||
actual = uregex_groupUTextDeep(re, 2, NULL, &status);
|
||||
TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
TEST_ASSERT(utext_nativeLength(actual) == 0);
|
||||
utext_close(actual);
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
|
||||
#define SUPPORT_MUTATING_INPUT_STRING 0
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
//
|
||||
// Test class boilerplate
|
||||
|
@ -1878,14 +1877,19 @@ void RegexTest::API_Match_UTF8() {
|
|||
utext_openUnicodeString(&destText, &dest, &status);
|
||||
UText *result;
|
||||
//const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
|
||||
result = matcher->group((UText *)NULL, RegexMatcher::MATCHER_DEST_IS_UTEXT, status);
|
||||
// Test shallow-clone API
|
||||
int64_t group_len;
|
||||
result = matcher->group((UText *)NULL, group_len, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
||||
utext_close(result);
|
||||
result = matcher->group(&destText, RegexMatcher::MATCHER_DEST_IS_UTEXT, status);
|
||||
result = matcher->group(0, &destText, group_len, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(result == &destText);
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
||||
// destText is now immutable, reopen it
|
||||
utext_close(&destText);
|
||||
utext_openUnicodeString(&destText, &dest, &status);
|
||||
|
||||
result = matcher->group(0, NULL, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
|
@ -2066,14 +2070,14 @@ void RegexTest::API_Match_UTF8() {
|
|||
unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00};
|
||||
utext_openUTF8(&s, (char *)aboveBMP, -1, &status);
|
||||
m.reset(&s);
|
||||
for (i=0; ; i+=2) {
|
||||
for (i=0; ; i+=4) {
|
||||
if (m.find() == FALSE) {
|
||||
break;
|
||||
}
|
||||
REGEX_ASSERT(m.start(status) == i);
|
||||
REGEX_ASSERT(m.end(status) == i);
|
||||
}
|
||||
REGEX_ASSERT(i==10);
|
||||
REGEX_ASSERT(i==20);
|
||||
|
||||
utext_close(&s);
|
||||
}
|
||||
|
@ -2577,7 +2581,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
|
|||
const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);
|
||||
|
||||
m.appendTail(&resultText);
|
||||
m.appendTail(&resultText, status);
|
||||
const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
|
||||
REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
|
||||
|
||||
|
@ -2900,14 +2904,14 @@ void RegexTest::API_Pattern_UTF8() {
|
|||
//
|
||||
pat1 = new RegexPattern();
|
||||
REGEX_ASSERT(pat1->pattern() == "");
|
||||
REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText());
|
||||
REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));
|
||||
delete pat1;
|
||||
|
||||
regextst_openUTF8FromInvariant(&re1, "(Hello, world)*", -1, &status);
|
||||
pat1 = RegexPattern::compile(&re1, pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(pat1->pattern() == "(Hello, world)*");
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText());
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
|
||||
delete pat1;
|
||||
|
||||
utext_close(&re1);
|
||||
|
@ -3090,6 +3094,31 @@ static void set(UVector &vec, int32_t val, UnicodeString index) {
|
|||
vec.setElementAt(val, idx);
|
||||
}
|
||||
|
||||
// Stores val at position idx of vec, first padding the vector with -1
// entries until idx is a valid position.
static void setInt(UVector &vec, int32_t val, int32_t idx) {
    UErrorCode  status=U_ZERO_ERROR;
    // Stop padding as soon as addElement() reports an error. Presumably a
    // failed add leaves size() unchanged (verify against UVector), in which
    // case looping on size() alone would never terminate.
    while (U_SUCCESS(status) && vec.size()<idx+1) {
        vec.addElement(-1, status);
    }
    // Only store when padding succeeded, so idx is known to be in range.
    if (U_SUCCESS(status)) {
        vec.setElementAt(val, idx);
    }
}
|
||||
|
||||
// Walks utext from native index 0, one code point at a time, counting UTF-16
// code units until at least unistrOffset units have been consumed. The native
// index reached is written to nativeIndex in every case.
// Returns TRUE if the offset was reached, FALSE if U_SENTINEL was returned
// by UTEXT_NEXT32 first.
static UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex)
{
    UTEXT_SETNATIVEINDEX(utext, 0);

    UBool reachedOffset = TRUE;
    for (int32_t utf16Units = 0; utf16Units < unistrOffset; ) {
        UChar32 cp = UTEXT_NEXT32(utext);
        if (cp == U_SENTINEL) {
            // No more code points before the requested offset.
            reachedOffset = FALSE;
            break;
        }
        utf16Units += U16_LENGTH(cp);
    }

    nativeIndex = UTEXT_GETNATIVEINDEX(utext);
    return reachedOffset;
}
|
||||
|
||||
|
||||
void RegexTest::regex_find(const UnicodeString &pattern,
|
||||
const UnicodeString &flags,
|
||||
const UnicodeString &inputString,
|
||||
|
@ -3112,6 +3141,8 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|||
RegexMatcher *matcher = NULL, *UTF8Matcher = NULL;
|
||||
UVector groupStarts(status);
|
||||
UVector groupEnds(status);
|
||||
UVector groupStartsUTF8(status);
|
||||
UVector groupEndsUTF8(status);
|
||||
UBool isMatch = FALSE, isUTF8Match = FALSE;
|
||||
UBool failed = FALSE;
|
||||
int32_t numFinds;
|
||||
|
@ -3120,6 +3151,9 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|||
UBool useLookingAtFunc = FALSE;
|
||||
int32_t regionStart = -1;
|
||||
int32_t regionEnd = -1;
|
||||
int32_t regionStartUTF8 = -1;
|
||||
int32_t regionEndUTF8 = -1;
|
||||
|
||||
|
||||
//
|
||||
// Compile the caller's pattern
|
||||
|
@ -3278,7 +3312,6 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|||
goto cleanupAndReturn;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Configure the matcher according to the flags specified with this test.
|
||||
//
|
||||
|
@ -3307,11 +3340,47 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Generate native indices for UTF8 versions of region and capture group info
|
||||
//
|
||||
if (UTF8Matcher != NULL) {
|
||||
if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
|
||||
if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
|
||||
|
||||
// Fill out the native index UVector info.
|
||||
// Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
|
||||
for (i=0; i<groupStarts.size(); i++) {
|
||||
int32_t start = groupStarts.elementAti(i);
|
||||
// -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
|
||||
if (start >= 0) {
|
||||
int32_t startUTF8;
|
||||
if (!utextOffsetToNative(&inputText, start, startUTF8)) {
|
||||
errln("Error at line %d: could not find native index for group start %d. UTF16 index %d", line, i, start);
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
|
||||
}
|
||||
setInt(groupStartsUTF8, startUTF8, i);
|
||||
}
|
||||
|
||||
int32_t end = groupEnds.elementAti(i);
|
||||
// -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
|
||||
if (end >= 0) {
|
||||
int32_t endUTF8;
|
||||
if (!utextOffsetToNative(&inputText, end, endUTF8)) {
|
||||
errln("Error at line %d: could not find native index for group end %d. UTF16 index %d", line, i, end);
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
|
||||
}
|
||||
setInt(groupEndsUTF8, endUTF8, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (regionStart>=0) {
|
||||
matcher->region(regionStart, regionEnd, status);
|
||||
REGEX_CHECK_STATUS_L(line);
|
||||
if (UTF8Matcher != NULL) {
|
||||
UTF8Matcher->region(regionStart, regionEnd, status);
|
||||
UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status);
|
||||
REGEX_CHECK_STATUS_L(line);
|
||||
}
|
||||
}
|
||||
|
@ -3388,28 +3457,30 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|||
REGEX_CHECK_STATUS_L(line);
|
||||
for (i=0; i<=matcher->groupCount(); i++) {
|
||||
int32_t expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i));
|
||||
int32_t expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i));
|
||||
if (matcher->start(i, status) != expectedStart) {
|
||||
errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d",
|
||||
line, i, expectedStart, matcher->start(i, status));
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
|
||||
} else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStart) {
|
||||
} else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {
|
||||
errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d (UTF8)",
|
||||
line, i, expectedStart, UTF8Matcher->start(i, status));
|
||||
line, i, expectedStartUTF8, UTF8Matcher->start(i, status));
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
|
||||
}
|
||||
|
||||
int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
|
||||
int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
|
||||
if (matcher->end(i, status) != expectedEnd) {
|
||||
errln("Error at line %d: incorrect end position for group %d. Expected %d, got %d",
|
||||
line, i, expectedEnd, matcher->end(i, status));
|
||||
failed = TRUE;
|
||||
// Error on end position; keep going; real error is probably yet to come as group
|
||||
// end positions work from end of the input data towards the front.
|
||||
} else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEnd) {
|
||||
} else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {
|
||||
errln("Error at line %d: incorrect end position for group %d. Expected %d, got %d (UTF8)",
|
||||
line, i, expectedEnd, UTF8Matcher->end(i, status));
|
||||
line, i, expectedEndUTF8, UTF8Matcher->end(i, status));
|
||||
failed = TRUE;
|
||||
// Error on end position; keep going; real error is probably yet to come as group
|
||||
// end positions work from end of the input data towards the front.
|
||||
|
@ -4757,21 +4828,21 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
|||
|
||||
/* Capture Group 0, the full match. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 0, &bufferText, &status);
|
||||
actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
|
||||
|
||||
/* Capture group #1. Should succeed. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 1, &bufferText, &status);
|
||||
actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
|
||||
|
||||
/* Capture group out of range. Error. */
|
||||
status = U_ZERO_ERROR;
|
||||
actual = uregex_groupUText(re, 2, &bufferText, &status);
|
||||
actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
|
||||
REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
REGEX_ASSERT(actual == &bufferText);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue