mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-2422 regexp, Look-ahead ops added. Bug in caching input
string in Matcher fixed. X-SVN-Rev: 10938
This commit is contained in:
parent
4d7921d1b8
commit
61b188cc37
6 changed files with 178 additions and 52 deletions
|
@ -500,7 +500,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||
fRXPat->fCompiledPat->setElementAt(op, savePosition);
|
||||
|
||||
// Append a JMP operation into the compiled pattern. The operand for
|
||||
// the OR will eventually be the location following the ')' for the
|
||||
// the JMP will eventually be the location following the ')' for the
|
||||
// group. This will be patched in later, when the ')' is encountered.
|
||||
op = URX_BUILD(URX_JMP, 0);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
@ -601,18 +601,70 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||
fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP
|
||||
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case doOpenLookAhead:
|
||||
// Open Paren.
|
||||
error(U_REGEX_UNIMPLEMENTED);
|
||||
// Positive Look-ahead (?= stuff )
|
||||
// Compiles to
|
||||
// 1. START_LA dataLoc
|
||||
// 2. NOP reserved for use by quantifiers on the block.
|
||||
// Look-ahead can't have quantifiers, but paren stack
|
||||
// compile time conventions require the slot anyhow.
|
||||
// 3. NOP may be replaced if there are '|' ops in the block.
|
||||
// 4. code for parenthesized stuff.
|
||||
// 5. END_LA
|
||||
//
|
||||
// Two data slots are reserved, for saving the stack ptr and the input position.
|
||||
{
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 2;
|
||||
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
op = URX_BUILD(URX_NOP, 0);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
// On the Parentheses stack, start a new frame and add the positions
|
||||
// of the NOPs.
|
||||
fParenStack.push(EParenClass::lookAhead, *fStatus); // Begin a new frame.
|
||||
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP
|
||||
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
|
||||
}
|
||||
break;
|
||||
|
||||
case doOpenLookAheadNeg:
|
||||
// Open Paren.
|
||||
error(U_REGEX_UNIMPLEMENTED);
|
||||
// Negated Lookahead. (?! stuff )
|
||||
// Compiles to
|
||||
// 1. START_LA dataloc
|
||||
// 2. SAVE_STATE 7 // Fail within look-ahead block restores to this state,
|
||||
// // which continues with the match.
|
||||
// 3. NOP // Std. Open Paren sequence, for possible '|'
|
||||
// 4. code for parenthesized stuff.
|
||||
// 5. END_LA // Cut back stack, remove saved state from step 2.
|
||||
// 6. FAIL // code in block succeeded, so neg. lookahead fails.
|
||||
// 7. ...
|
||||
{
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 2;
|
||||
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
op = URX_BUILD(URX_STATE_SAVE, 0); // dest address will be patched later.
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
op = URX_BUILD(URX_NOP, 0);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
// On the Parentheses stack, start a new frame and add the positions
|
||||
// of the StateSave and NOP.
|
||||
fParenStack.push(EParenClass::negLookAhead, *fStatus); // Begin a new frame.
|
||||
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE
|
||||
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
|
||||
|
||||
// Instructions #5 and #6 will be added when the ')' is encountered.
|
||||
}
|
||||
break;
|
||||
|
||||
case doOpenLookBehind:
|
||||
|
@ -1410,11 +1462,12 @@ void RegexCompile::handleCloseParen() {
|
|||
|
||||
// Fixup any operations within the just-closed parenthesized group
|
||||
// that need to reference the end of the (block).
|
||||
// (The first one on popped from the stack is an unused slot for
|
||||
// (The first one popped from the stack is an unused slot for
|
||||
// alternation (OR) state save, but applying the fixup to it does no harm.)
|
||||
for (;;) {
|
||||
patIdx = fParenStack.popi();
|
||||
if (patIdx < 0) {
|
||||
// value < 0 flags the start of the frame on the paren stack.
|
||||
break;
|
||||
}
|
||||
U_ASSERT(patIdx>0 && patIdx <= fRXPat->fCompiledPat->size());
|
||||
|
@ -1429,11 +1482,11 @@ void RegexCompile::handleCloseParen() {
|
|||
// parenthesized grouping this is
|
||||
|
||||
switch (patIdx) {
|
||||
case -1:
|
||||
case plain:
|
||||
// No additional fixups required.
|
||||
// This is the case with most kinds of groupings.
|
||||
// (Grouping-only parentheses)
|
||||
break;
|
||||
case -2:
|
||||
case capturing:
|
||||
// Capturing Parentheses.
|
||||
// Insert a End Capture op into the pattern.
|
||||
// The frame offset of the variables for this cg is obtained from the
|
||||
|
@ -1447,7 +1500,7 @@ void RegexCompile::handleCloseParen() {
|
|||
fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
|
||||
}
|
||||
break;
|
||||
case -3:
|
||||
case atomic:
|
||||
// Atomic Parenthesis.
|
||||
// Insert a LD_SP operation to restore the state stack to the position
|
||||
// it was when the atomic parens were entered.
|
||||
|
@ -1460,6 +1513,37 @@ void RegexCompile::handleCloseParen() {
|
|||
}
|
||||
break;
|
||||
|
||||
case EParenClass::lookAhead:
|
||||
{
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
}
|
||||
break;
|
||||
|
||||
case negLookAhead:
|
||||
{
|
||||
// See comment at doOpenLookAheadNeg
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
op = URX_BUILD(URX_FAIL, 0);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
// Patch the URX_SAVE near the top of the block.
|
||||
int32_t saveOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
|
||||
U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
|
||||
int32_t dest = fRXPat->fCompiledPat->size();
|
||||
saveOp = URX_BUILD(URX_STATE_SAVE, dest);
|
||||
fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen);
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
U_ASSERT(FALSE);
|
||||
}
|
||||
|
|
|
@ -65,8 +65,21 @@ public:
|
|||
static void cleanup(); // Memory cleanup
|
||||
|
||||
|
||||
|
||||
// Categories of parentheses in pattern.
|
||||
// The category is saved in the compile-time parentheses stack frame, and
|
||||
// determines the code to be generated when the matching close ) is encountered.
|
||||
enum EParenClass {
|
||||
plain = -1, // No special handling
|
||||
capturing = -2,
|
||||
atomic = -3,
|
||||
lookAhead = -4,
|
||||
negLookAhead = -5
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
|
||||
UBool doParseActions(EParseAction a);
|
||||
void error(UErrorCode e); // error reporting convenience function.
|
||||
|
||||
|
|
|
@ -99,12 +99,18 @@ enum {
|
|||
// capture group variables in the state stack frame.
|
||||
URX_STO_INP_LOC = 35, // Store the input location. Operand is location
|
||||
// within the matcher data (not stack).
|
||||
URX_JMPX = 36 // Conditional JMP.
|
||||
URX_JMPX = 36, // Conditional JMP.
|
||||
// First Operand: JMP target location.
|
||||
// Second Operand: Data location containing an
|
||||
// input position. If current input position ==
|
||||
// saved input position, FAIL rather than taking
|
||||
// the JMP.
|
||||
// the JMP
|
||||
URX_LA_START = 37, // Starting a LookAround expression.
|
||||
// Save InputPos and SP in static data.
|
||||
// Operand: Static data offset for the save
|
||||
URX_LA_END = 38 // Ending a Lookaround expression.
|
||||
// Restore InputPos and Stack to saved values.
|
||||
// Operand: Static data offset for saved data.
|
||||
};
|
||||
|
||||
// Keep this list of opcode names in sync with the above enum
|
||||
|
@ -146,7 +152,9 @@ enum {
|
|||
"LD_SP", \
|
||||
"BACKREF", \
|
||||
"STO_INP_LOC", \
|
||||
"JMPX"
|
||||
"JMPX", \
|
||||
"LA_START", \
|
||||
"LA_END"
|
||||
|
||||
//
|
||||
// Convenience macros for assembling and disassembling a compiled operation.
|
||||
|
|
|
@ -35,8 +35,6 @@ U_NAMESPACE_BEGIN
|
|||
RegexMatcher::RegexMatcher(const RegexPattern *pat) {
|
||||
fPattern = pat;
|
||||
fInput = NULL;
|
||||
fInputUC = NULL;
|
||||
fInputLength = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fStack = new UVector32(status); // TODO: do something with status.
|
||||
fData = fSmallData;
|
||||
|
@ -177,7 +175,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest,
|
|||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
UnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
|
||||
int32_t len = fInputLength-fMatchEnd;
|
||||
int32_t len = fInput->length()-fMatchEnd;
|
||||
if (len > 0) {
|
||||
dest.append(*fInput, fMatchEnd, len);
|
||||
}
|
||||
|
@ -237,12 +235,9 @@ UBool RegexMatcher::find() {
|
|||
// TODO: Needs optimization
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
int32_t startPos;
|
||||
// TODO: needs to go up to the very end, so a pattern that can match a zero length
|
||||
// string can match at the end of a string. Can't do until loop-breaking
|
||||
// is added to the engine, though, otherwise it triggers too many bugs.
|
||||
startPos = fMatchEnd;
|
||||
U_ASSERT(startPos >= 0 && startPos <= fInputLength);
|
||||
int32_t startPos = fMatchEnd;
|
||||
int32_t inputLen = fInput->length();
|
||||
U_ASSERT(startPos >= 0 && startPos <= inputLen);
|
||||
for (;;) {
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -251,7 +246,7 @@ UBool RegexMatcher::find() {
|
|||
if (fMatch) {
|
||||
return TRUE;
|
||||
}
|
||||
if (startPos >= fInputLength) {
|
||||
if (startPos >= inputLen) {
|
||||
break;
|
||||
}
|
||||
startPos = fInput->moveIndex32(startPos, 1);
|
||||
|
@ -265,7 +260,8 @@ UBool RegexMatcher::find(int32_t start, UErrorCode &status) {
|
|||
if (U_FAILURE(status)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (start < 0 || start >= fInputLength) {
|
||||
int32_t inputLen = fInput->length();
|
||||
if (start < 0 || start >= inputLen) {
|
||||
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -275,7 +271,7 @@ UBool RegexMatcher::find(int32_t start, UErrorCode &status) {
|
|||
// TODO: optimize the search for a leading literal string.
|
||||
// TODO: optimize based on the minimum length of a possible match
|
||||
int32_t startPos;
|
||||
for (startPos=start; startPos < fInputLength; startPos=fInput->moveIndex32(startPos, 1)) {
|
||||
for (startPos=start; startPos < inputLen; startPos=fInput->moveIndex32(startPos, 1)) {
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return FALSE;
|
||||
|
@ -351,7 +347,7 @@ UBool RegexMatcher::matches(UErrorCode &status) {
|
|||
}
|
||||
reset();
|
||||
MatchAt(0, status);
|
||||
UBool success = (fMatch && fMatchEnd==fInputLength);
|
||||
UBool success = (fMatch && fMatchEnd==fInput->length());
|
||||
return success;
|
||||
}
|
||||
|
||||
|
@ -427,8 +423,6 @@ RegexMatcher &RegexMatcher::reset() {
|
|||
|
||||
RegexMatcher &RegexMatcher::reset(const UnicodeString &input) {
|
||||
fInput = &input;
|
||||
fInputLength = input.length();
|
||||
fInputUC = fInput->getBuffer();
|
||||
reset();
|
||||
return *this;
|
||||
}
|
||||
|
@ -511,7 +505,7 @@ UBool RegexMatcher::isWordBoundary(int32_t pos) {
|
|||
|
||||
// Determine whether char c at current position is a member of the word set of chars.
|
||||
// If we're off the end of the string, behave as though we're not at a word char.
|
||||
if (pos < fInputLength) {
|
||||
if (pos < fInput->length()) {
|
||||
UChar32 c = fInput->char32At(pos);
|
||||
int8_t ctype = u_charType(c);
|
||||
if (ctype==U_NON_SPACING_MARK || ctype==U_ENCLOSING_MARK) {
|
||||
|
@ -619,6 +613,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
const UChar *litText = fPattern->fLiteralText.getBuffer();
|
||||
UVector *sets = fPattern->fSets;
|
||||
int32_t inputLen = fInput->length();
|
||||
const UChar *inputBuf = fInput->getBuffer();
|
||||
|
||||
REStackFrame *fp = resetStack();
|
||||
int32_t frameSize = fPattern->fFrameSize;
|
||||
|
@ -663,9 +658,9 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
|
||||
|
||||
case URX_ONECHAR:
|
||||
if (fp->fInputIdx < fInputLength) {
|
||||
if (fp->fInputIdx < inputLen) {
|
||||
UChar32 c;
|
||||
U16_NEXT(fInputUC, fp->fInputIdx, fInputLength, c);
|
||||
U16_NEXT(inputBuf, fp->fInputIdx, inputLen, c);
|
||||
if (c == opValue) {
|
||||
break;
|
||||
}
|
||||
|
@ -691,7 +686,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
|
||||
int32_t stringEndIndex = fp->fInputIdx + stringLen;
|
||||
if (stringEndIndex <= inputLen &&
|
||||
u_strncmp(fInputUC+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
|
||||
u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
|
||||
// Success. Advance the current input position.
|
||||
fp->fInputIdx = stringEndIndex;
|
||||
} else {
|
||||
|
@ -792,7 +787,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
|
||||
case URX_BACKSLASH_D: // Test for decimal digit
|
||||
{
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
}
|
||||
|
@ -822,7 +817,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
case URX_BACKSLASH_X: // Match combining character sequence
|
||||
{ // Closer to Grapheme cluster than to Perl \X
|
||||
// Fail if at end of input
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
}
|
||||
|
@ -852,7 +847,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
break;
|
||||
}
|
||||
fp->fInputIdx = fInput->moveIndex32(fp->fInputIdx, 1);
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -877,7 +872,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
// The high bit of the op value is a flag for the match polarity.
|
||||
// 0: success if input char is in set.
|
||||
// 1: success if input char is not in set.
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
}
|
||||
|
@ -886,7 +881,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
opValue &= ~URX_NEG_SET;
|
||||
U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
|
||||
UChar32 c;
|
||||
U16_NEXT(fInputUC, fp->fInputIdx, fInputLength, c);
|
||||
U16_NEXT(inputBuf, fp->fInputIdx, inputLen, c);
|
||||
const UnicodeSet *s = fPattern->fStaticSets[opValue];
|
||||
if (s->contains(c)) {
|
||||
success = !success;
|
||||
|
@ -899,10 +894,10 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
|
||||
|
||||
case URX_SETREF:
|
||||
if (fp->fInputIdx < fInputLength) {
|
||||
if (fp->fInputIdx < inputLen) {
|
||||
// There is input left. Pick up one char and test it for set membership.
|
||||
UChar32 c;
|
||||
U16_NEXT(fInputUC, fp->fInputIdx, fInputLength, c);
|
||||
U16_NEXT(inputBuf, fp->fInputIdx, inputLen, c);
|
||||
U_ASSERT(opValue > 0 && opValue < sets->size());
|
||||
UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
|
||||
if (s->contains(c)) {
|
||||
|
@ -919,14 +914,14 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
case URX_DOTANY:
|
||||
{
|
||||
// . matches anything
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
// At end of input. Match failed. Backtrack out.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
}
|
||||
// There is input left. Advance over one char, unless we've hit end-of-line
|
||||
UChar32 c;
|
||||
U16_NEXT(fInputUC, fp->fInputIdx, fInputLength, c);
|
||||
U16_NEXT(inputBuf, fp->fInputIdx, inputLen, c);
|
||||
if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible
|
||||
(c == 0x0a || c==0x0d || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029)) {
|
||||
// End of line in normal mode. . does not match.
|
||||
|
@ -941,7 +936,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
{
|
||||
// ., in dot-matches-all (including new lines) mode
|
||||
// . matches anything
|
||||
if (fp->fInputIdx >= fInputLength) {
|
||||
if (fp->fInputIdx >= inputLen) {
|
||||
// At end of input. Match failed. Backtrack out.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
|
@ -1119,7 +1114,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
break;
|
||||
}
|
||||
if ((fp->fInputIdx + len > inputLen) ||
|
||||
u_strncmp(fInputUC+groupStartIdx, fInputUC+fp->fInputIdx, len) != 0) {
|
||||
u_strncmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, len) != 0) {
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize); // FAIL, no match.
|
||||
} else {
|
||||
fp->fInputIdx += len; // Match. Advance current input position.
|
||||
|
@ -1149,7 +1144,37 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case URX_LA_START:
|
||||
{
|
||||
// Entering a lookahead block.
|
||||
// Save Stack Ptr, Input Pos.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
fData[opValue] = fStack->size();
|
||||
fData[opValue+1] = fp->fInputIdx;
|
||||
}
|
||||
break;
|
||||
|
||||
case URX_LA_END:
|
||||
{
|
||||
// Leaving a look-ahead block.
|
||||
// restore Stack Ptr, Input Pos to positions they had on entry to block.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
int32_t stackSize = fStack->size();
|
||||
int32_t newStackSize = fData[opValue];
|
||||
U_ASSERT(stackSize >= newStackSize);
|
||||
if (stackSize > newStackSize) {
|
||||
int32_t *newFP = fStack->getBuffer() + newStackSize - frameSize;
|
||||
int32_t i;
|
||||
for (i=0; i<frameSize; i++) {
|
||||
newFP[i] = ((int32_t *)fp)[i];
|
||||
}
|
||||
fp = (REStackFrame *)newFP;
|
||||
fStack->setSize(newStackSize);
|
||||
}
|
||||
fp->fInputIdx = fData[opValue+1];
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
// Trouble. The compiled pattern contains an entry with an
|
||||
|
|
|
@ -681,9 +681,7 @@ private:
|
|||
|
||||
const RegexPattern *fPattern;
|
||||
const UnicodeString *fInput;
|
||||
const UChar *fInputUC;
|
||||
|
||||
int32_t fInputLength;
|
||||
UBool fMatch; // True if the last match was successful.
|
||||
int32_t fMatchStart; // Position of the start of the most recent match
|
||||
int32_t fMatchEnd; // First position after the end of the most recent match
|
||||
|
|
|
@ -1294,8 +1294,6 @@ void RegexTest::Errors() {
|
|||
REGEX_ERR("(?-si) stuff", 1, 3, U_REGEX_UNIMPLEMENTED);
|
||||
|
||||
// Look-ahead, Look-behind
|
||||
REGEX_ERR("abc(?=xyz).*", 1, 6, U_REGEX_UNIMPLEMENTED); // look-ahead
|
||||
REGEX_ERR("abc(?!xyz).*", 1, 6, U_REGEX_UNIMPLEMENTED); // negated look-ahead
|
||||
REGEX_ERR("abc(?<=xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED); // look-behind
|
||||
REGEX_ERR("abc(?<!xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED); // negated look-behind
|
||||
REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX); // illegal construct
|
||||
|
@ -1699,10 +1697,10 @@ void RegexTest::PerlTests() {
|
|||
//
|
||||
UnicodeString resultString;
|
||||
UnicodeString perlExpr = fields[3];
|
||||
groupsMat->reset(perlExpr);
|
||||
cgMat->reset(perlExpr);
|
||||
|
||||
while (perlExpr.length() > 0) {
|
||||
groupsMat->reset(perlExpr);
|
||||
cgMat->reset(perlExpr);
|
||||
if (perlExpr.startsWith("$&")) {
|
||||
resultString.append(testMat->group(status));
|
||||
perlExpr.remove(0, 2);
|
||||
|
|
Loading…
Add table
Reference in a new issue