ICU-105 Regular Expressions initial check in

X-SVN-Rev: 10050
2025-04-10 07:39:16 +00:00 · 2002-10-22 00:09:32 +00:00 · 2002-10-22 00:09:32 +00:00 · 11891ab0d8
commit 11891ab0d8
parent c6a0025e15
18 changed files with 3437 additions and 7 deletions
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -942,6 +942,7 @@ private:
    friend class TransliterationRule;

    friend class RBBIRuleScanner;
+    friend class RegexCompile;

    /**
     * Constructs a set from the given pattern.  See the class description
--- a/icu4c/source/common/upropset.h
+++ b/icu4c/source/common/upropset.h
@ -63,7 +63,7 @@ class Hashtable;
 * @author Alan Liu
 * @internal
 */
-class UnicodePropertySet /* not : public UObject because all methods are static */ {
+class U_COMMON_API UnicodePropertySet /* not : public UObject because all methods are static */ {

 public:

--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -62,7 +62,8 @@ nultrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o anytrans.o \
 name2uni.o uni2name.o unitohex.o nortrans.o quant.o transreg.o \
 nfrs.o nfrule.o nfsubs.o rbnf.o esctrn.o unesctrn.o \
 funcrepl.o strrepl.o tridpars.o \
-ucurr.o
+ucurr.o \
+regexcmp.o rematch.o repattrn.o


 STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -55,8 +55,8 @@ BSC32=bscmake.exe
 # ADD BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
-# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /machine:I386 /out:"..\..\bin\icuin22.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib"
-# SUBTRACT LINK32 /pdb:none /debug
+# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /debug /machine:I386 /out:"..\..\bin\icuin22.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib"
+# SUBTRACT LINK32 /pdb:none

 !ELSEIF  "$(CFG)" == "i18n - Win32 Debug"

@ -279,10 +279,22 @@ SOURCE=.\rbt_set.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\regexcmp.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\rematch.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\remtrans.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\repattrn.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\search.cpp
 # End Source File
 # Begin Source File
@ -1376,6 +1388,65 @@ SOURCE=.\rbt_set.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\regex.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\unicode\regex.h
+
+"..\..\include\unicode\regex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\regex.h
+
+"..\..\include\unicode\regex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win64 Release"
+
+# Begin Custom Build
+InputPath=.\unicode\regex.h
+
+"..\..\include\unicode\regex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win64 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\regex.h
+
+"..\..\include\unicode\regex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\regexcmp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\regexcst.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\regeximp.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\remtrans.h
 # End Source File
 # Begin Source File
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@ -0,0 +1,962 @@
+
+//
+//  file:  regexcmp.cpp
+//
+//  Copyright (C) 2002, International Business Machines Corporation and others.
+//  All Rights Reserved.
+//
+//  This file contains the ICU regular expression scanner, which is responsible
+//  for preprocessing a regular expression pattern into the tokenized form that
+//  is used by the match finding engine.
+//
+
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/uchriter.h"
+#include "unicode/parsepos.h"
+#include "unicode/parseerr.h"
+#include "unicode/regex.h"
+#include "regeximp.h"
+#include "upropset.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+#include "stdio.h"    // TODO:  Get rid of this
+
+#include "regexcst.h"   // Contains state table for the regex pattern parser.
+                       //   generated by a Perl script.
+#include "regexcmp.h"
+
+#include "uassert.h"
+
+
+U_NAMESPACE_BEGIN
+
+const char RegexCompile::fgClassID=0;
+static const int RESCAN_DEBUG = 0;    // Nonzero turns on printf tracing of the pattern parsing state machine.
+
+//----------------------------------------------------------------------------------------
+//
+// Unicode Sets for each of the character classes needed for parsing a regex pattern.
+//               (Initialized with hex values for portability to EBCDIC based machines.
+//                Really ugly, but there's no good way to avoid it.)
+//
+//              The sets are referred to by name in regexcst.txt, which is the
+//              source form of the state transition table.  These names are converted
+//              to indices in regexcst.h by the perl state table building script.
+//              The indices (less 128) are used to access the array gRuleSets.
+//
+//----------------------------------------------------------------------------------------
+
+// Characters that have no special meaning, and thus do not need to be escaped.  Expressed
+//    as the inverse of those needing escaping --  [^\*\?\+\[\(\)\{\}\^\$\|\\\.]
+static const UChar gRuleSet_rule_char_pattern[]       = { 
+ //   [    ^      \     *     \     ?     \     +     \     [     \     (     \     )
+    0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c, 0x29, 
+ //   \     {    \     }     \     ^     \     $     \     |     \     \     \     .     ]
+    0x5c, 0x7b,0x5c, 0x7d, 0x5c, 0x5e, 0x5c, 0x24, 0x5c, 0x7c, 0x5c, 0x5c, 0x5c, 0x2e, 0x5d, 0};
+
+
+static const UChar gRuleSet_name_char_pattern[]       = {
+//    [    _      \    p     {     L      }     \     p     {    N      }     ]     (NUL terminated)
+    0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0};
+
+static const UChar gRuleSet_digit_char_pattern[] = {
+//    [    0      -    9     ]     (NUL terminated)
+    0x5b, 0x30, 0x2d, 0x39, 0x5d, 0};
+
+static const UChar gRuleSet_name_start_char_pattern[] = {
+//    [    _      \    p     {     L      }     ]     (NUL terminated)
+    0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 };
+
+static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00};  // "any"
+
+static UnicodeSet  *gRuleSets[10];         // Array of ptrs to the actual UnicodeSet objects, indexed by (kRuleSet_xxx - 128).
+
+
+
+//----------------------------------------------------------------------------------------
+//
+//  Constructor.
+//
+//----------------------------------------------------------------------------------------
+RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
+{
+    // Set up a pattern compiler.
+    //   status:  in/out ICU error code.  If it already indicates failure on entry
+    //            (or the paren stack failed to construct), the scanner fields are
+    //            still initialized but the shared character-class sets are not built.
+    //            Set to U_MEMORY_ALLOCATION_ERROR if a shared set cannot be allocated.
+    fStatus             = &status;
+
+    // Pattern scanning position.
+    fScanIndex = 0;
+    fNextIndex = 0;
+
+    // Scanner bookkeeping and mode flags.
+    fLineNum    = 1;
+    fCharNum    = 0;
+    fQuoteMode  = FALSE;
+    fFreeForm   = FALSE;
+
+    // -1 means "no parenthesized block has been closed yet".
+    fMatchOpenParen  = -1;
+    fMatchCloseParen = -1;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    //
+    //  Set up the constant Unicode Sets, the first time through only.
+    //    The rule_char entry doubles as the "already initialized" flag.
+    //
+    if (gRuleSets[kRuleSet_rule_char-128] == NULL) {
+        //  TODO:  Make thread safe.
+        //  TODO:  Memory Cleanup on ICU shutdown.
+
+        // Build all of the sets in locals first, so that the global table is
+        //   only touched once every one of them is known to be usable.
+        UnicodeSet *ruleCharSet   = new UnicodeSet(gRuleSet_rule_char_pattern,       status);
+        UnicodeSet *whiteSpaceSet = new UnicodeSet(UnicodePropertySet::getRuleWhiteSpaceSet(status));
+        UnicodeSet *digitCharSet  = new UnicodeSet(gRuleSet_digit_char_pattern,      status);
+
+        // An allocation failure does not set status by itself.  Without this check
+        //   a NULL set pointer would be stored and later dereferenced by compile().
+        if (U_SUCCESS(status) &&
+            (ruleCharSet == NULL || whiteSpaceSet == NULL || digitCharSet == NULL)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+
+        if (U_FAILURE(status)) {
+            // delete of a NULL pointer is harmless.  The global table has not been
+            //   modified, so a later constructor call will retry the initialization.
+            delete ruleCharSet;
+            delete whiteSpaceSet;
+            delete digitCharSet;
+            return;
+        }
+
+        // Publish the sets.  The rule_char entry is stored last because it is the
+        //   one tested above to decide whether initialization is still needed.
+        gRuleSets[kRuleSet_white_space-128]     = whiteSpaceSet;
+        gRuleSets[kRuleSet_digit_char-128]      = digitCharSet;
+        gRuleSets[kRuleSet_rule_char-128]       = ruleCharSet;
+    }
+}
+
+
+
+//----------------------------------------------------------------------------------------
+//
+//  Destructor
+//
+//----------------------------------------------------------------------------------------
+RegexCompile::~RegexCompile()
+{
+    // Intentionally empty: no cleanup is performed here.  The file-static
+    //   gRuleSets table is not freed by this destructor.
+}
+
+//---------------------------------------------------------------------------------
+//
+//  Compile regex pattern.   The state machine for rules parsing is here.
+//                         The state tables are hand-written in the file regexcst.txt,
+//                         and converted to the form used here by a perl
+//                         script regexcst.pl
+//
+//---------------------------------------------------------------------------------
+void    RegexCompile::compile(                    
+                         RegexPattern &rxp,          // User level pattern object to receive
+                                                     //    the compiled pattern.
+                         const UnicodeString &pat,   // Source pat to be compiled.
+                         UParseError &pp,            // Error position info
+                         UErrorCode &e)              // Error Code.  Nothing is compiled if it is already a failure on entry.
+{
+    fStatus             = &e;
+    fRXPat              = &rxp;
+    fParseErr           = &pp;
+    fStackPtr           = 0;
+    fStack[fStackPtr]   = 0;
+
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    // There should be no pattern stuff in the RegexPattern object.  They can not be reused.
+    U_ASSERT(fRXPat->fPattern.length() == 0);
+
+    // Prepare the RegexPattern object to receive the compiled pattern.
+    fRXPat->fPattern        = pat;
+
+
+    // Initialize the pattern scanning state machine
+    fPatternLength = pat.length();
+    uint16_t                state = 1;    // Parsing starts in row 1 of the state table.
+    const RegexTableEl      *tableEl;
+    nextChar(fC);                        // Fetch the first char from the pattern string.
+
+    //
+    // Main loop for the regex pattern parsing state machine.
+    //   Runs once per state transition.
+    //   Each time through optionally performs, depending on the state table,
+    //      - an advance to the next pattern char
+    //      - an action to be performed.
+    //      - pushing or popping a state to/from the local state return stack.
+    //   file regexcst.txt is the source for the state table.  The logic behind
+    //     recognizing the pattern syntax is there, not here.
+    //
+    for (;;) {
+        //  Bail out if anything has gone wrong.
+        //  Regex pattern parsing stops on the first error encountered.
+        if (U_FAILURE(*fStatus)) {
+            break;
+        }
+
+        U_ASSERT(state != 0);
+
+        // Find the state table element that matches the input char from the rule, or the
+        //    class of the input character.  Start with the first table row for this
+        //    state, then linearly scan forward until we find a row that matches the
+        //    character.  The last row for each state always matches all characters, so
+        //    the search will stop there, if not before.
+        //
+        tableEl = &gRuleParseStateTable[state];
+        if (RESCAN_DEBUG) {
+            printf("char, line, col = (\'%c\', %d, %d)    state=%s ",
+                fC.fChar, fLineNum, fCharNum, RegexStateNames[state]);
+        }
+
+        for (;;) {    // loop through table rows belonging to this state, looking for one
+                      //   that matches the current input char.
+            if (RESCAN_DEBUG) { printf(".");}
+            if (tableEl->fCharClass < 127 && fC.fQuoted == FALSE &&   tableEl->fCharClass == fC.fChar) {
+                // Table row specified an individual character, not a set, and
+                //   the input character is not quoted, and
+                //   the input character matched it.
+                break;
+            }
+            if (tableEl->fCharClass == 255) {
+                // Table row specified default, match anything character class.
+                break;
+            }
+            if (tableEl->fCharClass == 254 && fC.fQuoted)  {
+                // Table row specified "quoted" and the char was quoted.
+                break;
+            }
+            if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1)  {
+                // Table row specified eof and we hit eof on the input.
+                break;
+            }
+
+            if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 &&   // Table specs a char class &&
+                fC.fQuoted == FALSE &&                                      //   char is not escaped &&
+                fC.fChar != (UChar32)-1) {                                   //   char is not EOF
+                UnicodeSet *uniset = gRuleSets[tableEl->fCharClass-128];     // Class values are offset by 128 from the gRuleSets index.
+                if (uniset->contains(fC.fChar)) {
+                    // Table row specified a character class, or set of characters,
+                    //   and the current char matches it.
+                    break;
+                }
+            }
+
+            // No match on this row, advance to the next row for this state.
+            tableEl++;
+        }
+        if (RESCAN_DEBUG) { printf("\n");}
+
+        //
+        // We've found the row of the state table that matches the current input
+        //   character from the rules string.
+        // Perform any action specified  by this row in the state table.
+        if (doParseActions((EParseAction)tableEl->fAction) == FALSE) {
+            // Break out of the state machine loop if
+            //   the action signalled some kind of error, or
+            //   the action was to exit, which occurs on normal end-of-pattern input.
+            break;
+        }
+
+        if (tableEl->fPushState != 0) {
+            fStackPtr++;
+            if (fStackPtr >= kStackSize) {
+                error(U_BRK_INTERNAL_ERROR);
+                printf("RegexCompile::parse() - state stack overflow.\n");
+                fStackPtr--;    // Undo the increment so that the store below stays within fStack.
+            }
+            fStack[fStackPtr] = tableEl->fPushState;
+        }
+
+        if (tableEl->fNextChar) {
+            nextChar(fC);
+        }
+
+        // Get the next state from the table entry, or from the
+        //   state stack if the next state was specified as "pop" (255).
+        if (tableEl->fNextState != 255) {
+            state = tableEl->fNextState;
+        } else {
+            state = fStack[fStackPtr];
+            fStackPtr--;
+            if (fStackPtr < 0) {
+                error(U_BRK_INTERNAL_ERROR);
+                printf("RegexCompile::compile() - state stack underflow.\n");
+                fStackPtr++;    // Restore a valid index; the error is caught at the top of the loop.
+            }
+        }
+
+    }
+
+}
+
+
+
+
+
+//----------------------------------------------------------------------------------------
+//
+//  doParseAction        Do some action during regex pattern parsing.
+//                       Called by the parse state machine.
+//
+//
+//----------------------------------------------------------------------------------------
+UBool RegexCompile::doParseActions(EParseAction action)
+{
+    // Perform one action requested by the pattern parsing state table.
+    //   action:   the action code taken from the matched state table row.
+    //   returns:  TRUE if the state machine in compile() should keep running,
+    //             FALSE if it should stop, either because of an error or because
+    //             the end of the pattern was reached.
+    UBool   returnVal = TRUE;
+
+    switch ((Regex_PatternParseAction)action) {
+
+    case doPatStart:
+        // Start of pattern compiles to:
+        //0   SAVE   2        Fall back to position of FAIL
+        //1   jmp    3
+        //2   FAIL            Stop if we ever reach here.
+        //3   NOP             Dummy, so start of pattern looks the same as
+        //                    the start of an ( grouping.
+        //4   NOP             Reserved, will be replaced by a save if there are
+        //                    OR | operators at the top level
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus);
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP,  3), *fStatus);
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus);
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP,  0), *fStatus);
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP,  0), *fStatus);
+
+        fParenStack.push(-1, *fStatus);     // Begin a Paren Stack Frame
+        fParenStack.push( 3, *fStatus);     // Push location of first NOP
+        break;
+
+    case doPatFinish:
+        // We've scanned to the end of the pattern
+        //  The end of pattern compiles to:
+        //        URX_END
+        //    which will stop the runtime match engine.
+        //  Encountering end of pattern also behaves like a close paren,
+        //   and forces fixups of the State Save at the beginning of the compiled pattern
+        //   and of any OR operations at the top level.
+        //
+        handleCloseParen();
+
+        // add the END operation to the compiled pattern.
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus);
+
+        // Terminate the pattern compilation state machine.
+        returnVal = FALSE;
+        break;
+
+
+
+    case doOrOperator:
+        // Scanning a '|', as in (A|B)
+        {
+            // Insert a SAVE operation at the start of the pattern section preceding
+            //   this OR at this level.  This SAVE will branch the match forward
+            //   to the right hand side of the OR in the event that the left hand
+            //   side fails to match and backtracks.  Locate the position for the
+            //   save from the location on the top of the parentheses stack.
+            int32_t savePosition = fParenStack.popi();
+            int32_t op = fRXPat->fCompiledPat->elementAti(savePosition);
+            U_ASSERT(URX_TYPE(op) == URX_NOP);  // original contents of reserved location
+            op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
+            fRXPat->fCompiledPat->setElementAt(op, savePosition);
+
+            // Append a JMP operation into the compiled pattern.  The operand for
+            //  the JMP will eventually be the location following the ')' for the
+            //  group.  This will be patched in later, when the ')' is encountered.
+            op = URX_BUILD(URX_JMP, 0);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+
+            // Push the position of the newly added JMP op onto the parentheses stack.
+            // This registers it for fixup when this block's close paren is encountered.
+            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
+
+            // Append a NOP to the compiled pattern.  This is the slot reserved
+            //   for a SAVE in the event that there is yet another '|' following
+            //   this one.
+            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
+            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
+        }
+        break;
+
+
+    case doOpenCaptureParen:
+        // Open Paren.
+        //   Compile to a
+        //      - NOP, which later may be replaced by a save-state if the
+        //         parenthesized group gets a * quantifier, followed by
+        //      - START_CAPTURE
+        //      - NOP, which may later be replaced by a save-state if there
+        //             is an '|' alternation within the parens.
+        {
+            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
+            fRXPat->fNumCaptureGroups++;
+            int32_t  cop = URX_BUILD(URX_START_CAPTURE, fRXPat->fNumCaptureGroups);
+            fRXPat->fCompiledPat->addElement(cop, *fStatus);
+            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
+
+            // On the Parentheses stack, start a new frame and add the positions
+            //   of the two NOPs.  Depending on what follows in the pattern, the
+            //   NOPs may be changed to SAVE_STATE or JMP ops, with a target
+            //   address of the end of the parenthesized group.
+            fParenStack.push(-2, *fStatus);           // Begin a new frame.
+            fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus);   // The first NOP
+            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP
+        }
+         break;
+
+    // The remaining open-paren flavors are recognized by the parser, but
+    //   no code is emitted for them here yet.
+    case doOpenNonCaptureParen:
+        // Open Paren.
+        break;
+
+    case doOpenAtomicParen:
+        // Open Paren.
+        break;
+
+    case doOpenLookAhead:
+        // Open Paren.
+        break;
+
+    case doOpenLookAheadNeg:
+        // Open Paren.
+        break;
+
+    case doOpenLookBehind:
+        // Open Paren.
+        break;
+
+    case doOpenLookBehindNeg:
+        // Open Paren.
+        break;
+
+    case doExprRParen:
+        break;
+
+    case doCloseParen:
+        handleCloseParen();
+        break;
+
+    case doNOP:
+        break;
+
+
+    case doBadOpenParenType:
+    case doRuleError:
+        // Syntax error in the pattern.  Record it and stop the state machine.
+        error(U_BRK_RULE_SYNTAX);
+        returnVal = FALSE;
+        break;
+
+
+    case doPlus:
+        //  Normal '+'  compiles to
+        //     1.   stuff to be repeated  (already built)
+        //     2.   state-save  4
+        //     3.   jmp 1
+        //     4.   ...
+        {
+            int32_t   topLoc;        // location of item #1, the start of the stuff to repeat
+
+            if (fRXPat->fCompiledPat->size() == fMatchCloseParen)
+            {
+                // The thing being repeated (item 1) is a parenthesized block.
+                //   Pick up the location of the top of the block.
+                topLoc = fMatchOpenParen+1;
+            } else {
+                // Repeating just a single item, the last thing in the compiled pattern so far.
+                topLoc = fRXPat->fCompiledPat->size()-1;
+            }
+
+            // Locate the position in the compiled pattern where the match will continue
+            //   after completing the +   (4 in the comment above)
+            int32_t continueLoc = fRXPat->fCompiledPat->size()+2;
+
+            // Emit the STATE_SAVE
+            int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
+            fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
+
+            // Emit the JMP
+            int32_t jmpOp = URX_BUILD(URX_JMP, topLoc);
+            fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
+        }
+        break;
+
+    case doOpt:
+        // Normal (greedy) ? quantifier.
+        //  Compiles to
+        //     1. state save 3
+        //     2.    body of optional stuff
+        //     3. ...
+        // Insert the state save into the compiled pattern, and we're done.
+        {
+            // blockTopLoc() may insert a slot, so it must run before size() is read.
+            int32_t   saveStateLoc = blockTopLoc();
+            int32_t   saveStateOp  = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size());
+            fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
+        }
+        break;
+
+
+
+    case doStar:
+        // Normal (greedy) * quantifier.
+        // Compiles to
+        //       1.   STATE_SAVE   4
+        //       2.      body of stuff being iterated over
+        //       3.   JMP  1
+        //       4.   ...
+        //
+        {
+            // location of item #1, the STATE_SAVE
+            int32_t   saveStateLoc = blockTopLoc();
+
+            // Locate the position in the compiled pattern where the match will continue
+            //   after completing the *.   (4 in the comment above)
+            int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
+
+            // Put together the save state op and store it into the compiled code.
+            int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
+            fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
+
+            // Append the URX_JMP operation to the compiled pattern.  Its target
+            // is the location of the state-save, above.
+            int32_t jmpOp = URX_BUILD(URX_JMP, saveStateLoc);
+            fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
+        }
+        break;
+
+
+    case doStartString:
+        // We've just scanned a single "normal" character from the pattern,
+        // which is a character without special meaning that will need to be
+        // matched literally.   Save it away.  It may be the start of a string.
+        {
+            fStringOpStart = fRXPat->fLiteralText.length();
+            fRXPat->fLiteralText.append(fC.fChar);
+            break;
+        }
+
+    case doStringChar:
+        // We've just scanned a "normal" character from the pattern, which now
+        //   needs to be appended to the literal match string that is
+        //   already being assembled.
+        {
+            fRXPat->fLiteralText.append(fC.fChar);
+            break;
+        }
+
+
+
+    case doSplitString:
+        // We've just peeked at a quantifier, e.g. a *, following a scanned string.
+        //   Separate the last character from the string, because the quantifier
+        //   only applies to it, not to the entire string.  Emit into the compiled
+        //   pattern:
+        //      -  string chars[0..n-2]     (as a string, assuming more than one char)
+        //      -  string char [n-1]        (as a single character)
+        {
+            // Locate the positions of the last and next-to-last characters
+            //  in the string.  Requires a bit of futzing around to account for
+            //  surrogate pairs, since we want 32 bit code points, not 16 bit code units.
+            int32_t  strLength = fRXPat->fLiteralText.length() - fStringOpStart;
+            U_ASSERT(strLength > 0);
+            int32_t  lastCharIdx = fRXPat->fLiteralText.length()-1;
+            lastCharIdx = fRXPat->fLiteralText.getChar32Start(lastCharIdx);
+            int32_t nextToLastCharIdx = lastCharIdx-1;
+            if (nextToLastCharIdx > fStringOpStart) {
+                nextToLastCharIdx = fRXPat->fLiteralText.getChar32Start(nextToLastCharIdx);
+            }
+
+            if (nextToLastCharIdx > fStringOpStart) {
+                // The string contains three or more code points.
+                // emit the first through the next-to-last as a string.
+                int32_t  stringToken = URX_BUILD(URX_STRING, fStringOpStart);
+                fRXPat->fCompiledPat->addElement(stringToken, *fStatus);
+                stringToken = URX_BUILD(URX_STRING_LEN, lastCharIdx - fStringOpStart);
+                fRXPat->fCompiledPat->addElement(stringToken, *fStatus);
+            }
+            else if (nextToLastCharIdx == fStringOpStart) {
+                // The string contains exactly two code points.
+                // emit the first into the compiled pattern as a single char
+                UChar32  c = fRXPat->fLiteralText.char32At(nextToLastCharIdx);
+                int32_t  charToken = URX_BUILD(URX_ONECHAR, c);
+                fRXPat->fCompiledPat->addElement(charToken, *fStatus);
+            }
+            // In all cases emit the last char as a single character.
+            UChar32  c = fRXPat->fLiteralText.char32At(lastCharIdx);
+            int32_t  charToken = URX_BUILD(URX_ONECHAR, c);
+            fRXPat->fCompiledPat->addElement(charToken, *fStatus);
+        }
+        break;
+
+    case doEndString:
+        // We have reached the end of a literal string in the pattern.
+        // Emit the string token into the compiled pattern, or if the string
+        //   has only one character, emit the single character token instead.
+        {
+            int32_t   strLength = fRXPat->fLiteralText.length() - fStringOpStart;
+            U_ASSERT(strLength > 0);
+            int32_t  lastCharIdx = fRXPat->fLiteralText.length()-1;
+            lastCharIdx = fRXPat->fLiteralText.getChar32Start(lastCharIdx);
+            if (lastCharIdx == fStringOpStart) {
+                // The string contains exactly one character.
+                //  Emit it into the compiled pattern as a single char.
+                int32_t  charToken = URX_BUILD(URX_ONECHAR, fRXPat->fLiteralText.char32At(fStringOpStart));
+                fRXPat->fCompiledPat->addElement(charToken, *fStatus);
+            } else {
+                // The string contains two or more chars.  Emit as a string.
+                // Compiled string consumes two tokens in the compiled pattern, one
+                //   for the index of the start-of-string, and one for the length.
+                int32_t  stringToken = URX_BUILD(URX_STRING, fStringOpStart);
+                fRXPat->fCompiledPat->addElement(stringToken, *fStatus);
+                stringToken = URX_BUILD(URX_STRING_LEN, strLength);
+                fRXPat->fCompiledPat->addElement(stringToken, *fStatus);
+            }
+        }
+        break;
+
+
+
+    case doDotAny:
+        // scanned a ".",  match any single character.
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOTANY, 0), *fStatus);
+        break;
+
+
+    case doExprFinished:
+        break;
+
+    case doExit:
+        // Normal termination of the state machine.
+        returnVal = FALSE;
+        break;
+
+    case doScanUnicodeSet:
+        // Scanned the start of a [set] expression.  scanSet() consumes it.
+        {
+            UnicodeSet *theSet = scanSet();
+            if (theSet == NULL) {
+                break;
+            }
+            if (theSet->size() > 1) {
+                //  The set contains two or more chars.
+                //  Put it into the compiled pattern as a set.
+                //  The set object is handed over to fRXPat->fSets.
+                int32_t setNumber = fRXPat->fSets->size();
+                fRXPat->fSets->addElement(theSet, *fStatus);
+                int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
+                fRXPat->fCompiledPat->addElement(setOp, *fStatus);
+            }
+            else
+            {
+                // The set contains at most a single code point.  Put it into
+                //   the compiled pattern as a single char operation rather
+                //   than a set, and discard the set itself.
+                UChar32  c = theSet->charAt(0);
+                if (c == -1) {
+                    // Set contained no chars.  Stuff an invalid char that can't match.
+                    c = 0x1fffff;
+                }
+                int32_t  charToken = URX_BUILD(URX_ONECHAR, c);
+                fRXPat->fCompiledPat->addElement(charToken, *fStatus);
+                delete theSet;
+            }
+        }
+        break;
+
+    default:
+        // An action code this function does not know about.
+        error(U_BRK_INTERNAL_ERROR);
+        returnVal = FALSE;
+        break;
+    }
+    return returnVal;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//   blockTopLoc()          Find or create a location in the compiled pattern
+//                          at the start of the operation or block that has
+//                          just been compiled.  Needed when a quantifier (* or
+//                          whatever) appears, and we need to add an operation
+//                          at the start of the thing being quantified.
+//
+//                          (Parenthesized Blocks) have a slot with a NOP that
+//                          is reserved for this purpose.  .* or similar don't
+//                          and a slot needs to be added.
+//
+//------------------------------------------------------------------------------
+int32_t   RegexCompile::blockTopLoc() {
+    // Returns the index, in the compiled pattern, of a slot at the start of the
+    //   item just compiled, into which a quantifier may store an operation.
+    int32_t   slot;
+
+    if (fRXPat->fCompiledPat->size() != fMatchCloseParen) {
+        // The item just compiled is a single thing: a ".", a single char, or a
+        //   set reference.  Nothing was reserved ahead of it, so open up a slot
+        //   by inserting a NOP in front of that last operation.
+        slot = fRXPat->fCompiledPat->size()-1;
+        int32_t lastOp     = fRXPat->fCompiledPat->elementAti(slot);
+        int32_t lastOpType = URX_TYPE(lastOp);
+        U_ASSERT(lastOpType==URX_ONECHAR || lastOpType==URX_SETREF || lastOpType==URX_DOTANY);
+        int32_t filler = URX_BUILD(URX_NOP, 0);
+        fRXPat->fCompiledPat->insertElementAt(filler, slot, *fStatus);
+        return slot;
+    }
+
+    // The item just compiled is a parenthesized block.  Its open paren already
+    //   left a NOP in place for exactly this purpose.
+    slot = fMatchOpenParen;
+    U_ASSERT(slot > 0);
+    uint32_t  reservedOp = fRXPat->fCompiledPat->elementAti(slot);
+    U_ASSERT(URX_TYPE(reservedOp) == URX_NOP);
+    return slot;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    handleCloseParen      When compiling a close paren, go back and fix up
+//                          the operations within the parenthesized block that
+//                          need to target the end of the block.  The locations
+//                          of these are kept on the parentheses stack, above a
+//                          negative value that identifies the kind of block.
+//
+//                          This function is called both when encountering a
+//                          real ) and at the end of the pattern.
+//
+//-------------------------------------------------------------------------------
+void  RegexCompile::handleCloseParen() {
+    U_ASSERT(fParenStack.size() >= 1);
+
+    // Pop the frame for the just-closed group.  Every non-negative entry is
+    //    the location of an op whose (still zero) value field receives the
+    //    location just past the end of the block.  The pass stops at the
+    //    first negative entry, which tells what kind of group this was.
+    //    fMatchOpenParen is left holding the last location that was popped.
+    int32_t   stackVal = fParenStack.popi();
+    while (stackVal >= 0) {
+        U_ASSERT(stackVal>0 && stackVal <= fRXPat->fCompiledPat->size());
+        int32_t  fixupOp = fRXPat->fCompiledPat->elementAti(stackVal);
+        U_ASSERT(URX_VAL(fixupOp) == 0);            // Target should not have been set yet.
+        fixupOp |= fRXPat->fCompiledPat->size();    // Set it now.
+        fRXPat->fCompiledPat->setElementAt(fixupOp, stackVal);
+        fMatchOpenParen = stackVal;
+        stackVal = fParenStack.popi();
+    }
+
+    // Do any additional fixups, depending on the specific kind of
+    //    parenthesized grouping this is.
+    if (stackVal == -2) {
+        // Capturing Parentheses.
+        //   Append an End Capture op to the pattern, carrying the group
+        //   number found in the Start Capture op that follows the
+        //   location recorded in fMatchOpenParen.
+        int32_t   startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
+        U_ASSERT(URX_TYPE(startOp) == URX_START_CAPTURE);
+        int32_t   groupNum = URX_VAL(startOp);
+        U_ASSERT(groupNum > 0);
+        int32_t   endOp = URX_BUILD(URX_END_CAPTURE, groupNum);
+        fRXPat->fCompiledPat->addElement(endOp, *fStatus);
+    }
+    else if (stackVal != -1) {
+        // -1 needs no additional fixups (the case with most kinds of
+        //    groupings); any other value is unexpected.
+        U_ASSERT(FALSE);
+    }
+
+    // Remember the next location in the compiled pattern.
+    //   The compilation of quantifiers looks at this to see whether it is
+    //   looping over a parenthesized block or a single item.
+    fMatchCloseParen = fRXPat->fCompiledPat->size();
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+//  error         Report a pattern parse error.
+//                Only the first error is recorded; once *fStatus holds a
+//                failure, later calls leave both the status and the parse
+//                error information untouched.
+//
+//                e   The error code to store into *fStatus.
+//
+//----------------------------------------------------------------------------------------
+void RegexCompile::error(UErrorCode e) {
+    if (U_SUCCESS(*fStatus)) {
+        *fStatus = e;
+        fParseErr->line  = fLineNum;
+        fParseErr->offset = fCharNum;
+        // No context text is supplied; terminate both buffers so that callers
+        //   never see uninitialized data in either of them.
+        fParseErr->preContext[0] = 0;
+        fParseErr->postContext[0] = 0;
+    }
+}
+
+
+
+
+
+
+
+
+
+//
+//  Assorted Unicode character constants used while scanning a pattern.
+//     Numeric because there is no portable way to enter them as literals.
+//     (Think EBCDIC).
+//
+static const UChar      chCR        = 0x0d;      // New lines, for terminating comments.
+static const UChar      chLF        = 0x0a;      //    line feed
+static const UChar      chNEL       = 0x85;      //    NEL newline variant
+static const UChar      chLS        = 0x2028;    //    Unicode Line Separator
+static const UChar      chApos      = 0x27;      //  single quote, for quoted chars.
+static const UChar      chPound     = 0x23;      // '#', introduces a comment.
+static const UChar      chBackSlash = 0x5c;      // '\'  introduces a char escape
+static const UChar      chLParen    = 0x28;      // '('
+static const UChar      chRParen    = 0x29;      // ')'
+
+
+//----------------------------------------------------------------------------------------
+//
+//  nextCharLL    Low Level Next Char from the regex pattern.
+//                Fetch the code point at fNextIndex and step past it, keeping
+//                the line and column counts used for error reporting current.
+//
+//                Returns (UChar32)-1 when the pattern is exhausted.
+//
+//----------------------------------------------------------------------------------------
+UChar32  RegexCompile::nextCharLL() {
+    if (fPatternLength==0 || fNextIndex >= fPatternLength) {
+        return (UChar32)-1;
+    }
+
+    UnicodeString &patText = fRXPat->fPattern;
+    UChar32        ch      = patText.char32At(fNextIndex);
+    fNextIndex = patText.moveIndex32(fNextIndex, 1);
+
+    // A LF that directly follows a CR belongs to the line end that the CR
+    //   already started, so it does not begin another line.
+    UBool  startsNewLine = (ch == chCR)  ||
+                           (ch == chNEL) ||
+                           (ch == chLS)  ||
+                           (ch == chLF && fLastChar != chCR);
+
+    if (!startsNewLine) {
+        // Ordinary character: advance the column, except for the LF half
+        //   of a CR-LF pair, which occupies no column of its own.
+        if (ch != chLF) {
+            fCharNum++;
+        }
+    }
+    else {
+        // New line: bump the line number and reset the column to 0.
+        fLineNum++;
+        fCharNum=0;
+        if (fQuoteMode) {
+            // A quoted region may not span a line end.
+            error(U_BRK_NEW_LINE_IN_QUOTED_STRING);
+            fQuoteMode = FALSE;
+        }
+    }
+
+    fLastChar = ch;
+    return ch;
+}
+
+
+//---------------------------------------------------------------------------------
+//
+//   nextChar     Fetch the next pattern character for the parse state machine.
+//                At this level, '#' comments are consumed and backslash
+//                character escapes are resolved.  The rest of the pattern
+//                grammar is handled at the next level up.
+//
+//                c   receives the character (or (UChar32)-1 at the end of the
+//                    pattern) and a flag telling whether it was quoted.
+//
+//---------------------------------------------------------------------------------
+void RegexCompile::nextChar(RegexPatternChar &c) {
+    fScanIndex = fNextIndex;
+    c.fChar    = nextCharLL();
+
+    // Note that fQuoteMode is tested only after the character was fetched;
+    //   nextCharLL() turns quote mode off when it meets a new-line.
+    if (fQuoteMode) {
+        c.fQuoted = TRUE;
+        return;
+    }
+
+    // We are not in a 'quoted region' of the source.
+    c.fQuoted = FALSE;
+
+    if (c.fChar == chPound) {
+        // Start of a comment.  Consume the rest of it.
+        //   The new-line char that terminates the comment (or the end of
+        //   pattern indication) is what gets handed back to the caller.
+        do {
+            c.fChar = nextCharLL();
+        } while (c.fChar != (UChar32)-1 &&     // EOF
+                 c.fChar != chCR        &&
+                 c.fChar != chLF        &&
+                 c.fChar != chNEL       &&
+                 c.fChar != chLS);
+    }
+
+    if (c.fChar == (UChar32)-1) {
+        return;
+    }
+    if (c.fChar != chBackSlash) {
+        return;
+    }
+
+    //
+    //  Backslash escaped character.
+    //  UnicodeString::unescapeAt() does the work; it leaves fNextIndex
+    //  where it was when the escape could not be handled.
+    //
+    c.fQuoted = TRUE;
+    const int32_t  escStart = fNextIndex;
+    c.fChar = fRXPat->fPattern.unescapeAt(fNextIndex);
+    if (fNextIndex == escStart) {
+        error(U_BRK_HEX_DIGITS_EXPECTED);
+    }
+    fCharNum += fNextIndex-escStart;
+}
+
+
+
+//---------------------------------------------------------------------------------
+//
+//  scanSet    Construct a UnicodeSet from the text at the current scan
+//             position.  Advance the scan position to the first character
+//             after the set.
+//
+//             The scan position is normally under the control of the state machine
+//             that controls pattern parsing.  UnicodeSets, however, are parsed by
+//             the UnicodeSet constructor, not by the Regex pattern parser.
+//
+//             Returns the new set, which the caller must delete, or NULL when
+//             an error was already pending on entry, when the set could not
+//             be allocated, or when the set failed to parse.  The last two
+//             cases are reported through error().
+//
+//---------------------------------------------------------------------------------
+UnicodeSet *RegexCompile::scanSet() {
+    if (U_FAILURE(*fStatus)) {
+        return NULL;
+    }
+
+    ParsePosition  pos;
+    pos.setIndex(fScanIndex);
+
+    UErrorCode   localStatus = U_ZERO_ERROR;
+    UnicodeSet  *uset = new UnicodeSet(fRXPat->fPattern, pos, localStatus);
+    if (uset == NULL) {
+        // Allocation failed, so the constructor never ran and localStatus
+        //   says nothing.  Report it rather than silently returning NULL.
+        error(U_MEMORY_ALLOCATION_ERROR);
+        return NULL;
+    }
+    if (U_FAILURE(localStatus)) {
+        //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
+        //         UnicodeSet appears to not be reporting correctly at this time.
+        error(localStatus);
+        delete uset;
+        return NULL;
+    }
+
+    // Advance the current scan position over the UnicodeSet.
+    //   Step with nextCharLL() instead of just assigning the index, because
+    //   the line/char positions maintained for error reporting would
+    //   otherwise be thrown off.
+    const int32_t  setEnd = pos.getIndex();
+    while (fNextIndex < setEnd) {
+        nextCharLL();
+    }
+
+    return uset;
+}
+
+
+U_NAMESPACE_END
+
--- a/icu4c/source/i18n/regexcmp.h
+++ b/icu4c/source/i18n/regexcmp.h
@ -0,0 +1,157 @@
+//
+//  regexcmp.h
+//
+//  Copyright (C) 2002, International Business Machines Corporation and others.
+//  All Rights Reserved.
+//
+//  This file contains declarations for the class RegexCompile and for compiled
+//  regular expression data format
+//
+
+
+#ifndef REGEXCMP_H
+#define REGEXCMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uniset.h"
+#include "unicode/parseerr.h"
+#include "uhash.h"
+#include "uvector.h"
+
+
+
+U_NAMESPACE_BEGIN
+
+
+static const UBool REGEX_DEBUG = TRUE;              // NOTE(review): debug switch, currently on; its users are not in this header - confirm before release.
+
+//--------------------------------------------------------------------------------
+//
+//  class RegexCompile    does the lowest level, character-at-a-time
+//                        scanning of a regular expression.
+//
+//                        The output of the scanner is a tokenized form
+//                        of the RE, plus prebuilt UnicodeSet objects for each
+//                        set of characters that is referenced.
+//
+//--------------------------------------------------------------------------------
+static const int    kStackSize = 100;               // The size of the state stack for
+                                                    //   pattern parsing.  Corresponds roughly
+                                                    //   to the depth of parentheses nesting
+                                                    //   that is allowed in a pattern.
+
+enum EParseAction {dummy01, dummy02};               // Placeholder enum for the specifier for
+                                                    //   actions that are specified in the
+                                                    //   pattern parsing state table.
+struct  RegexTableEl;                               // Row of the parser state table; only used via pointer here.
+class   RegexPattern;                               // Only used via reference and pointer in this header.
+
+
+class RegexCompile : public UObject {
+public:
+
+    struct RegexPatternChar {
+        UChar32             fChar;                  // The code point, or (UChar32)-1 at end of pattern.
+        UBool               fQuoted;                // TRUE when read in quote mode or from a backslash escape.
+    };
+
+    RegexCompile(UErrorCode &e);
+    
+    void       compile(RegexPattern &rxp, const UnicodeString &pat, UParseError &pp, UErrorCode &e);
+
+
+    virtual    ~RegexCompile();
+
+    void        nextChar(RegexPatternChar &c);      // Get the next char from the pattern, consuming
+                                                    //   # comments and resolving backslash escapes.
+    UBool       push(const RegexPatternChar &c);    // Push (unget) one character.
+                                                    //   Only a single character may be pushed.
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 2.2
+     */
+    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @draft ICU 2.2
+     */
+    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+
+private:
+
+    UBool       doParseActions(EParseAction a);
+    void        error(UErrorCode e);                   // Record the first error, with line and char position.
+
+    UChar32     nextCharLL();                        // Raw next char; maintains line and char counts.
+    UnicodeSet  *scanSet();                          // Build a UnicodeSet from the text at the scan position.
+    void        handleCloseParen();                  // Fix up ops that target the end of a closed block.
+    int32_t     blockTopLoc();                       // Locate a position in the compiled pattern
+                                                     //  at the top of the just completed block
+                                                     //  or operation.
+
+
+    UErrorCode                    *fStatus;          // Error status; set by error().
+    RegexPattern                  *fRXPat;           // Supplies the pattern text, receives the compiled ops.
+    UParseError                   *fParseErr;        // Receives line and offset when error() records an error.
+
+    int32_t                       fScanIndex;        // Index of current character being processed
+                                                     //   in the pattern string.
+    int32_t                       fNextIndex;        // Index of the next character, which
+                                                     //   is the first character not yet scanned.
+    UBool                         fQuoteMode;        // Scan is in a quoted region
+    UBool                         fFreeForm;         // Scan mode is free-form, ignore spaces.
+    int                           fLineNum;          // Line number within the pattern.
+    int                           fCharNum;          // Char position within the line.
+    UChar32                       fLastChar;         // Previous char, needed to count CR-LF
+                                                     //   as a single line, not two.
+
+    RegexPatternChar              fC;                // Current char for parse state machine
+                                                     //   processing.
+
+    int32_t                       fStringOpStart;    // While a literal string is being scanned
+                                                     //   holds the start index within RegexPattern.
+                                                     //   fLiteralText where the string is being stored.
+
+    RegexTableEl                  **fStateTable;     // State Transition Table for regex pattern
+                                                     //   parsing.  index by p[state][char-class]
+
+    uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
+    int                           fStackPtr;           //  and pops as specified in the state
+                                                       //  transition rules.
+
+    int32_t                       fPatternLength;    // Length of the input pattern string.
+
+    UStack                        fParenStack;       // parentheses stack.  Each frame consists of
+                                                     //   the positions of compiled pattern operations
+                                                     //   needing fixup, followed by negative value.  The
+                                                     //   first entry in each frame is the position of the
+                                                     //   spot reserved for use when a quantifier
+                                                     //   needs to add a SAVE at the start of a (block)
+                                                     //   The negative value (-1, -2,...) indicates
+                                                     //   the kind of paren that opened the frame.  Some
+                                                     //   need special handling on close.
+                                                     //   handleCloseParen() treats -2 as capturing parens
+                                                     //   and -1 as needing no additional fixups.
+    int32_t                       fMatchOpenParen;   // The position in the compiled pattern
+                                                     //   of the slot reserved for a state save
+                                                     //   at the start of the most recently processed
+                                                     //   parenthesized block.
+    int32_t                       fMatchCloseParen;  // The position in the pattern of the first
+                                                     //   location after the most recently processed
+                                                     //   parenthesized block.
+
+    /**
+     * The address of this static class variable serves as this class's ID
+     * for ICU "poor man's RTTI".
+     */
+    static const char fgClassID;
+};
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/i18n/regexcst.h
+++ b/icu4c/source/i18n/regexcst.h
@ -0,0 +1,220 @@
+//---------------------------------------------------------------------------------
+//
+// Generated Header File.  Do not edit by hand.
+//    This file contains the state table for the ICU Regular Expression Pattern Parser
+//    It is generated by the Perl script "regexcst.pl" from
+//    the rule parser state definitions file "regexcst.txt".
+//
+//   Copyright (C) 2002 International Business Machines Corporation 
+//   and others. All rights reserved.  
+//
+//---------------------------------------------------------------------------------
+#ifndef REGEXCST_H
+#define REGEXCST_H
+
+U_NAMESPACE_BEGIN
+//
+// Character classes for regex pattern scanning.
+//
+    static const uint8_t kRuleSet_digit_char = 128;
+    static const uint8_t kRuleSet_white_space = 129;
+    static const uint8_t kRuleSet_rule_char = 130;
+
+
+enum Regex_PatternParseAction {
+    doExprOrOperator,
+    doCloseParen,
+    doTagValue,
+    doOrOperator,
+    doOpenCaptureParen,
+    doBadOpenParenType,
+    doRuleError,
+    doStartString,
+    doNGOpt,
+    doPossesiveStar,
+    doOpenLookBehind,
+    doExprRParen,
+    doStar,
+    doPossesivePlus,
+    doNGStar,
+    doOpenLookAheadNeg,
+    doPlus,
+    doOpenNonCaptureParen,
+    doNGPlus,
+    doPatFinish,
+    doIntervalMinValue,
+    doIntervalDigit,
+    doPossesiveOpt,
+    doOpt,
+    doOpenAtomicParen,
+    doStringChar,
+    doOpenLookAhead,
+    doNumberExpectedError,
+    doDotAny,
+    doExprFinished,
+    doScanUnicodeSet,
+    doNOP,
+    doExit,
+    doPatStart,
+    doEndString,
+    doOpenLookBehindNeg,
+    doSplitString,
+    rbbiLastAction};
+
+//-------------------------------------------------------------------------------
+//
+//  RegexTableEl       represents the structure of a row in the transition table
+//                     for the pattern parser state machine.
+//-------------------------------------------------------------------------------
+struct RegexTableEl {
+    Regex_PatternParseAction      fAction;
+    uint8_t                       fCharClass;       // 0-127:    an individual ASCII character
+                                                    // 128-255:  character class index
+    uint8_t                       fNextState;       // 0-250:    normal next-state numbers
+                                                    // 255:      pop next-state from stack.
+    uint8_t                       fPushState;
+    UBool                         fNextChar;
+};
+
+static const struct RegexTableEl gRuleParseStateTable[] = {
+    {doNOP, 0, 0, 0, TRUE}
+    , {doPatStart, 255, 3, 2, FALSE}     //  1      start
+    , {doPatFinish, 255, 2,0,  FALSE}     //  2      finish
+    , {doStartString, 254, 10,0,  TRUE}     //  3      term
+    , {doStartString, 130, 10,0,  TRUE}     //  4 
+    , {doScanUnicodeSet, 91 /* [ */, 17,0,  TRUE}     //  5 
+    , {doNOP, 40 /* ( */, 29, 17, TRUE}     //  6 
+    , {doDotAny, 46 /* . */, 17,0,  TRUE}     //  7 
+    , {doNOP, 253, 255,0,  FALSE}     //  8 
+    , {doRuleError, 255, 67,0,  FALSE}     //  9 
+    , {doStringChar, 254, 10,0,  TRUE}     //  10      string
+    , {doStringChar, 130, 10,0,  TRUE}     //  11 
+    , {doSplitString, 63 /* ? */, 17,0,  FALSE}     //  12 
+    , {doSplitString, 43 /* + */, 17,0,  FALSE}     //  13 
+    , {doSplitString, 42 /* * */, 17,0,  FALSE}     //  14 
+    , {doSplitString, 123 /* { */, 17,0,  FALSE}     //  15 
+    , {doEndString, 255, 17,0,  FALSE}     //  16 
+    , {doNOP, 42 /* * */, 40,0,  TRUE}     //  17      expr-quant
+    , {doNOP, 43 /* + */, 43,0,  TRUE}     //  18 
+    , {doNOP, 63 /* ? */, 46,0,  TRUE}     //  19 
+    , {doNOP, 255, 21,0,  FALSE}     //  20 
+    , {doNOP, 254, 3,0,  FALSE}     //  21      expr-cont
+    , {doNOP, 130, 3,0,  FALSE}     //  22 
+    , {doNOP, 91 /* [ */, 3,0,  FALSE}     //  23 
+    , {doNOP, 40 /* ( */, 3,0,  FALSE}     //  24 
+    , {doNOP, 46 /* . */, 3,0,  FALSE}     //  25 
+    , {doOrOperator, 124 /* | */, 3,0,  TRUE}     //  26 
+    , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  27 
+    , {doExprFinished, 255, 255,0,  FALSE}     //  28 
+    , {doNOP, 63 /* ? */, 31,0,  TRUE}     //  29      open-paren
+    , {doOpenCaptureParen, 255, 3, 17, FALSE}     //  30 
+    , {doOpenNonCaptureParen, 58 /* : */, 3, 17, TRUE}     //  31      open-paren-extended
+    , {doOpenAtomicParen, 62 /* > */, 3, 17, TRUE}     //  32 
+    , {doOpenLookAhead, 61 /* = */, 3, 21, TRUE}     //  33 
+    , {doOpenLookAheadNeg, 33 /* ! */, 3, 21, TRUE}     //  34 
+    , {doNOP, 60 /* < */, 37,0,  TRUE}     //  35 
+    , {doBadOpenParenType, 255, 67,0,  FALSE}     //  36 
+    , {doOpenLookBehind, 61 /* = */, 3, 21, TRUE}     //  37      open-paren-lookbehind
+    , {doOpenLookBehindNeg, 33 /* ! */, 3, 21, TRUE}     //  38 
+    , {doBadOpenParenType, 255, 67,0,  FALSE}     //  39 
+    , {doNGStar, 63 /* ? */, 21,0,  TRUE}     //  40      quant-star
+    , {doPossesiveStar, 43 /* + */, 21,0,  TRUE}     //  41 
+    , {doStar, 255, 21,0,  FALSE}     //  42 
+    , {doNGPlus, 63 /* ? */, 21,0,  TRUE}     //  43      quant-plus
+    , {doPossesivePlus, 43 /* + */, 21,0,  TRUE}     //  44 
+    , {doPlus, 255, 21,0,  FALSE}     //  45 
+    , {doNGOpt, 63 /* ? */, 21,0,  TRUE}     //  46      quant-opt
+    , {doPossesiveOpt, 43 /* + */, 21,0,  TRUE}     //  47 
+    , {doOpt, 255, 21,0,  FALSE}     //  48 
+    , {doNOP, 129, 49,0,  TRUE}     //  49      interval-open
+    , {doIntervalMinValue, 128, 52,0,  FALSE}     //  50 
+    , {doNumberExpectedError, 255, 67,0,  FALSE}     //  51 
+    , {doNOP, 129, 56,0,  TRUE}     //  52      interval-value
+    , {doNOP, 125 /* } */, 56,0,  FALSE}     //  53 
+    , {doIntervalDigit, 128, 52,0,  TRUE}     //  54 
+    , {doNumberExpectedError, 255, 67,0,  FALSE}     //  55 
+    , {doNOP, 129, 56,0,  TRUE}     //  56      interval-close
+    , {doTagValue, 125 /* } */, 59,0,  TRUE}     //  57 
+    , {doNumberExpectedError, 255, 67,0,  FALSE}     //  58 
+    , {doNOP, 254, 3,0,  FALSE}     //  59      expr-cont-no-interval
+    , {doNOP, 130, 3,0,  FALSE}     //  60 
+    , {doNOP, 91 /* [ */, 3,0,  FALSE}     //  61 
+    , {doNOP, 40 /* ( */, 3,0,  FALSE}     //  62 
+    , {doNOP, 46 /* . */, 3,0,  FALSE}     //  63 
+    , {doExprOrOperator, 124 /* | */, 3,0,  TRUE}     //  64 
+    , {doExprRParen, 41 /* ) */, 255,0,  TRUE}     //  65 
+    , {doExprFinished, 255, 255,0,  FALSE}     //  66 
+    , {doExit, 255, 67,0,  TRUE}     //  67      errorDeath
+ };
+static const char *RegexStateNames[] = {    0,
+     "start",
+     "finish",
+     "term",
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+     "string",
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+     "expr-quant",
+    0,
+    0,
+    0,
+     "expr-cont",
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+     "open-paren",
+    0,
+     "open-paren-extended",
+    0,
+    0,
+    0,
+    0,
+    0,
+     "open-paren-lookbehind",
+    0,
+    0,
+     "quant-star",
+    0,
+    0,
+     "quant-plus",
+    0,
+    0,
+     "quant-opt",
+    0,
+    0,
+     "interval-open",
+    0,
+    0,
+     "interval-value",
+    0,
+    0,
+    0,
+     "interval-close",
+    0,
+    0,
+     "expr-cont-no-interval",
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+     "errorDeath",
+    0};
+
+U_NAMESPACE_END
+#endif
--- a/icu4c/source/i18n/regexcst.pl
+++ b/icu4c/source/i18n/regexcst.pl
@ -0,0 +1,326 @@
+#
+#  regexcst.pl
+#            Compile the regular expression parser state table data into initialized C data.
+#            Usage:
+#                   cd icu/source/i18n
+#                   perl regexcst.pl < regexcst.txt > regexcst.h
+#
+#             The output file, regexcst.h, is included by some of the .cpp regex
+#             implementation files.   This perl script is NOT run as part
+#             of a normal ICU build.  It is run by hand when needed, and the
+#             regexcst.h generated file is put back into cvs.
+#
+#             See regexcst.txt for a description of the input format for this script.
+#
+#             This script is derived from rbbicst.pl, which performs the same function
+#             for the Rule Based Break Iterator Rule Parser.  Perhaps they could be
+#             merged?
+#
+#*********************************************************************
+#   Copyright (C) 2002 International Business Machines Corporation   *
+#   and others. All rights reserved.                                 *
+#*********************************************************************
+
+
+$num_states = 1;         # Always the state number for the line being compiled.
+$line_num  = 0;          # The line number in the input file.
+
+$states{"pop"} = 255;    # Add the "pop"  to the list of defined state names.
+                         # This prevents any state from being labelled with "pop",
+                         #  and resolves references to "pop" in the next state field.
+
+#
+# Read the state definitions, one input line at a time, filling in the
+#   parallel @state_xxx arrays, which are indexed by state number.
+#
+# All diagnostics go to STDERR.  STDOUT is the generated header, so anything
+#   printed there would end up inside regexcst.h.
+#
+line_loop: while (<>) {
+    chomp();
+    $line = $_;
+    @fields = split();
+    $line_num++;
+
+    # Remove # comments, which are any fields beginning with a #, plus all
+    #  that follow on the line.
+    for ($i=0; $i<@fields; $i++) {
+        if ($fields[$i] =~ /^#/) {
+            @fields = @fields[0 .. $i-1];
+            last;
+        }
+    }
+    # ignore blank lines, and those with no fields left after stripping comments..
+    if (@fields == 0) {
+        next;
+    }
+
+    #
+    # State Label:  handling.
+    #    Does the first token end with a ":"?  If so, it's the name  of a state.
+    #    Put in a hash, together with the current state number,
+    #        so that we can later look up the number from the name.
+    #
+    if ($fields[0] =~ /.*:$/) {
+        $state_name = $fields[0];
+        $state_name =~ s/:$//;       # strip off the trailing colon from the state name.
+
+        if ($states{$state_name} != 0) {
+            print STDERR "  regexcst: at line $line_num duplicate definition of state $state_name\n";
+        }
+        $states{$state_name} = $num_states;
+        $stateNames[$num_states] = $state_name;
+
+        # if the label was the only thing on this line, go on to the next line,
+        # otherwise assume that a state definition is on the same line and fall through.
+        if (@fields == 1) {
+            next line_loop;
+        }
+        shift @fields;                       # shift off label field in preparation
+                                             #  for handling the rest of the line.
+    }
+
+    #
+    # State Transition line.
+    #   syntax is this,
+    #       character   [n]  target-state  [^push-state]  [function-name]
+    #   where
+    #      [something]   is an optional something
+    #      character     is either a single quoted character e.g. '['
+    #                       or a name of a character class, e.g. white_space
+    #
+
+    $state_line_num[$num_states] = $line_num;   # remember line number with each state
+                                                #  so we can make better error messages later.
+    #
+    # First field, character class or literal character for this transition.
+    #
+    if ($fields[0] =~ /^'.'$/) {
+        # We've got a quoted literal character.
+        $state_literal_chars[$num_states] = $fields[0];
+        $state_literal_chars[$num_states] =~ s/'//g;
+    } else {
+        # We've got the name of a character class.
+        $state_char_class[$num_states] = $fields[0];
+        if ($fields[0] =~ /[\W]/) {
+            print STDERR "  regexcst:  at line $line_num, bad character literal or character class name.\n";
+            print STDERR "     scanning $fields[0]\n";
+            exit(-1);
+        }
+    }
+    shift @fields;
+
+    #
+    # do the 'n' flag
+    #
+    $state_flag[$num_states] = "FALSE";
+    if ($fields[0] eq "n") {
+        $state_flag[$num_states] = "TRUE";
+        shift @fields;
+    }
+
+    #
+    # do the destination state.
+    #
+    $state_dest_state[$num_states] = $fields[0];
+    if ($fields[0] eq "") {
+        print STDERR "  regexcst:  at line $line_num, destination state missing.\n";
+        exit(-1);
+    }
+    shift @fields;
+
+    #
+    # do the push state, if present.
+    #
+    if ($fields[0] =~ /^\^/) {
+        $fields[0] =~ s/^\^//;
+        $state_push_state[$num_states] = $fields[0];
+        if ($fields[0] eq "" ) {
+            print STDERR "  regexcst:  at line $line_num, expected state after ^ (no spaces).\n";
+            exit(-1);
+        }
+        shift @fields;
+    }
+
+    #
+    # Lastly, do the optional action name.
+    #
+    if ($fields[0] ne "") {
+        $state_func_name[$num_states] = $fields[0];
+        shift @fields;
+    }
+
+    #
+    #  There should be no fields left on the line at this point.
+    #  (Reported, but not treated as fatal.)
+    #
+    if (@fields > 0) {
+       print STDERR "  regexcst:  at line $line_num, unexpected extra stuff on input line.\n";
+       print STDERR "     scanning $fields[0]\n";
+   }
+   $num_states++;
+}
+
+#
+# We've read in the whole file, now go back and output the
+#   C source code for the state transition table.
+#
+# We read all states first, before writing anything,  so that the state numbers
+# for the destination states are all available to be written.
+#
+
+#
+# Make hashes for the names of the character classes and
+#      for the names of the actions that appeared.
+#      A state with no action name gets the default action "doNOP".
+#
+for ($state=1; $state < $num_states; $state++) {
+    if ($state_char_class[$state] ne "") {
+        if ($charClasses{$state_char_class[$state]} == 0) {
+            $charClasses{$state_char_class[$state]} = 1;
+        }
+    }
+    if ($state_func_name[$state] eq "") {
+        $state_func_name[$state] = "doNOP";
+    }
+    # Key the test on the action name that is about to be recorded.
+    if ($actions{$state_func_name[$state]} == 0) {
+        $actions{$state_func_name[$state]} = 1;
+    }
+}
+
+#
+# Check that all of the destination states and push states have been defined.
+#   Problems are reported on STDERR, because STDOUT is the generated header.
+#   Nothing is generated if any were found.
+#
+$states{"exit"} = 0;              # Predefined state name, terminates state machine.
+for ($state=1; $state<$num_states; $state++) {
+   if ($states{$state_dest_state[$state]} == 0 && $state_dest_state[$state] ne "exit") {
+       print STDERR "Error at line $state_line_num[$state]: target state \"$state_dest_state[$state]\" is not defined.\n";
+       $errors++;
+   }
+   if ($state_push_state[$state] ne "" && $states{$state_push_state[$state]} == 0) {
+       print STDERR "Error at line $state_line_num[$state]: push state \"$state_push_state[$state]\" is not defined.\n";
+       $errors++;
+   }
+}
+
+die "regexcst: $errors undefined state reference(s), no output generated.\n" if ($errors>0);
+
+#
+# Emit the banner comment, the include guard and the namespace opening of the
+#   generated header.  The guard is named for this generated file so that it
+#   does not collide with the guard of any other generated state table header.
+#
+print "//---------------------------------------------------------------------------------\n";
+print "//\n";
+print "// Generated Header File.  Do not edit by hand.\n";
+print "//    This file contains the state table for the ICU Regular Expression Pattern Parser\n";
+print "//    It is generated by the Perl script \"regexcst.pl\" from\n";
+print "//    the rule parser state definitions file \"regexcst.txt\".\n";
+print "//\n";
+print "//   Copyright (C) 2002 International Business Machines Corporation \n";
+print "//   and others. All rights reserved.  \n";
+print "//\n";
+print "//---------------------------------------------------------------------------------\n";
+print "#ifndef REGEXCST_H\n";
+print "#define REGEXCST_H\n";
+print "\n";
+print "U_NAMESPACE_BEGIN\n";
+
+#
+# Emit the constants for indices of Unicode Sets
+#   Define one constant for each of the character classes encountered.
+#   At the same time, store the index corresponding to the set name back into hash.
+#
+print "//\n";
+print "// Character classes for regex pattern scanning.\n";
+print "//\n";
+
+# Sets "default", "quoted" and "eof" get fixed values and no emitted constant.
+#   They have no corresponding UnicodeSet object in the state machine,
+#   but are handled by special case code.
+my %reservedSetValues = ("default" => 255, "quoted" => 254, "eof" => 253);
+
+$i = 128;                   # State Table values for Unicode char sets range from 128-250.
+foreach $setName (keys %charClasses) {
+    if (exists $reservedSetValues{$setName}) {
+        $charClasses{$setName} = $reservedSetValues{$setName};
+        next;
+    }
+    # Normal character class.  Emit a named constant and remember its value.
+    print "    static const uint8_t kRuleSet_$setName = $i;\n";
+    $charClasses{$setName} = $i;
+    $i++;
+}
+print "\n\n";
+
+#
+# Emit the enum for the actions to be performed:
+#   one enumerator per action name collected in %actions, then a terminator.
+#
+print "enum Regex_PatternParseAction {\n";
+print "    $_,\n" foreach (keys %actions);
+print "    rbbiLastAction};\n\n";
+
+#
+# Emit the struct definition for transition table elements.
+#   The whole definition is written with a single print.
+#
+print "//-------------------------------------------------------------------------------\n"
+    . "//\n"
+    . "//  RegexTableEl       represents the structure of a row in the transition table\n"
+    . "//                     for the pattern parser state machine.\n"
+    . "//-------------------------------------------------------------------------------\n"
+    . "struct RegexTableEl {\n"
+    . "    Regex_PatternParseAction      fAction;\n"
+    . "    uint8_t                       fCharClass;       // 0-127:    an individual ASCII character\n"
+    . "                                                    // 128-255:  character class index\n"
+    . "    uint8_t                       fNextState;       // 0-250:    normal next-state numbers\n"
+    . "                                                    // 255:      pop next-state from stack.\n"
+    . "    uint8_t                       fPushState;\n"
+    . "    UBool                         fNextChar;\n"
+    . "};\n\n";
+
+#
+# emit the state transition table
+#
+#   One initializer row is written per state table line, in the field order
+#   of the emitted struct:  action, char or char class, next state,
+#   push state, next-char flag.
+#
+print "static const struct RegexTableEl gRuleParseStateTable[] = {\n";
+print "    {doNOP, 0, 0, 0, TRUE}\n";    # State 0 is a dummy.  Real states start with index = 1.
+for ($state=1; $state < $num_states; $state++) {
+    print "    , {$state_func_name[$state],";
+    if ($state_literal_chars[$state] ne "") {
+        $c = $state_literal_chars[$state];
+        # The character is passed as a printf argument, not interpolated into the
+        #   format string, so that a literal '%' in the grammar is printed as-is
+        #   instead of being taken as the start of a conversion.
+        printf(" %d /* %s */,", ord($c), $c);   #  use numeric value, so EBCDIC machines are ok.
+    }else {
+        print " $charClasses{$state_char_class[$state]},";
+    }
+    print " $states{$state_dest_state[$state]},";
+
+    # The push-state field is optional.  If omitted, fill field with a zero, which flags
+    #   the state machine that there is no push state.
+    if ($state_push_state[$state] eq "") {
+        print "0, ";
+    } else {
+        print " $states{$state_push_state[$state]},";
+    }
+    print " $state_flag[$state]} ";
+
+    # Put out a C++ comment showing the number (index) of this state row,
+    #   and, if this is the first row of the table for this state, the state name.
+    print "    //  $state ";
+    if ($stateNames[$state] ne "") {
+        print "     $stateNames[$state]";
+    }
+    print "\n";
+}
+print " };\n";
+
+
+#
+# emit a mapping array from state numbers to state names.
+#
+#    This array is used for producing debugging output from the pattern parser.
+#    Rows that have no name of their own get a 0 entry, and the array is
+#    closed with one more 0.
+#
+print "static const char *RegexStateNames[] = {";
+foreach $state (0 .. $num_states-1) {
+    print(($stateNames[$state] ne "") ? "     \"$stateNames[$state]\",\n" : "    0,\n");
+}
+print "    0};\n\n";
+
+# Close the namespace and the include guard opened at the top of the output.
+print "U_NAMESPACE_END\n";
+print "#endif\n";
+
+
+
--- a/icu4c/source/i18n/regexcst.txt
+++ b/icu4c/source/i18n/regexcst.txt
@ -0,0 +1,231 @@
+
+#*****************************************************************************
+#
+#   Copyright (C) 2002, International Business Machines Corporation and others.
+#   All Rights Reserved.
+#
+#*****************************************************************************
+#
+#  file:  regexcst.txt
+#  ICU Regular Expression Parser State Table
+#
+#     This state table is used when reading and parsing a regular expression pattern
+#     The pattern parser uses a state machine; the data in this file define the
+#     state transitions that occur for each input character.
+#
+#     *** This file defines the regex pattern grammar.   This is it.
+#     *** The determination of what is accepted is here.
+#
+#     This file is processed by a perl script "regexcst.pl" to produce initialized C arrays
+#     that are then built with the rule parser.
+#
+
+#
+# Here is the syntax of the state definitions in this file:
+#
+#
+#StateName:
+#   input-char           n next-state           ^push-state     action    
+#   input-char           n next-state           ^push-state     action    
+#       |                |   |                      |             |
+#       |                |   |                      |             |--- action to be performed by state machine
+#       |                |   |                      |                  See function RegexCompile::doParseActions()
+#       |                |   |                      |
+#       |                |   |                      |--- Push this named state onto the state stack.
+#       |                |   |                           Later, when next state is specified as "pop",
+#       |                |   |                           the pushed state will become the current state.
+#       |                |   |
+#       |                |   |--- Transition to this state if the current input character matches the input
+#       |                |        character or char class in the left hand column.  "pop" causes the next
+#       |                |        state to be popped from the state stack.
+#       |                |
+#       |                |--- When making the state transition specified on this line, advance to the next
+#       |                     character from the input only if 'n' appears here.
+#       |
+#       |--- Character or named character classes to test for.  If the current character being scanned
+#            matches, perform the actions and go to the state specified on this line.
+#            The input character is tested sequentially, in the order written.  The characters and
+#            character classes tested for do not need to be mutually exclusive.  The first match wins.
+#            
+
+
+
+
+#
+#  start state, scan position is at the beginning of the pattern.
+#
+start:
+   default                 term                 ^finish             doPatStart
+    
+#
+#  finish  -  We've scanned off the end of the pattern string.
+#             The "doPatFinish" action will stop the pattern scanning state machine.
+#
+finish:
+    default                finish                                   doPatFinish
+     
+
+    
+    
+#
+#  term.  Eat through a single rule character, or a composite thing, which
+#         could be a parenthesized expression  or a Unicode Set.
+#
+term:
+    quoted               n string                                   doStartString
+    rule_char            n string                                   doStartString
+    '['                  n expr-quant     		            doScanUnicodeSet
+    '('                  n open-paren            ^expr-quant          
+    '.'                  n expr-quant                               doDotAny
+    eof		           pop
+    default                errorDeath                               doRuleError
+    
+
+#
+#   string        We've encountered a literal character, or an escaped character.
+#                 Continue with any additional literal chars, building the sequence
+#                 into a string.
+#
+string:
+    quoted                n string                                  doStringChar
+    rule_char             n string                                  doStringChar
+    # If the string ends in a quantifier, we need to split off the last character so that
+    #   the quantifier affects only it, and not the entire string.  (e.g.  "ABC*")
+    '?'                     expr-quant                              doSplitString
+    '+'                     expr-quant                              doSplitString
+    '*'                     expr-quant                              doSplitString
+    '{'                     expr-quant                              doSplitString
+    default                 expr-quant                              doEndString
+
+#
+#   expr-quant    We've just finished scanning a term, now look for the optional
+#                 trailing quantifier - *, +, ?, *?,  etc.
+#
+expr-quant:
+    '*'                  n  quant-star                       
+    '+'                  n  quant-plus                              
+    '?'                  n  quant-opt        
+    default                 expr-cont 
+    
+    
+#
+#  expr-cont      Expression, continuation.  At a point where additional terms are
+#                                            allowed, but not required.
+#
+expr-cont:
+    quoted                  term                                    
+    rule_char               term                                    
+    '['                     term                                    
+    '('                     term                                    
+    '.'                     term                                    
+    '|'                  n  term                                    doOrOperator
+    ')'                  n  pop                                     doCloseParen
+    default                 pop                                     doExprFinished
+    
+
+#
+#   open-paren    We've got an open paren.  We need to scan further to
+#                 determine what kind of group it is - plain (, (?:, (?>, or whatever.
+#
+open-paren:
+    '?'                  n  open-paren-extended
+    default                 term            ^expr-quant             doOpenCaptureParen
+    
+open-paren-extended:
+    ':'                  n  term            ^expr-quant             doOpenNonCaptureParen  #  (?:
+    '>'                  n  term            ^expr-quant             doOpenAtomicParen      #  (?>
+    '='                  n  term            ^expr-cont              doOpenLookAhead        #  (?=
+    '!'                  n  term            ^expr-cont              doOpenLookAheadNeg     #  (?!
+    '<'                  n  open-paren-lookbehind
+    default                 errorDeath                              doBadOpenParenType
+    
+open-paren-lookbehind:
+    '='                  n  term            ^expr-cont              doOpenLookBehind       #  (?<=
+    '!'                  n  term            ^expr-cont              doOpenLookBehindNeg    #  (?<!
+    default                 errorDeath                              doBadOpenParenType
+    
+
+#
+#  quant-star     Scanning a '*' quantifier.  Need to look ahead to decide
+#                 between plain '*', '*?', '*+'
+#
+quant-star:
+     '?'                 n  expr-cont                               doNGStar               #  *?
+     '+'                 n  expr-cont                               doPossesiveStar        #  *+
+     default                expr-cont                               doStar
+
+
+#
+#  quant-plus     Scanning a '+' quantifier.  Need to look ahead to decide
+#                 between plain '+', '+?', '++'
+#
+quant-plus:
+     '?'                 n  expr-cont                               doNGPlus               #  +?
+     '+'                 n  expr-cont                               doPossesivePlus        #  ++
+     default                expr-cont                               doPlus
+
+
+#
+#  quant-opt  Scanning a '?' quantifier.  Need to look ahead to decide
+#                  between plain '?', '??', '?+'
+#
+quant-opt:
+     '?'                 n  expr-cont                               doNGOpt                 #  ??
+     '+'                 n  expr-cont                               doPossesiveOpt          #  ?+
+     default                expr-cont                               doOpt
+
+
+#
+#   Interval         scanning a '{', the opening delimiter for an interval specification
+#                                   {number} or {min, max}
+#
+interval-open:
+    white_space          n  interval-open
+    digit_char              interval-value                          doIntervalMinValue
+    default                 errorDeath                              doNumberExpectedError
+    
+interval-value:
+    white_space          n  interval-close
+    '}'                     interval-close
+    digit_char           n  interval-value                          doIntervalDigit
+    default                 errorDeath                              doNumberExpectedError
+    
+interval-close:
+    white_space          n  interval-close
+    '}'                  n  expr-cont-no-interval                   doTagValue
+    default                 errorDeath                              doNumberExpectedError
+    
+    
+    
+#
+#  expr-cont-no-interval    Expression, continuation.  At a point where additional terms are
+#                                            allowed, but not required.  Just like
+#                                            expr-cont, above, except that no interval
+#                                            specification {min, max}  is permitted.
+#
+expr-cont-no-interval:
+    quoted                  term                                    
+    rule_char               term                                    
+    '['                     term                                    
+    '('                     term                                    
+    '.'                     term                                    
+    '|'                  n  term                                    doExprOrOperator
+    ')'                  n  pop                                     doExprRParen
+    default                 pop                                     doExprFinished
+    
+    
+
+
+    
+    
+
+    
+#
+# errorDeath.   This state is specified as the next state whenever a syntax error
+#               in the source rules is detected.  Barring bugs, the state machine will never
+#               actually get here, but will stop because of the action associated with the error.
+#               But, just in case, this state asks the state machine to exit.
+errorDeath:
+    default              n errorDeath                               doExit
+
+
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@ -0,0 +1,46 @@
+// 
+//   Copyright (C) 2002 International Business Machines Corporation 
+//   and others. All rights reserved.  
+//
+//   file:  regeximp.h
+//
+//           ICU Regular Expressions, declarations of internal implementation types
+//           and constants that are common between the pattern compiler and the 
+//           runtime execution engine.
+//
+
+#ifndef _REGEXIMP_H
+#define _REGEXIMP_H
+
+
+//
+//  Opcode types     In the compiled form of the regex, these are the type, or opcodes,
+//                   of the entries.
+//
+//                   Each compiled entry packs one of these opcodes together with an
+//                   operand ("value") field; see the URX_BUILD, URX_TYPE and URX_VAL
+//                   macros for the packing.
+//
+static const uint32_t     URX_UNUSED1       = 1;
+static const uint32_t     URX_END           = 2;
+static const uint32_t     URX_ONECHAR       = 3;
+static const uint32_t     URX_STRING        = 4;    // Value field is index of string start
+static const uint32_t     URX_STRING_LEN    = 5;    // Value field is string length (code units)
+static const uint32_t     URX_STATE_SAVE    = 6;    // Value field is pattern position to push
+static const uint32_t     URX_NOP           = 7;
+static const uint32_t     URX_START_CAPTURE = 8;    // Value field is capture group number.
+static const uint32_t     URX_END_CAPTURE   = 9;    // Value field is capture group number
+static const uint32_t     URX_UNUSED10      = 10;   // Value field is index in pattern to
+                                                    //   loop back to.
+static const uint32_t     URX_SETREF        = 11;   // Value field is index of set in array of sets.
+static const uint32_t     URX_DOTANY        = 12; 
+static const uint32_t     URX_JMP           = 13;   // Value field is destination position in
+                                                    //   the pattern.
+static const uint32_t     URX_FAIL          = 14;   // Stop match operation;  No match.
+
+//
+//  Convenience macros for assembling and disassembling a compiled operation.
+//
+//     URX_BUILD   combine an opcode type and an operand value into one entry.
+//                 The type is shifted left 24 bits, the value is or-ed in below it.
+//     URX_TYPE    extract the opcode type from an entry.
+//     URX_VAL     extract the low 24 bits, the operand value, from an entry.
+//
+//     Every macro argument and every expansion is parenthesized, so that the
+//     macros behave like function calls when used inside larger expressions or
+//     when given expressions as arguments.
+//
+#define URX_BUILD(type, val) ((int32_t)(((type) << 24) | (val)))
+#define URX_TYPE(x)          ((x) >> 24)
+#define URX_VAL(x)           ((x) & 0xffffff)
+
+                
+#endif
+
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@ -0,0 +1,391 @@
+//
+//  file:  rematch.cpp    
+//
+/*
+**********************************************************************
+*   Copyright (C) 2002 International Business Machines Corporation   *
+*   and others. All rights reserved.                                 *
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/regex.h"
+#include "unicode/uniset.h"
+#include "uassert.h"
+#include "uvector.h"
+#include "regeximp.h"
+
+#include "stdio.h"
+
+U_NAMESPACE_BEGIN
+
+//-----------------------------------------------------------------------------
+//
+//   Constructor and Destructor
+//
+//-----------------------------------------------------------------------------
+//
+//   Construct a matcher for a compiled pattern.  No input string is attached
+//   yet; fInput stays NULL until reset(input) is called.
+//
+//   The pattern is referenced, not copied.
+//
+RegexMatcher::RegexMatcher(const RegexPattern *pat)  {
+    fPattern           = pat;
+    fInput             = NULL;
+    fInputLength       = 0;
+    fLastMatch         = FALSE;                // No match has been attempted yet.
+    UErrorCode  status = U_ZERO_ERROR;
+    fBackTrackStack    = new UStack(status);   // TODO:  do something with status.
+    fCaptureStarts     = new UVector(status);
+    fCaptureEnds       = new UVector(status);
+
+    // The match engine indexes the capture vectors directly by capture group
+    //   number, and group numbers run from 1 through fNumCaptureGroups
+    //   inclusive.  Allocate one slot more than the number of groups so that
+    //   the highest numbered group has a slot of its own.
+    int i;
+    for (i=0; i<=fPattern->fNumCaptureGroups; i++) {
+        fCaptureStarts->addElement(-1, status);
+        fCaptureEnds  ->addElement(-1, status);
+    }
+    reset();
+}
+
+
+//
+//   Copy constructor.  Copying is not implemented; nothing is taken from "other".
+//   The members are put into a defined, empty state so that the
+//   destructor can safely run on the resulting object.
+//
+RegexMatcher::RegexMatcher(const RegexMatcher &other) {
+    fPattern        = NULL;
+    fInput          = NULL;
+    fInputLength    = 0;
+    fBackTrackStack = NULL;
+    fCaptureStarts  = NULL;
+    fCaptureEnds    = NULL;
+    fLastMatch      = FALSE;
+    fLastMatchStart = -1;
+    fLastMatchEnd   = 0;
+    U_ASSERT(TRUE);
+}
+
+
+//
+//   Destructor.  Releases the three helper containers allocated by the
+//   constructor.  The pattern and the input string are not owned by the
+//   matcher and are left alone.
+//
+RegexMatcher::~RegexMatcher() {
+    delete fBackTrackStack;
+    delete fCaptureStarts;
+    delete fCaptureEnds;
+}
+
+
+
+
+//
+//   appendReplacement    Placeholder, not yet implemented.  Neither parameter
+//                        is used; the function only returns this matcher.
+//
+RegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest,
+                                              const UnicodeString &replacement) {
+    return *this;
+}
+
+
+
+//
+//   appendTail    Placeholder, not yet implemented.  Returns dest unchanged.
+//
+UnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
+    return dest;
+}
+
+
+
+//
+//   end    Placeholder, not yet implemented.  Always returns 0;
+//          err is neither examined nor set.
+//
+uint32_t RegexMatcher::end(UErrorCode &err) const {
+    return 0;
+}
+
+
+
+//
+//   end(group)    Placeholder, not yet implemented.  Always returns 0;
+//                 the group number is ignored and err is neither examined nor set.
+//
+uint32_t RegexMatcher::end(int group, UErrorCode &err) const {
+    return 0;
+}
+
+
+
+//
+//   find    Placeholder, not yet implemented.  Always returns FALSE.
+//
+UBool RegexMatcher::find() {
+    return FALSE;
+}
+
+
+
+//
+//   find(start)    Placeholder, not yet implemented.  Always returns FALSE;
+//                  start is ignored and err is neither examined nor set.
+//
+UBool RegexMatcher::find(uint32_t start, UErrorCode &err) {
+    return FALSE;
+}
+
+
+
+//
+//   group    Placeholder, not yet implemented.  Always returns a default
+//            constructed UnicodeString; err is neither examined nor set.
+//
+UnicodeString RegexMatcher::group(UErrorCode &err) const {
+    return UnicodeString();
+}
+
+
+
+//
+//   group(group)    Placeholder, not yet implemented.  Always returns a default
+//                   constructed UnicodeString; the group number is ignored and
+//                   err is neither examined nor set.
+//
+UnicodeString RegexMatcher::group(int group, UErrorCode &err) const {
+    return UnicodeString();
+}
+
+
+
+
+//
+//   groupCount    Placeholder, not yet implemented.  Always returns 0,
+//                 whatever the pattern contains.
+//
+int RegexMatcher::groupCount() const {
+    return 0;
+}
+
+
+
+//
+//   input    Return the string most recently attached with reset(input).
+//
+//            NOTE(review): fInput is NULL from construction until reset(input)
+//            has been called; this function dereferences it without a check,
+//            so it must not be called before an input string has been set.
+//
+const UnicodeString &RegexMatcher::input() const {
+    return *fInput;
+}
+
+
+
+
+//
+//   lookingAt    Attempt a match beginning at the start of the input.
+//                The match is not required to extend to the end of the input.
+//
+//                Returns TRUE if the pattern matched.  Returns FALSE, without
+//                touching the matcher, if status already indicates a failure
+//                on entry; otherwise the result of an earlier match attempt
+//                would be reported.
+//
+UBool RegexMatcher::lookingAt(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    reset();
+    MatchAt(0, status);
+    return fLastMatch;
+}
+
+
+
+//
+//   matches    Attempt to match the entire input against the pattern.
+//
+//              Returns TRUE only if a match beginning at the start of the
+//              input succeeds and ends exactly at the end of the input.
+//              Returns FALSE, without touching the matcher, if status already
+//              indicates a failure on entry; otherwise the result of an
+//              earlier match attempt would be reported.
+//
+UBool RegexMatcher::matches(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    reset();
+    MatchAt(0, status);
+    UBool   success  = (fLastMatch && fLastMatchEnd==fInputLength);
+    return success;
+}
+
+
+
+
+//
+//   pattern    Return the compiled pattern that was passed to the constructor.
+//
+const RegexPattern &RegexMatcher::pattern() const {
+    return *fPattern;
+}
+
+
+
+//
+//   replaceAll    Placeholder, not yet implemented.  Always returns a default
+//                 constructed UnicodeString; the replacement text is ignored
+//                 and err is neither examined nor set.
+//
+UnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &err) {
+    return UnicodeString();
+}
+
+
+
+
+//
+//   replaceFirst    Placeholder, not yet implemented.  Always returns a default
+//                   constructed UnicodeString; the replacement text is ignored
+//                   and err is neither examined nor set.
+//
+UnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &err) {
+    return UnicodeString();
+}
+
+
+
+//
+//   reset    Discard the results of any previous match operation.
+//            The input string, if any, stays attached.
+//
+RegexMatcher &RegexMatcher::reset() {
+    fLastMatch      = FALSE;
+    fLastMatchStart = -1;
+    fLastMatchEnd   =  0;
+
+    // Mark every capture group as not having matched.
+    //   UVector::setElementAt() takes the new element first and the index
+    //   second.  Group numbers run from 1 through fNumCaptureGroups inclusive,
+    //   so the loop bound is inclusive too.
+    int i;
+    for (i=0; i<=fPattern->fNumCaptureGroups; i++) {
+        fCaptureStarts->setElementAt(-1, i);
+    }
+    
+    return *this;
+}
+
+
+
+//
+//   reset(input)    Attach a new input string and discard the results of any
+//                   previous match operation.  Only the address of the string
+//                   is kept; no copy of its contents is made here.
+//
+RegexMatcher &RegexMatcher::reset(const UnicodeString &input) {
+    fInputLength    = input.length();
+    fInput          = &input;
+    return reset();
+}
+
+
+
+//
+//   start    Placeholder, not yet implemented.  Always returns 0;
+//            err is neither examined nor set.
+//
+int RegexMatcher::start(UErrorCode &err) const {
+    return 0;
+}
+
+
+
+
+//
+//   start(group)    Placeholder, not yet implemented.  Always returns 0;
+//                   the group number is ignored and err is neither examined nor set.
+//
+int RegexMatcher::start(int group, UErrorCode &err) const {
+    return 0;
+}
+
+
+//--------------------------------------------------------------------------------
+//
+//     backTrack    Within the match engine, this function is called when
+//                  a local match failure occurs, and the match needs to back
+//                  track and proceed down another path.
+//
+//                  Note:  Inline function.  Keep its body above MatchAt().
+//
+//--------------------------------------------------------------------------------
+void RegexMatcher::backTrack(int32_t &inputIdx, int32_t &patIdx)  {
+    // A saved state is pushed as (pattern position, input position), so the
+    //   two values come back off the stack in the opposite order.
+    inputIdx = fBackTrackStack->popi();
+    patIdx   = fBackTrackStack->popi();
+
+    // Forget any capture group whose recorded start is at or beyond the
+    //   restored input position.
+    //   UVector::setElementAt() takes the new element first and the index
+    //   second.  Group numbers run from 1 through fNumCaptureGroups inclusive,
+    //   so the loop bound is inclusive too.
+    int i;
+    for (i=0; i<=fPattern->fNumCaptureGroups; i++) {
+        if (fCaptureStarts->elementAti(i) >= inputIdx) {
+            fCaptureStarts->setElementAt(-1, i);
+        }
+    }
+}
+
+
+            
+//--------------------------------------------------------------------------------
+//
+//   MatchAt      This is the actual matching engine.
+//
+//--------------------------------------------------------------------------------
+//
+//   startIdx    index in the input string at which the match attempt begins.
+//   status      error code.  If it indicates a failure on entry no matching
+//               is done and the result is recorded as "no match".
+//
+//   The outcome is left in fLastMatch and, for a successful match, in
+//   fLastMatchStart and fLastMatchEnd.
+//
+void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
+    int32_t     inputIdx = startIdx;   // Current position in the input string.
+    int32_t     patIdx   = 0;          // Current position in the compiled pattern.
+    UBool       isMatch  = FALSE;      // True if the we have a match.
+
+    int32_t     op;                    // Operation from the compiled pattern, split into
+    int32_t     opType;                //    the opcode
+    int32_t     opValue;               //    and the operand value.
+
+
+    if (U_FAILURE(status)) {
+        // Do not leave the result of some earlier match attempt in place.
+        fLastMatch = FALSE;
+        return;
+    }
+
+    //  Cache frequently referenced items from the compiled pattern
+    //  in local variables.
+    //
+    UVector             *pat     = fPattern->fCompiledPat;
+    const UnicodeString *litText = &fPattern->fLiteralText;
+    UVector             *sets    = fPattern->fSets;
+    
+
+    //
+    //  Main loop for interpreting the compiled pattern.
+    //  One iteration of the loop per pattern operation performed.
+    //
+    for (;;) {
+        op      = pat->elementAti(patIdx);
+        opType  = URX_TYPE(op);
+        opValue = URX_VAL(op);
+        // printf("%d   %d  \"%c\"\n", patIdx, inputIdx, fInput->char32At(inputIdx));
+        patIdx++;
+
+        switch (opType) {
+
+
+        case URX_NOP:
+            break;
+
+
+        case URX_ONECHAR:
+            // Only look at the input if there is some left.  At the end of the
+            //   input there is no character to compare, so it is a failure to
+            //   match, the same as for URX_SETREF and URX_DOTANY.
+            if (inputIdx < fInputLength && fInput->char32At(inputIdx) == opValue) {
+                inputIdx = fInput->moveIndex32(inputIdx, 1);
+            } else {
+                // No match.  Back up matching to a saved state
+                backTrack(inputIdx, patIdx);
+            }
+            break;
+
+
+        case URX_STRING:
+            {
+                // A string match is compiled as two consecutive operations,
+                //   URX_STRING with the start index of the literal text, then
+                //   URX_STRING_LEN with its length.  Consume both here.
+                int32_t stringStartIdx, stringLen;
+                stringStartIdx = opValue;
+
+                op      = pat->elementAti(patIdx);
+                patIdx++;
+                opType  = URX_TYPE(op);
+                opValue = URX_VAL(op);
+                U_ASSERT(opType == URX_STRING_LEN);
+                stringLen = opValue;
+
+                if (fInput->compareBetween(inputIdx,
+                                            inputIdx+stringLen,
+                                            *litText,
+                                            stringStartIdx,
+                                            stringStartIdx+stringLen) == 0)
+                {
+                    inputIdx += stringLen;
+                } else {
+                    // No match.  Back up matching to a saved state
+                    backTrack(inputIdx, patIdx);
+                }
+            }
+            break;
+
+
+
+        case URX_STATE_SAVE:
+            // When saving state for backtracking, the pattern position that a
+            //   backtrack should (eventually) continue at is "opValue".
+            fBackTrackStack->push(opValue,  status);
+            fBackTrackStack->push(inputIdx, status);
+            break;
+
+
+        case URX_END:
+            // The match loop will exit via this path on a successful match,
+            //   when we reach the end of the pattern.
+            isMatch = TRUE;
+            goto  breakFromLoop;
+
+        case URX_START_CAPTURE:
+            U_ASSERT(opValue > 0 && opValue <= fPattern->fNumCaptureGroups);
+            fCaptureStarts->setElementAt(inputIdx,   opValue);
+            fCaptureEnds  ->setElementAt((int32_t)0, opValue);
+            break;
+
+
+        case URX_END_CAPTURE:
+            U_ASSERT(opValue > 0 && opValue <= fPattern->fNumCaptureGroups);
+            fCaptureEnds->setElementAt(inputIdx, opValue);
+            break;
+
+
+        case URX_SETREF:
+            if (inputIdx < fInputLength) {
+                // There is input left.  Pick up one char and test it for set membership.
+                UChar32  c = fInput->char32At(inputIdx);
+                U_ASSERT(opValue > 0 && opValue < sets->size());
+                UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
+                if (s->contains(c)) {
+                    // The character is in the set.  A Match.
+                    inputIdx = fInput->moveIndex32(inputIdx, 1);
+                    break;
+                }
+            }
+            // Either at end of input, or the character wasn't in the set.
+            // Either way, we need to back track out.
+            backTrack(inputIdx, patIdx);
+            break;
+            
+
+        case URX_DOTANY:
+            // . matches anything, but does not match if we've run out of input.
+            if (inputIdx < fInputLength) {
+                // There is input left.  Advance one character in it.
+                inputIdx = fInput->moveIndex32(inputIdx, 1);
+            } else {
+                backTrack(inputIdx, patIdx);
+            }
+            break;
+
+        case URX_JMP:
+            patIdx = opValue;
+            break;
+
+        case URX_FAIL:
+            isMatch = FALSE;
+            goto breakFromLoop;
+
+
+        default:
+            // Trouble.  The compiled pattern contains an entry with an
+            //           unrecognized type tag.
+            //           Flag an error so that the loop stops even when
+            //           assertions are compiled out.
+            U_ASSERT(false);
+            status = U_INTERNAL_PROGRAM_ERROR;
+        }
+
+        if (U_FAILURE(status)) {
+            break;
+        }
+    }
+    
+breakFromLoop:
+    fLastMatch = isMatch;
+    if (isMatch) {
+        fLastMatchStart  = startIdx;
+        fLastMatchEnd    = inputIdx;
+    }
+    return;
+}
+
+
+
+
+
+// Definition of the static data member RegexMatcher::fgClassID.
+const char RegexMatcher::fgClassID = 0;
+
+U_NAMESPACE_END
+
+
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@ -0,0 +1,284 @@
+//
+//  file:  repattrn.cpp    
+//
+/*
+**********************************************************************
+*   Copyright (C) 2002 International Business Machines Corporation   *
+*   and others. All rights reserved.                                 *
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/regex.h"
+#include "uassert.h"
+#include "uvector.h"
+#include "regexcmp.h"
+#include "regeximp.h"
+
+#include "stdio.h"    // TODO:  get rid of this...
+
+U_NAMESPACE_BEGIN
+
+//--------------------------------------------------------------------------
+//
+//    RegexPattern    Constructors and destructor
+//
+//--------------------------------------------------------------------------
+//
+//  Default constructor.  Builds an empty pattern: allocates the vector that
+//  receives the compiled pattern and the vector of UnicodeSets.
+//  If anything cannot be allocated the object is left with fBadState == TRUE
+//  and both vector pointers NULL.
+//
+RegexPattern::RegexPattern() {
+    UErrorCode status = U_ZERO_ERROR;
+    fFlags            = 0;
+    fCompiledPat      = NULL;
+    fSets             = NULL;
+    fBadState         = FALSE;
+    fNumCaptureGroups = 0;
+
+    fCompiledPat      = new UVector(status);
+    fSets             = new UVector(status);
+
+    if (fCompiledPat == NULL || fSets == NULL) {
+        // operator new failed; do not touch the (NULL) vectors below.
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        // fSets is a vector of all UnicodeSets built for this pattern.
+        //   Reserve element 0, to allow a sanity check against refs to element 0.
+        //   The slot is read back as a pointer (elementAt), so store it as a
+        //   NULL pointer rather than through the int32_t overload.
+        fSets->addElement((void *)NULL, status);
+    }
+
+    if (U_FAILURE(status)) {
+        fBadState = TRUE;
+        delete fCompiledPat;
+        delete fSets;
+        fCompiledPat      = NULL;
+        fSets             = NULL;
+    }
+}
+
+
+//
+//  Copy constructor.  Not implemented yet (asserts in debug builds).
+//  The members are still given well-defined values so that, when U_ASSERT
+//  compiles to nothing, the resulting object is flagged as unusable and can
+//  be destroyed safely instead of holding garbage pointers.
+//
+RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
+    // TODO.   Need to add a reasonable assign or copy  constructor
+    //         to UVector.
+    fFlags            = 0;
+    fCompiledPat      = NULL;
+    fSets             = NULL;
+    fBadState         = TRUE;
+    fNumCaptureGroups = 0;
+    U_ASSERT(FALSE);
+}
+
+
+//
+//  Destructor.  Deletes the compiled pattern, every UnicodeSet held in
+//  fSets, and the fSets vector itself.  fSets is NULL when construction
+//  failed, so it must be checked before use.
+//
+RegexPattern::~RegexPattern() {
+    delete fCompiledPat;
+    fCompiledPat = NULL;
+
+    if (fSets != NULL) {
+        // Element 0 is the reserved placeholder, never a real set; start at 1.
+        int32_t i;
+        for (i=1; i<fSets->size(); i++) {
+            UnicodeSet *s = (UnicodeSet *)fSets->elementAt(i);
+            delete s;      // delete of a NULL pointer is a no-op
+        }
+        delete fSets;
+        fSets = NULL;
+    }
+}
+
+// Returns a new heap-allocated RegexPattern made with the copy constructor.
+RegexPattern  *RegexPattern::clone() const {
+    return new RegexPattern(*this);
+}
+
+//---------------------------------------------------------------------
+//
+//   compile        
+//
+//---------------------------------------------------------------------
+//
+//  Compile a regular expression into a new RegexPattern.
+//
+//    regex   The pattern source text.
+//    flags   Mode flags; recorded in the new pattern (see flags()).
+//    pe      Passed through to the pattern compiler.
+//    err     In/out error code.  Nothing is done if it already indicates
+//            a failure on entry.
+//
+//    returns The new pattern, or NULL on any failure.  No object is handed
+//            back together with a failing error code, so callers have
+//            nothing to clean up on the error path.
+//
+RegexPattern  *RegexPattern::compile(
+                             const UnicodeString &regex,
+                             uint32_t              flags,
+                             UParseError          &pe,
+                             UErrorCode           &err)  {
+
+    if (U_FAILURE(err)) {
+        return NULL;
+    }
+
+    RegexPattern *This = new RegexPattern;
+    if (This == NULL) {
+        err = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    if (This->fBadState) {
+        // The constructor could not allocate its vectors; the compiler
+        // must not be run against NULL storage.
+        err = U_MEMORY_ALLOCATION_ERROR;
+        delete This;
+        return NULL;
+    }
+
+    // Remember the flags the caller asked for.
+    This->fFlags = flags;
+
+    RegexCompile     compiler(err);
+    compiler.compile(*This, regex, pe, err);
+
+    if (U_FAILURE(err)) {
+        delete This;
+        This = NULL;
+    }
+    return This;
+}
+    
+
+
+
+//---------------------------------------------------------------------
+//
+//   flags
+//
+//---------------------------------------------------------------------
+// Accessor for the flag word stored in fFlags.
+uint32_t RegexPattern::flags() const {
+    return this->fFlags;
+}
+
+
+//---------------------------------------------------------------------
+//
+//   matcher(UnicodeString, err)
+//
+//---------------------------------------------------------------------
+// Creates a RegexMatcher for this pattern and resets it onto the given input.
+// Returns NULL if err already indicates failure, or (with err set to
+// U_MEMORY_ALLOCATION_ERROR) if the matcher cannot be allocated.
+RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
+                                    UErrorCode          &err)  const {
+    if (U_FAILURE(err)) {
+        return NULL;
+    }
+
+    RegexMatcher *newMatcher = new RegexMatcher(this);
+    if (newMatcher != NULL) {
+        newMatcher->reset(input);
+    } else {
+        err = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return newMatcher;
+}
+
+
+
+
+//---------------------------------------------------------------------
+//
+//   pattern
+//
+//---------------------------------------------------------------------
+// Returns a copy of the string held in fPattern.
+UnicodeString RegexPattern::pattern() const {
+    return this->fPattern;
+}
+
+
+
+
+//---------------------------------------------------------------------
+//
+//   split
+//
+//---------------------------------------------------------------------
+// Placeholder: splitting is not implemented yet.  The destination array is
+// never written and the reported field count is always zero.
+uint32_t  RegexPattern::split(const UnicodeString &input,
+        UnicodeString    dest[],
+        uint32_t         destCapacity,
+        UErrorCode       &err) const
+{
+    uint32_t fieldCount = 0;
+    if (U_SUCCESS(err)) {
+        // TODO:  
+    }
+    return fieldCount;
+}
+
+
+
+//---------------------------------------------------------------------
+//
+//   hashcode
+//
+//---------------------------------------------------------------------
+// Placeholder hash: every pattern currently hashes to the same value.
+int32_t   RegexPattern::hashCode(void) const {
+    const int32_t placeholderHash = 0;   // TODO:   Do something better here
+    return placeholderHash;
+}
+
+
+//---------------------------------------------------------------------
+//
+//   dump    Output the compiled form of the pattern.
+//           Debugging function only.
+//
+//---------------------------------------------------------------------
+// Display names used by RegexPattern::dump(), indexed by the type field of
+// a compiled pattern operation.  The table is read-only: the entries are
+// string literals, so both the characters and the pointers are const.
+static const char * const opNames[] = {
+        "ZERO",
+        "?1",
+        "END",
+        "ONECHAR",
+        "STRING",
+        "STRING_LEN",
+        "STATE_SAVE",
+        "NOP",
+        "START_CAPTURE",
+        "END_CAPTURE",
+        "?10",
+        "SETREF",
+        "DOTANY",
+        "JMP",
+        "FAIL"
+};
+
+//
+//  Debugging aid: prints the original pattern, whether the pattern is
+//  flagged as valid, and one line per compiled operation, to stdout.
+//
+//  The listing stops at the first END operation or at the end of the
+//  compiled pattern vector, whichever comes first, so a pattern whose
+//  compilation failed part way (no END op), or whose storage could not be
+//  allocated, can still be dumped.
+//
+void   RegexPattern::dump() {
+    int      index;
+    int      i;
+    UChar    c;
+    int32_t  op;
+    int32_t  pinnedType;
+    int32_t  type;
+    int32_t  val;
+    int32_t  stringStart = 0;     // set by a STRING op, consumed by the STRING_LEN op
+    UBool    atEnd       = FALSE;
+
+    // fCompiledPat is NULL when the constructor failed to allocate it.
+    int32_t  patLength   = (fCompiledPat == NULL) ? 0 : fCompiledPat->size();
+    int32_t  numOpNames  = (int32_t)(sizeof(opNames)/sizeof(opNames[0]));
+
+
+    printf("Original Pattern:  ");
+    for (i=0; i<fPattern.length(); i++) {
+        // Same pinning as for literal text below: only print chars that fit in a byte.
+        c = fPattern.charAt(i);
+        if (c >= 256) {c = '?';}
+        printf("%c", c);
+    }
+    printf("\n");
+    printf("Pattern Valid?:     %s\n", fBadState? "no" : "yes");
+    printf("\nIndex   Binary     Type             Operand\n"
+           "-------------------------------------------\n");
+    for (index = 0; index < patLength && !atEnd; index++) {
+        op         = fCompiledPat->elementAti(index);
+        val        = URX_VAL(op);
+        type       = URX_TYPE(op);
+        pinnedType = type;
+        if (pinnedType < 0 || pinnedType >= numOpNames) {
+            // Unknown type tag; show it under the name of entry 0.
+            pinnedType = 0;
+        }
+
+        printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
+        switch (type) {
+        case URX_NOP:
+        case URX_DOTANY:
+        case URX_FAIL:
+            // Types with no operand field of interest.
+            break;
+
+        case URX_START_CAPTURE:
+        case URX_END_CAPTURE:
+        case URX_SETREF:
+        case URX_STATE_SAVE:
+        case URX_JMP:
+            // types with an integer operand field.
+            printf("%d", val);
+            break;
+
+        case URX_ONECHAR:
+            printf("%c", val<256?val:'?');
+            break;
+
+        case URX_STRING:
+            // Operand is the start index into fLiteralText; printed by the
+            //   STRING_LEN op that follows.
+            stringStart = val;
+            break;
+
+        case URX_STRING_LEN:
+            for (i=stringStart; i<stringStart+val; i++) {
+                c = fLiteralText[i];
+                if (c >= 256) {c = '?';}
+                printf("%c", c);
+            }
+            break;
+
+        case URX_END:
+            // Last operation of the pattern.  Stop without the per-line newline;
+            //   the trailing blank lines are printed below.
+            atEnd = TRUE;
+            break;
+
+        default:
+            printf("??????");
+            break;
+        }
+        if (!atEnd) {
+            printf("\n");
+        }
+    }
+    printf("\n\n");
+}
+
+
+
+// Definition of the static class-ID tag declared in unicode/regex.h.
+// RegexPattern::getStaticClassID() returns the address of this object.
+const char RegexPattern::fgClassID = 0;
+
+U_NAMESPACE_END
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@ -0,0 +1,430 @@
+/*
+**********************************************************************
+*   Copyright (C) 2002, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef REGEX_H
+#define REGEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/parseerr.h"
+
+U_NAMESPACE_BEGIN
+
+// Forward Declarations...
+class RegexMatcher;
+class UVector;
+class UStack;
+
+
+//---------------------------------------------------------------------------------
+//
+//  Flags for Regular Expression Modes.
+//   TODO:  Move to C header once one exists.
+//   All flags default to off or false
+//   All are as defined by Java Regexes.
+//
+//---------------------------------------------------------------------------------
+enum {
+        // Listed in order of bit value.
+        UREGEX_UNIX_LINES       = 1,      // If set, only \n is recognized as a line terminator.
+                                          //   otherwise recognize all Unicode line endings.
+        UREGEX_CASE_INSENSITIVE = 2,      // Enable case insensitive matching.
+        UREGEX_COMMENTS         = 4,      // Allow white space and comments within patterns
+        UREGEX_MULTILINE        = 8,      // Control behavior of "$" and "^".
+                                          //   If set, recognize line terminators within string
+                                          //   otherwise, match only at start and end of
+                                          //   input string
+        UREGEX_DOTALL           = 32,     // If set, "." matches line terminators.
+                                          //   otherwise matching stops at line end.
+        UREGEX_UNICODE_CASE     = 64,     // If set, use full Unicode case folding for case
+                                          //   insensitive matches.  Otherwise, case insensitive
+                                          //   matching only affects chars in the ASCII range.
+                                          //   TODO:  do we want to support this option at all?
+        UREGEX_CANON_EQ         = 128     // Forces normalization of pattern and strings.
+};
+
+
+
+//---------------------------------------------------------------------------------
+//
+//    class  RegexPattern
+//
+//---------------------------------------------------------------------------------
+/**
+ * RegexPattern holds the compiled form of a regular expression: the original
+ * pattern string, the flags, the compiled operations, any literal text and the
+ * UnicodeSets the pattern refers to.  compile() builds one from a pattern
+ * string; matcher() creates a RegexMatcher that applies it to an input string.
+ */
+class U_I18N_API RegexPattern: public UObject {
+public:
+    
+    
+   /**
+    *     Default constructor.  Creates an empty pattern.  If the internal
+    *     vectors cannot be allocated the object is marked unusable (fBadState).
+    */
+    RegexPattern();
+
+   /**
+    *     Copy constructor.  Not implemented yet; the current body only asserts.
+    */
+    RegexPattern(const RegexPattern &other);
+    virtual ~RegexPattern();
+    
+    // NOTE(review): repattrn.cpp in this change contains no definition for
+    //   operator==, operator!= or operator= - confirm they are implemented
+    //   elsewhere before relying on them.
+    UBool                  operator==(const RegexPattern& that) const;
+    inline UBool           operator!=(const RegexPattern& that) const;
+    
+    RegexPattern  &operator =(const RegexPattern &other);
+
+   /**
+    *     Returns a new heap-allocated pattern created with the copy constructor.
+    */
+    virtual RegexPattern  *clone() const;
+
+    // TODO:  Do we really want a hashCode function on this class?
+    //        The current implementation returns 0 for every pattern.
+    virtual int32_t         hashCode(void) const;
+    
+    
+   /**
+    *     Compiles the given regular expression into a pattern 
+    *
+    *     NOTE(review): repattrn.cpp in this change does not define this
+    *     three-argument overload - confirm it is implemented elsewhere.
+    */
+    static RegexPattern *compile( const UnicodeString &regex,
+        UParseError          &pe,
+        UErrorCode           &err); 
+    
+   /**
+    *     Compiles the given regular expression into a pattern with the given flags 
+    *
+    *     @param regex  The regular expression to be compiled.
+    *     @param flags  The mode flags (UREGEX_* values).
+    *     @param pe     Passed to the pattern compiler.
+    *     @param err    In/out error code.
+    *     @return       The new pattern.  NULL if err already indicates a failure
+    *                   on entry, or if the pattern object cannot be allocated.
+    */
+    static RegexPattern *compile( const UnicodeString &regex,
+        uint32_t              flags,
+        UParseError          &pe,
+        UErrorCode           &err); 
+
+
+   /**
+    *     Return the flags for this pattern
+    */
+    virtual uint32_t flags() const;
+    
+   /**
+    *  Creates a matcher that will match the given input against this pattern.
+    *
+    *  @param input  The string to be matched; the new matcher is reset onto it.
+    *  @param err    In/out error code.  Set to U_MEMORY_ALLOCATION_ERROR if the
+    *                matcher cannot be allocated.
+    *  @return       A new heap-allocated matcher, or NULL on failure.
+    */
+    virtual RegexMatcher *matcher(const UnicodeString &input,
+        UErrorCode          &err) const;
+    
+    
+   /**
+    *  Compiles the given regular expression and attempts to match the given input against it.
+    *
+    *  NOTE(review): repattrn.cpp in this change does not define this function -
+    *  confirm it is implemented elsewhere.
+    */
+    static UBool matches(const UnicodeString   &regex,
+        const UnicodeString   &input,
+        UParseError     &pe,
+        UErrorCode      &err); 
+    
+    
+   /**
+    *    Returns the regular expression from which this pattern was compiled. 
+    */
+    virtual UnicodeString pattern() const;
+    
+    
+    /**
+    *    Split a string around matches of the pattern.  Somewhat like split() from Perl.
+    *    NOTE: the implementation in this change is a stub that always returns 0.
+    *
+    *    @param input   The string to be split into fields.  The field delimiters
+    *                   match the pattern (in the "this" object)
+    *    @param dest    An array of UnicodeStrings to receive the results of the split.
+    *                   This is an array of actual UnicodeString objects, not an
+    *                   array of pointers to strings.  Local (stack based) arrays can
+    *                   work well here.
+    *    @param destCapacity  The number of elements in the destination array.
+    *                   If the number of fields found is less than destCapacity, the
+    *                   extra strings in the destination array are not altered.
+    *                   If the number of destination strings is less than the number
+    *                   of fields, the trailing part of the input string, including any
+    *                   field delimiters, is placed in the last destination string.
+    *    @param err     In/out error code.
+    *    @return        The number of fields into which the input string was split.
+    */
+    virtual uint32_t  split(const UnicodeString &input,
+        UnicodeString    dest[],
+        uint32_t         destCapacity,
+        UErrorCode       &err) const;
+    
+    
+    
+    //
+    //   dump   Debug function, displays the compiled form of a pattern.
+    //          Output is written with printf.
+    //
+    void dump();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 2.2
+     */
+    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
+    
+    /**
+    * ICU "poor man's RTTI", returns a UClassID for this class.
+    *
+    * @draft ICU 2.2
+    */
+    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+    
+    // Tag object whose address serves as the class ID (see getStaticClassID()).
+    static const char fgClassID;
+
+private:
+    //
+    //  Implementation Data
+    //
+    UnicodeString   fPattern;      // The original pattern string.
+    uint32_t        fFlags;        // The flags used when compiling the pattern.
+                                   //   TODO:  make an enum type for the flags.
+    UVector         *fCompiledPat; // The compiled, tokenized pattern.
+    UnicodeString   fLiteralText;  // Any literal string data from the pattern, 
+                                   //   after un-escaping, for use during the match.
+    UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
+                                   //   Element 0 is reserved and is never a real set.
+    UBool           fBadState;     // True if any prior error has left this
+                                   //  RegexPattern unusable.
+
+    int32_t         fNumCaptureGroups;
+
+    friend class RegexCompile;
+    friend class RegexMatcher;
+
+};
+
+
+
+
+
+
+
+
+
+//--------------------------------------------------------------------------------
+//
+//    class RegexMatcher 
+//
+//--------------------------------------------------------------------------------
+/**
+ * RegexMatcher applies a RegexPattern to an input string and reports the
+ * results of the match operations.  Matchers are created with
+ * RegexPattern::matcher(); the constructors are private.
+ */
+class U_I18N_API RegexMatcher: public UObject {
+public:
+    
+   /*   Destructor.  Note that there are no public constructors; creation is
+    *   done with RegexPattern::matcher().
+    */
+    virtual ~RegexMatcher();
+
+   /*
+    *   Implements a replace operation intended to be used as part of an
+    *   incremental find-and-replace.
+    *
+    *   The input sequence, starting from the append position and ending at
+    *   the start of the current match is appended to the destination string.
+    *
+    *   Then the replacement string is appended to the output string,
+    *   including handling any substitutions of captured text.
+    *
+    *   The append position is set to the position of the first
+    *   character following the match in the input string.
+    *
+    *   Returns:  This Matcher
+    *
+    *    error:  Illegal state - no match yet attempted, or last match failed.
+    *            IndexOutOfBounds - capture string number from replacement string.
+    */
+    virtual RegexMatcher &appendReplacement(UnicodeString &dest,
+        const UnicodeString &replacement);
+    
+    
+   /*
+    * This method reads characters from the input sequence,
+    * starting at the append position, and appends them to the
+    * destination string. It is intended to be invoked after one
+    * or more invocations of the appendReplacement method in order
+    * to copy the remainder of the input sequence. 
+    *
+    *  @return  the destination string.
+    */
+    virtual UnicodeString &appendTail(UnicodeString &dest); 
+    
+    
+    /*
+    *    Returns the index of the last character matched, plus one.
+    *    error:  Illegal state - no match yet attempted, or last match failed.
+    */
+    virtual uint32_t end(UErrorCode &err) const;
+    
+    
+    /*
+    *    Returns the index of the last character, plus one, of the subsequence 
+    *    captured by the given group during the previous match operation. 
+    *    Errors:  Illegal state, index out of bounds
+    */
+    virtual uint32_t end(int group, UErrorCode &err) const; 
+    
+    
+    /*
+    *  Attempts to find the next subsequence of the input sequence that matches the pattern.
+    */
+    virtual UBool find();
+    
+    
+    /*
+    *   Resets this matcher and then attempts to find the next subsequence of the 
+    *   input sequence that matches the pattern, starting at the specified index. 
+    *  Errors:  Index out of bounds.
+    */
+    virtual UBool find(uint32_t start, UErrorCode &err); 
+    
+    
+    /*
+    *   Returns the input subsequence matched by the previous match. 
+    *   If the pattern can match an empty string, an empty string may be returned.
+    *    Errors:   illegal state (no match has yet been attempted.)
+    */
+    virtual UnicodeString group(UErrorCode &err) const;
+    
+    
+    /*
+    *    Returns the input subsequence captured by the given group during the previous match operation. 
+    *    Group(0) is the entire match.
+    *    Errors:   Index out of bounds, illegal state (no match has yet been attempted.)
+    */
+    virtual UnicodeString group(int group, UErrorCode &err) const; 
+    
+    
+    /*
+    *   Returns the number of capturing groups in this matcher's pattern.
+    */
+    virtual int groupCount() const;
+    
+    
+    /*
+    *   Returns the input string being matched.
+    */
+    virtual const UnicodeString &input() const; 
+    
+    
+    /*
+    *   Attempts to match the input string, starting at the beginning, against the pattern.
+    *   Like the matches method, this method always starts at the beginning of the input string;
+    *   unlike that method, it does not require that the entire input sequence be matched. 
+    *
+    *   If the match succeeds then more information can be obtained via the start, end,
+    *    and group methods.
+    */
+    virtual UBool lookingAt(UErrorCode &err);
+    
+    
+    /*
+    *   Attempts to match the entire input sequence against the pattern.
+    */
+    virtual UBool matches(UErrorCode &err);
+    
+    
+    /*
+    *    Returns the pattern that is interpreted by this matcher.
+    */
+    virtual const RegexPattern &pattern() const;
+    
+    
+    /*
+    *    Replaces every subsequence of the input sequence that matches the pattern
+    *    with the given replacement string.
+    *
+    *    This method first resets this matcher. It then scans the input sequence
+    *    looking for matches of the pattern. Characters that are not part of any 
+    *    match are left unchanged; each match is replaced in the result by the
+    *    replacement string. The replacement string may contain references to
+    *    captured subsequences as in the appendReplacement method. 
+    *
+    *    @return   The target string.  Depending on how the RegexMatcher was
+    *              created, this may either be the original input string or a copy
+    *
+    *    Error:  Index out of bounds (replacement string capture group)
+    *
+    */
+    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &err); 
+    
+    
+    /*
+    * Replaces the first subsequence of the input sequence that matches
+    * the pattern with the given replacement string. 
+    * This method first resets this matcher. It then scans the input sequence
+    * looking for a match of the pattern. Characters that are not part
+    * of the match are appended directly to the result string; the match is replaced
+    * in the result by the replacement string. The replacement string may contain
+    * references to captured subsequences as in the appendReplacement method. 
+    *
+    *    Error:  Index out of bounds (replacement string capture group)
+    *            Illegal state (no match)
+    *      Note:  Javadoc doesn't list exceptions, but they have to be there for consistency
+    */
+    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &err); 
+    
+    
+    /*
+    *   Resets this matcher.
+    */
+    virtual RegexMatcher &reset();
+    
+    
+    /*
+    *   Resets this matcher with a new input sequence. 
+    */
+    virtual RegexMatcher &reset(const UnicodeString &input);  
+    
+    
+    /*
+    *   Returns the start index of the previous match. 
+    *   Error:  Illegal State (no previous match)
+    */
+    virtual int start(UErrorCode &err) const;
+    
+    
+    /*
+    *   Returns the start index of the subsequence captured by the given group
+    *    during the previous match operation.
+    *
+    *   Error:  Illegal State  (no previous match)
+    *           Index out of bounds (no group with specified index)
+    */
+    virtual int start(int group, UErrorCode &err) const;
+    
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 2.2
+     */
+    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
+    
+    /**
+    * ICU "poor man's RTTI", returns a UClassID for this class.
+    *
+    * @draft ICU 2.2
+    */
+    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+    
+    // Tag object whose address serves as the class ID (see getStaticClassID()).
+    static const char fgClassID;
+
+private:
+    // Constructors and other object boilerplate are private.
+    // Creation by users is through factory method in RegexPattern
+    RegexMatcher(const RegexPattern *pat); 
+    RegexMatcher(const RegexMatcher &other);
+    RegexMatcher &operator =(const RegexMatcher &rhs);
+    friend class RegexPattern;
+
+    // Helper used by MatchAt() when a match attempt has to back up; takes the
+    //   current input and pattern indices by reference.
+    inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
+
+    //
+    //  MatchAt   This is the internal interface to the match engine itself.
+    //            Match status comes back in matcher member variables
+    //            (fLastMatch, fLastMatchStart, fLastMatchEnd).
+    //
+    virtual void MatchAt(int32_t startIdx, UErrorCode &status);   
+
+
+    const RegexPattern  *fPattern;
+    const UnicodeString *fInput;            // Held by pointer, not copied.
+    int32_t              fInputLength;
+    UBool                fLastMatch;        // True if the last match was successful.
+    int32_t              fLastMatchStart;   // Set by MatchAt() when a match succeeds.
+    int32_t              fLastMatchEnd;     // Set by MatchAt() when a match succeeds.
+    UStack              *fBackTrackStack;
+    UVector             *fCaptureStarts;
+    UVector             *fCaptureEnds;      // Written by MatchAt(); presumably one end position per capture group.
+
+};  
+
+
+
+U_NAMESPACE_END
+#endif
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@ -36,7 +36,7 @@ tsmthred.o tsmutex.o tsnmfmt.o tsputil.o tstnorm.o tzbdtest.o		\
 tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \
 itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o unhxtrts.o hxuntrts.o \
 ufltlgts.o testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \
-jamotest.o srchtest.o reptest.o biditst.o \
+jamotest.o srchtest.o reptest.o biditst.o regextst.o \
 itrbnf.o itrbnfrt.o tstdtmod.o testdata.o datamap.o ucaconf.o

 DEPS = $(OBJECTS:.o=.d)
--- a/icu4c/source/test/intltest/intltest.dsp
+++ b/icu4c/source/test/intltest/intltest.dsp
@ -374,6 +374,10 @@ SOURCE=.\regcoll.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\regextst.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\reptest.cpp
 # End Source File
 # Begin Source File
@ -747,6 +751,10 @@ SOURCE=.\regcoll.h
 # End Source File
 # Begin Source File

+SOURCE=.\regextst.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\reptest.h
 # End Source File
 # Begin Source File
--- a/icu4c/source/test/intltest/itmajor.cpp
+++ b/icu4c/source/test/intltest/itmajor.cpp
@ -26,6 +26,7 @@
 #include "itrbnf.h"
 #include "itrbnfrt.h"
 #include "normconf.h"
+#include "regextst.h"
 #include "tstnorm.h"
 #include "canittst.h"

@ -67,8 +68,12 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam
 #endif
                break;

-        case 3: name = "unused";
-                // Used to be text bounds.
+        case 3: name = "regex";
+                if (exec) {
+                    logln("TestSuite Regex---"); logln();
+                    RegexTest test;
+                    callTest( test, par );
+                }
                break;

        case 4: name = "format";
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@ -0,0 +1,269 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2002, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+//
+//   regextst.cpp
+//
+//      ICU Regular Expressions test, part of intltest.
+//
+
+#include "unicode/utypes.h"
+#include "intltest.h"
+#include "regextst.h"
+
+
+// Nothing to set up; the test object carries no state of its own.
+RegexTest::RegexTest() {
+}
+
+
+// Nothing to release.
+RegexTest::~RegexTest() {
+}
+
+
+
+// Test dispatcher.  Reports the name of test number 'index' through 'name'
+// and, when 'exec' is set, runs it.  An empty name marks the end of the list.
+void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+    if (exec) {
+        logln("TestSuite RegexTest: ");
+    }
+
+    if (index == 0) {
+        name = "TestRegexAPI";
+        if (exec) {
+            TestRegexAPI();
+        }
+    } else {
+        name = "";     // needed to end loop
+    }
+}
+
+
+//---------------------------------------------------------------------------
+//
+//    REGEX_TESTLM       Macro + invocation function to simplify writing quick tests
+//                       for the lookingAt() and matches() functions.
+//
+//       usage:
+//          REGEX_TESTLM("pattern",  "input text",  lookingAt expected, matches expected);
+//
+//          The expected results are UBool - TRUE or FALSE.
+//          The input text is unescaped.  The pattern is not.
+//            
+//
+//---------------------------------------------------------------------------
+// The macros below expand to a single statement and expect the terminating
+// semicolon to be written at the point of use, so they also behave as one
+// statement inside an unbraced if/else.
+
+// Report an error and return from the enclosing (void) function if the local
+// variable 'status' indicates a failure.
+#define REGEX_CHECK_STATUS do {if (U_FAILURE(status)) {errln("RegexTest failure at line %d.  status=%d\n", \
+__LINE__, status); return;}} while (0)
+
+// Report an error, and carry on, if expr evaluates to FALSE.
+#define REGEX_ASSERT(expr) do {if ((expr)==FALSE) {errln("RegexTest failure at line %d.\n", __LINE__);}} while (0)
+
+// Run one lookingAt()/matches() check, passing along the line number of the invocation.
+#define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__)
+
+//
+//  doRegexLMTest    Compile a pattern, create a matcher on the (unescaped)
+//                   input text, and check the results of lookingAt() and
+//                   matches() against the expected values.
+//
+//      pat      The pattern.  Used as is, not unescaped.
+//      text     The input text.  Unescaped before matching.
+//      looking  Expected result from lookingAt().
+//      match    Expected result from matches().
+//      line     Source line of the test, for error messages.
+//
+//      returns  TRUE if everything behaved as expected.  On a wrong result
+//               the compiled pattern is dumped to help with debugging.
+//
+UBool RegexTest::doRegexLMTest(char *pat, char *text, UBool looking, UBool match, int line) {
+    UErrorCode          status  = U_ZERO_ERROR;
+    UParseError         pe;
+    RegexPattern        *REPattern = NULL;
+    RegexMatcher        *REMatcher = NULL;
+    UBool               retVal     = TRUE;
+
+    UnicodeString patString(pat);
+    REPattern = RegexPattern::compile(patString, 0, pe, status);
+    if (U_FAILURE(status)) {
+        errln("RegexTest failure in RegexPattern::compile() at line %d.  Status = %d\n", line, status);
+        delete REPattern;      // may be NULL; don't leak it if it is not.
+        return FALSE;
+    }
+
+    // unEscapedInput must stay alive for as long as the matcher is in use;
+    //   the matcher refers to its input string rather than copying it.
+    UnicodeString inputString(text);
+    UnicodeString unEscapedInput = inputString.unescape();
+    REMatcher = REPattern->matcher(unEscapedInput, status);
+    if (U_FAILURE(status)) {
+        errln("RegexTest failure in REPattern::matcher() at line %d.  Status = %d\n", line, status);
+        delete REMatcher;
+        delete REPattern;
+        return FALSE;
+    }
+
+    UBool actualmatch;
+    actualmatch = REMatcher->lookingAt(status);
+    if (U_FAILURE(status)) {
+        errln("RegexTest failure in lookingAt() at line %d.  Status = %d\n", line, status);
+        retVal =  FALSE;
+    }
+    if (actualmatch != looking) {
+        errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);
+        retVal = FALSE;
+    }
+
+    // Start the second check with a clean status, whatever the first one did.
+    status = U_ZERO_ERROR;
+    actualmatch = REMatcher->matches(status);
+    if (U_FAILURE(status)) {
+        errln("RegexTest failure in matches() at line %d.  Status = %d\n", line, status);
+        retVal = FALSE;
+    }
+    if (actualmatch != match) {
+        errln("RegexTest: wrong return from matches() at line %d.\n", line);
+        retVal = FALSE;
+    }
+
+    if (retVal == FALSE) {
+        REPattern->dump();
+    }
+
+    // The matcher refers to the pattern, so it goes first.
+    delete REMatcher;
+    delete REPattern;
+    return retVal;
+}
+    
+
+//---------------------------------------------------------------------------
+//
+//      TestRegexAPI
+//
+//---------------------------------------------------------------------------
+// Exercises pattern compilation, matcher creation and reset, and then a
+// table of quick lookingAt()/matches() checks made through REGEX_TESTLM.
+// Each REGEX_TESTLM line gives: pattern, input text, expected lookingAt()
+// result, expected matches() result.
+void RegexTest::TestRegexAPI() {
+    UParseError         pe;
+    UErrorCode          status=U_ZERO_ERROR;
+
+    RegexPattern        pat1;    // Test default constructor to not crash.
+
+    RegexPattern        *pat2;
+    int32_t             flags = 0;
+
+    //
+    // Debug - slide failing test cases early
+    //   (disabled; move a failing case in here to run it on its own.)
+    //
+#if 0
+    REGEX_TESTLM("b+", "", FALSE, FALSE);
+        return;
+#endif
+
+    //
+    // Simple pattern compilation
+    //
+    UnicodeString       re("abc");
+    pat2 = RegexPattern::compile(re, flags, pe, status);
+    REGEX_CHECK_STATUS;
+
+    UnicodeString inStr1 = "abcdef this is a test";
+    UnicodeString instr2 = "not abc";
+    UnicodeString empty  = "";
+
+
+    //
+    // Matcher creation and reset.
+    //   NOTE: REGEX_CHECK_STATUS returns from this function on failure,
+    //         without deleting pat2 or m1.
+    //
+    RegexMatcher *m1 = pat2->matcher(inStr1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(m1->lookingAt(status) == TRUE); 
+    m1->reset(instr2);
+    REGEX_ASSERT(m1->lookingAt(status) == FALSE);
+    m1->reset(inStr1);
+    REGEX_ASSERT(m1->lookingAt(status) == TRUE);
+    m1->reset(empty);
+    REGEX_ASSERT(m1->lookingAt(status) == FALSE);
+    delete m1;
+    delete pat2;
+
+    //
+    // Pattern with parentheses
+    //
+    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE,  FALSE);
+    REGEX_TESTLM("st(abc)ring", "stabcring",       TRUE,  TRUE);
+    REGEX_TESTLM("st(abc)ring", "stabcrung",       FALSE, FALSE);
+
+    //
+    // Patterns with *
+    //
+    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
+    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
+    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
+    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
+    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);
+
+    REGEX_TESTLM("a*", "",  TRUE, TRUE);
+    REGEX_TESTLM("a*", "b", TRUE, FALSE);
+
+
+    //
+    //  Patterns with "."
+    //
+    REGEX_TESTLM(".", "abc", TRUE, FALSE);
+    REGEX_TESTLM("...", "abc", TRUE, TRUE);
+    REGEX_TESTLM("....", "abc", FALSE, FALSE);
+    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
+    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
+    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
+    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
+    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);
+
+    //
+    //  Patterns with * applied to chars at end of literal string
+    //
+    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
+    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);
+
+    //
+    //  Supplemental chars match as single chars, not a pair of surrogates.
+    //
+    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
+    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
+    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);
+
+
+    //
+    //  UnicodeSets in the pattern
+    //    (the "a[1-6]" / "a3" case appears twice.)
+    //
+    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
+    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
+    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
+    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
+    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
+    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);
+
+    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
+    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
+    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
+    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurrences.
+    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);
+
+    //
+    //   OR operator in patterns
+    //
+    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
+    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
+    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
+    REGEX_TESTLM("a|b", "b", TRUE, TRUE);
+
+    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
+    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
+    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
+    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
+    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
+    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);
+
+    //
+    //  +
+    //
+    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
+    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
+    REGEX_TESTLM("b+", "", FALSE, FALSE);
+    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
+    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
+    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);
+
+    //
+    //   ?
+    //
+    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
+    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
+    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
+    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
+    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
+    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
+    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
+    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
+    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);
+
+
+};
+
--- a/icu4c/source/test/intltest/regextst.h
+++ b/icu4c/source/test/intltest/regextst.h
@ -0,0 +1,28 @@
+/********************************************************************
+ * COPYRIGHT: 
+ * Copyright (c) 2002, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+
+#ifndef REGEXTST_H
+#define REGEXTST_H
+
+
+#include "intltest.h"
+#include "unicode/regex.h"
+
+
+// intltest test class for the regular expression API (unicode/regex.h).
+class RegexTest: public IntlTest {
+public:
+  
+    RegexTest();
+    virtual ~RegexTest();
+
+    // Test dispatcher: reports the name of test number 'index' and runs it when 'exec' is set.
+    virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL );
+
+    // The tests themselves.
+    virtual void TestRegexAPI();
+
+    // Helper behind the REGEX_TESTLM macro: compiles 'pat', matches it against 'text',
+    //   and compares the lookingAt() and matches() results with the expected values.
+    //   'line' is the source line of the test, used in error messages.
+    virtual UBool doRegexLMTest(char *pat, char *text, UBool looking, UBool match, int line);
+
+};
+#endif