From 2d39fda4e32bcbb4f5cf5e1c562f7561107ee369 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Thu, 7 Nov 2002 02:34:46 +0000
Subject: [PATCH] ICU-105 Regular Expressions, ongoing development

X-SVN-Rev: 10180
---
 icu4c/source/common/putil.c             |   7 +-
 icu4c/source/common/unicode/uconfig.h   |  12 ++
 icu4c/source/common/unicode/utypes.h    |   3 +
 icu4c/source/i18n/regexcmp.cpp          | 129 +++++++++++-------
 icu4c/source/i18n/regexcmp.h            |  19 ++-
 icu4c/source/i18n/regexcst.h            | 165 +++++++++++++-----------
 icu4c/source/i18n/regexcst.txt          |  35 +++--
 icu4c/source/i18n/regeximp.h            |  10 +-
 icu4c/source/i18n/rematch.cpp           |  38 ++----
 icu4c/source/i18n/repattrn.cpp          |  18 +--
 icu4c/source/i18n/unicode/regex.h       |  13 +-
 icu4c/source/test/intltest/itmajor.cpp  |   2 +
 icu4c/source/test/intltest/regextst.cpp |  36 +++++-
 icu4c/source/test/intltest/regextst.h   |   4 +
 14 files changed, 286 insertions(+), 205 deletions(-)

diff --git a/icu4c/source/common/putil.c b/icu4c/source/common/putil.c
index c570d6b0b19..17bc5b4e93b 100644
--- a/icu4c/source/common/putil.c
+++ b/icu4c/source/common/putil.c
@@ -1833,10 +1833,13 @@ static const char * const
 _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
     "U_REGEX_ERROR_START",
     "U_REGEX_INTERNAL_ERROR",
+    "U_REGEX_RULE_SYNTAX",
     "U_REGEX_INVALID_STATE",
     "U_REGEX_BAD_ESCAPE_SEQUENCE",
     "U_REGEX_PROPERTY_SYNTAX",
-    "U_REGEX_UNIMPLEMENTED"
+    "U_REGEX_UNIMPLEMENTED",
+    "U_REGEX_MISMATCHED_PAREN",
+    "U_REGEX_MATCH_MODE_ERROR"
 };
 
 U_CAPI const char * U_EXPORT2
@@ -1852,7 +1855,7 @@ u_errorName(UErrorCode code) {
     } else if (U_BRK_ERROR_START <= code  && code < U_BRK_ERROR_LIMIT){
         return _uBrkErrorName[code - U_BRK_ERROR_START];
     } else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
-        return _uBrkErrorName[code - U_REGEX_ERROR_START];
+        return _uRegexErrorName[code - U_REGEX_ERROR_START];
     } else {
         return "[BOGUS UErrorCode]";
     }
diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h
index b6af2ea93a0..334ffe2667b 100644
--- a/icu4c/source/common/unicode/uconfig.h
+++ b/icu4c/source/common/unicode/uconfig.h
@@ -52,6 +52,7 @@
 #   endif
 #   define UCONFIG_NO_FORMATTING 1
 #   define UCONFIG_NO_TRANSLITERATION 1
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 1
 #endif
 
 /* common library switches -------------------------------------------------- */
@@ -114,5 +115,16 @@
 #   define UCONFIG_NO_TRANSLITERATION 0
 #endif
 
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @draft ICU 2.6
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+
 
 #endif
diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h
index 9c9372e215a..ffb3bc334af 100644
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@@ -502,10 +502,13 @@ typedef enum UErrorCode {
      */
      U_REGEX_ERROR_START=0x10300,
      U_REGEX_INTERNAL_ERROR,
+     U_REGEX_RULE_SYNTAX,
      U_REGEX_INVALID_STATE,
      U_REGEX_BAD_ESCAPE_SEQUENCE,
      U_REGEX_PROPERTY_SYNTAX,
      U_REGEX_UNIMPLEMENTED,
+     U_REGEX_MISMATCHED_PAREN,
+     U_REGEX_MATCH_MODE_ERROR,
      U_REGEX_ERROR_LIMIT,
 
     U_ERROR_LIMIT=U_BRK_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp
index 8eb4debccc8..f899837320e 100644
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -5,11 +5,14 @@
 //  Copyright (C) 2002, International Business Machines Corporation and others.
 //  All Rights Reserved.
 //
-//  This file contains the ICU regular expression scanner, which is responsible
-//  for preprocessing a regular expression pattern into the tokenized form that
+//  This file contains the ICU regular expression compiler, which is responsible
+//  for processing a regular expression pattern into the compiled form that
 //  is used by the match finding engine.
 //
 
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
 
 #include "unicode/unistr.h"
 #include "unicode/uniset.h"
@@ -18,18 +21,18 @@
 #include "unicode/parsepos.h"
 #include "unicode/parseerr.h"
 #include "unicode/regex.h"
-#include "regeximp.h"
 #include "uprops.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "uassert.h"
 
 #include "stdio.h"    // TODO:  Get rid of this
 
+#include "regeximp.h"
 #include "regexcst.h"   // Contains state table for the regex pattern parser.
                        //   generated by a Perl script.
 #include "regexcmp.h"
 
-#include "uassert.h"
 
 
 U_NAMESPACE_BEGIN
@@ -52,10 +55,10 @@ static const int RESCAN_DEBUG = 0;
 
 // Characters that have no special meaning, and thus do not need to be escaped.  Expressed
 //    as the inverse of those needing escaping --  [^\*\?\+\[\(\)\{\}\^\$\|\\\.]
-static const UChar gRuleSet_rule_char_pattern[]       = { 
+static const UChar gRuleSet_rule_char_pattern[]       = {
  //   [    ^      \     *     \     ?     \     +     \     [     \     (     /     )
-    0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c, 0x29, 
- //   \     {    \     }     \     ^     \     $     \     |     \     \     \     .     ]   
+    0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c, 0x29,
+ //   \     {    \     }     \     ^     \     $     \     |     \     \     \     .     ]
     0x5c, 0x7b,0x5c, 0x7d, 0x5c, 0x5e, 0x5c, 0x24, 0x5c, 0x7c, 0x5c, 0x5c, 0x5c, 0x2e, 0x5d, 0};
 
 
@@ -72,7 +75,7 @@ static UnicodeSet  *gUnescapeCharSet;
 //    will handle.
 //
 static const UChar gUnescapeCharPattern[] = {
-//    [     a     c     e     f     n     r     t     u     U     ] 
+//    [     a     c     e     f     n     r     t     u     U     ]
     0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x5d, 0};
 
 
@@ -123,7 +126,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
 
     //
     //  Set up the constant (static) Unicode Sets.
-    //    
+    //
     if (gRuleSets[kRuleSet_rule_char-128] == NULL) {
         //  TODO:  Make thread safe.
         //  TODO:  Memory Cleanup on ICU shutdown.
@@ -131,8 +134,8 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
         gRuleSets[kRuleSet_white_space-128]     = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(&status);
         gRuleSets[kRuleSet_digit_char-128]      = new UnicodeSet(gRuleSet_digit_char_pattern,      status);
         gUnescapeCharSet                        = new UnicodeSet(gUnescapeCharPattern,             status);
-        gPropSets[URX_ISWORD_SET]               = new UnicodeSet(gIsWordPattern,                   status); 
-        gPropSets[URX_ISSPACE_SET]              = new UnicodeSet(gIsSpacePattern,                  status); 
+        gPropSets[URX_ISWORD_SET]               = new UnicodeSet(gIsWordPattern,                   status);
+        gPropSets[URX_ISSPACE_SET]              = new UnicodeSet(gIsSpacePattern,                  status);
 
         if (U_FAILURE(status)) {
             delete gRuleSets[kRuleSet_rule_char-128];
@@ -171,7 +174,7 @@ RegexCompile::~RegexCompile() {
 //                         script regexcst.pl
 //
 //---------------------------------------------------------------------------------
-void    RegexCompile::compile(                    
+void    RegexCompile::compile(
                          RegexPattern &rxp,          // User level pattern object to receive
                                                      //    the compiled pattern.
                          const UnicodeString &pat,   // Source pat to be compiled.
@@ -285,7 +288,7 @@ void    RegexCompile::compile(
         if (tableEl->fPushState != 0) {
             fStackPtr++;
             if (fStackPtr >= kStackSize) {
-                error(U_BRK_INTERNAL_ERROR);
+                error(U_REGEX_INTERNAL_ERROR);
                 printf("RegexCompile::parse() - state stack overflow.\n");
                 fStackPtr--;
             }
@@ -304,7 +307,7 @@ void    RegexCompile::compile(
             state = fStack[fStackPtr];
             fStackPtr--;
             if (fStackPtr < 0) {
-                error(U_BRK_INTERNAL_ERROR);
+                error(U_REGEX_INTERNAL_ERROR);
                 printf("RegexCompile::compile() - state stack underflow.\n");
                 fStackPtr++;
             }
@@ -358,12 +361,16 @@ UBool RegexCompile::doParseActions(EParseAction action)
         //  Encountering end of pattern also behaves like a close paren,
         //   and forces fixups of the State Save at the beginning of the compiled pattern
         //   and of any OR operations at the top level.
-        // 
+        //
         handleCloseParen();
-        
+        if (fParenStack.size() > 0) {
+            // Missing close paren in pattern.
+            error(U_REGEX_MISMATCHED_PAREN);
+        }
+
         // add the END operation to the compiled pattern.
         fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus);
-        
+
         // Terminate the pattern compilation state machine.
         returnVal = FALSE;
         break;
@@ -405,7 +412,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
 
     case doOpenCaptureParen:
         // Open Paren.
-        //   Compile to a 
+        //   Compile to a
         //      - NOP, which later may be replaced by a save-state if the
         //         parenthesized group gets a * quantifier, followed by
         //      - START_CAPTURE
@@ -430,7 +437,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
 
     case doOpenNonCaptureParen:
         // Open non-caputuring (grouping only) Paren.
-        //   Compile to a 
+        //   Compile to a
         //      - NOP, which later may be replaced by a save-state if the
         //         parenthesized group gets a * quantifier, followed by
         //      - NOP, which may later be replaced by a save-state if there
@@ -440,7 +447,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
             fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
 
             // On the Parentheses stack, start a new frame and add the postions
-            //   of the two NOPs.  
+            //   of the two NOPs.
             fParenStack.push(-1, *fStatus);                               // Begin a new frame.
             fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first NOP
             fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP
@@ -475,6 +482,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
 
     case doCloseParen:
         handleCloseParen();
+        if (fParenStack.size() <= 0) {
+            //  Extra close paren, or missing open paren.
+            error(U_REGEX_MISMATCHED_PAREN);
+        }
         break;
 
     case doNOP:
@@ -483,11 +494,16 @@ UBool RegexCompile::doParseActions(EParseAction action)
 
     case doBadOpenParenType:
     case doRuleError:
-        error(U_BRK_RULE_SYNTAX);
+        error(U_REGEX_RULE_SYNTAX);
         returnVal = FALSE;
         break;
 
 
+    case doMismatchedParenErr:
+        error(U_REGEX_MISMATCHED_PAREN);
+        returnVal = FALSE;
+        break;
+
     case doPlus:
         //  Normal '+'  compiles to
         //     1.   stuff to be repeated  (already built)
@@ -532,7 +548,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
         //     3. ...
         // Insert the state save into the compiled pattern, and we're done.
         {
-            int32_t   saveStateLoc = blockTopLoc(TRUE);      
+            int32_t   saveStateLoc = blockTopLoc(TRUE);
             int32_t   saveStateOp  = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size());
             fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
         }
@@ -572,9 +588,9 @@ UBool RegexCompile::doParseActions(EParseAction action)
         //       3.   JMP  0
         //       4.   ...
         //
-        { 
+        {
             // location of item #1, the STATE_SAVE
-            int32_t   saveStateLoc = blockTopLoc(TRUE);       
+            int32_t   saveStateLoc = blockTopLoc(TRUE);
 
             // Locate the position in the compiled pattern where the match will continue
             //   after completing the *.   (4 in the comment above)
@@ -599,7 +615,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
         //     3.   STATE_SAVE  2
         //     4    ...
         {
-            int32_t     jmpLoc  = blockTopLoc(TRUE);                   // loc  1. 
+            int32_t     jmpLoc  = blockTopLoc(TRUE);                   // loc  1.
             int32_t     saveLoc = fRXPat->fCompiledPat->size();        // loc  3.
             int32_t     jmpOp   = URX_BUILD(URX_JMP, saveLoc);
             int32_t     stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1);
@@ -607,7 +623,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
             fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus);
         }
         break;
-        
+
 
     case doStartString:
         // We've just scanned a single "normal" character from the pattern,
@@ -678,7 +694,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
         //   has only one character, emit the single character token instead.
         {
             int32_t   strLength = fRXPat->fLiteralText.length() - fStringOpStart;
-            U_ASSERT(strLength > 0);  
+            U_ASSERT(strLength > 0);
             int32_t  lastCharIdx = fRXPat->fLiteralText.length()-1;
             lastCharIdx = fRXPat->fLiteralText.getChar32Start(lastCharIdx);
             if (lastCharIdx == fStringOpStart) {
@@ -735,7 +751,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
 
     case doBackslashG:
         fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatus);
-        break;        
+        break;
 
     case doBackslashS:
         fRXPat->fCompiledPat->addElement(
@@ -750,31 +766,31 @@ UBool RegexCompile::doParseActions(EParseAction action)
     case doBackslashW:
         fRXPat->fCompiledPat->addElement(
             URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET | URX_NEG_SET), *fStatus);
-        break;        
+        break;
 
     case doBackslashw:
         fRXPat->fCompiledPat->addElement(
             URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus);
-        break;        
+        break;
 
     case doBackslashX:
         fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus);
-        break;        
+        break;
 
     case doBackslashx:              // \x{abcd}   alternate hex format
-        //  TODO:  implement 
+        //  TODO:  implement
         error(U_REGEX_UNIMPLEMENTED);
         break;
-            
+
 
 
     case doBackslashZ:
         fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
-        break;        
+        break;
 
     case doBackslashz:
         fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatus);
-        break;        
+        break;
 
     case doExit:
         returnVal = FALSE;
@@ -806,12 +822,23 @@ UBool RegexCompile::doParseActions(EParseAction action)
         break;
 
     case doNamedChar:            // \N{NAMED_CHAR}
-        //  TODO:  implement 
+        //  TODO:  implement
         error(U_REGEX_UNIMPLEMENTED);
         break;
-            
+
+    case doMatchMode:   //  (?i)    and similar
+        // TODO:  implement
+        error(U_REGEX_UNIMPLEMENTED);
+        break;
+
+    case doNotImplementedError:
+        // TODO:  get rid of this once everything is implemented.
+        error(U_REGEX_UNIMPLEMENTED);
+        break;
+
+
     default:
-        error(U_BRK_INTERNAL_ERROR);
+        error(U_REGEX_INTERNAL_ERROR);
         returnVal = FALSE;
         break;
     }
@@ -838,7 +865,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
 //------------------------------------------------------------------------------
 int32_t   RegexCompile::blockTopLoc(UBool reserveLoc) {
     int32_t   theLoc;
-    if (fRXPat->fCompiledPat->size() == fMatchCloseParen)    
+    if (fRXPat->fCompiledPat->size() == fMatchCloseParen)
     {
         // The item just processed is a parenthesized block.
         theLoc = fMatchOpenParen;   // A slot is already reserved for us.
@@ -878,8 +905,11 @@ int32_t   RegexCompile::blockTopLoc(UBool reserveLoc) {
 void  RegexCompile::handleCloseParen() {
     int32_t   patIdx;
     int32_t   patOp;
-    U_ASSERT(fParenStack.size() >= 1);
-    
+    if (fParenStack.size() <= 0) {
+        error(U_REGEX_MISMATCHED_PAREN);
+        return;
+    }
+
     // Fixup any operations within the just-closed parenthesized group
     //    that need to reference the end of the (block).
     //    (The first one on popped from the stack is an unused slot for
@@ -896,17 +926,17 @@ void  RegexCompile::handleCloseParen() {
         fRXPat->fCompiledPat->setElementAt(patOp, patIdx);
         fMatchOpenParen     = patIdx;
     }
-    
+
     // DO any additional fixups, depending on the specific kind of
     // parentesized grouping this is
-    
+
     switch (patIdx) {
     case -1:
         // No additional fixups required.
         //   This is the case with most kinds of groupings.
         break;
     case -2:
-        // Capturing Parentheses.  
+        // Capturing Parentheses.
         //   Insert a End Capture op into the pattern.
         //   Grab the group number from the start capture op
         //      and put it into the end-capture op.
@@ -1039,7 +1069,7 @@ UChar32  RegexCompile::nextCharLL() {
         fLineNum++;
         fCharNum=0;
         if (fQuoteMode) {
-            error(U_BRK_NEW_LINE_IN_QUOTED_STRING);
+            error(U_REGEX_RULE_SYNTAX);
             fQuoteMode = FALSE;
         }
     }
@@ -1120,7 +1150,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
         //  Use UnicodeString::unescapeAt() to handle those that it can.
         //  Otherwise just return the '\', and let the pattern parser deal with it.
         //
-        int32_t startX = fNextIndex;  // start and end positions of the 
+        int32_t startX = fNextIndex;  // start and end positions of the
         int32_t endX   = fNextIndex;  //   sequence following the '\'
         if (c.fChar == chBackSlash) {
             if (gUnescapeCharSet->contains(peekCharLL())) {
@@ -1148,7 +1178,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
 //
 //             The scan position is normally under the control of the state machine
 //             that controls pattern parsing.  UnicodeSets, however, are parsed by
-//             the UnicodeSet constructor, not by the Regex pattern parser.  
+//             the UnicodeSet constructor, not by the Regex pattern parser.
 //
 //---------------------------------------------------------------------------------
 UnicodeSet *RegexCompile::scanSet() {
@@ -1193,7 +1223,7 @@ UnicodeSet *RegexCompile::scanSet() {
 //---------------------------------------------------------------------------------
 //
 //  scanProp   Construct a UnicodeSet from the text at the current scan
-//             position, which will be of the form \p{whaterver} 
+//             position, which will be of the form \p{whatever}
 //
 //             The scan position will be at the 'p' or 'P'.  On return
 //             the scan position should be just after the '}'
@@ -1240,6 +1270,5 @@ UnicodeSet *RegexCompile::scanProp() {
     return uset;
 };
 
-
 U_NAMESPACE_END
-
+#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
diff --git a/icu4c/source/i18n/regexcmp.h b/icu4c/source/i18n/regexcmp.h
index b023d885b18..fe096b72400 100644
--- a/icu4c/source/i18n/regexcmp.h
+++ b/icu4c/source/i18n/regexcmp.h
@@ -4,8 +4,10 @@
 //  Copyright (C) 2002, International Business Machines Corporation and others.
 //  All Rights Reserved.
 //
-//  This file contains declarations for the class RegexCompile and for compiled
-//  regular expression data format
+//  This file contains declarations for the class RegexCompile
+//
+//  This class is internal to the regular expression implementation.
+//  For the public Regular Expression API, see the file "unicode/regex.h"
 //
 
 
@@ -13,6 +15,8 @@
 #define RBBISCAN_H
 
 #include "unicode/utypes.h"
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
 #include "unicode/uobject.h"
 #include "unicode/uniset.h"
 #include "unicode/parseerr.h"
@@ -28,12 +32,7 @@ static const UBool REGEX_DEBUG = TRUE;
 
 //--------------------------------------------------------------------------------
 //
-//  class RegexCompile    does the lowest level, character-at-a-time
-//                        scanning of a regular expression.  
-//
-//                        The output of the scanner is a tokenized form
-//                        of the RE, plus prebuilt UnicodeSet objects for each
-//                        set of charcters that is referenced.
+//  class RegexCompile    Contains the regular expression compiler.
 //
 //--------------------------------------------------------------------------------
 static const int    kStackSize = 100;               // The size of the state stack for
@@ -161,5 +160,5 @@ private:
 };
 
 U_NAMESPACE_END
-
-#endif
+#endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
+#endif   // RBBISCAN_H
diff --git a/icu4c/source/i18n/regexcst.h b/icu4c/source/i18n/regexcst.h
index 4776f63e5c1..9946301cbce 100644
--- a/icu4c/source/i18n/regexcst.h
+++ b/icu4c/source/i18n/regexcst.h
@@ -24,7 +24,6 @@ U_NAMESPACE_BEGIN
 enum Regex_PatternParseAction {
     doCloseParen,
     doProperty,
-    doTagValue,
     doOrOperator,
     doOpenCaptureParen,
     doBadOpenParenType,
@@ -35,6 +34,7 @@ enum Regex_PatternParseAction {
     doNamedChar,
     doBackslashw,
     doPossesiveStar,
+    doMismatchedParenErr,
     doOpenLookBehind,
     doBackslashx,
     doBackslashz,
@@ -43,6 +43,7 @@ enum Regex_PatternParseAction {
     doEnterQuoteMode,
     doPossesivePlus,
     doNGStar,
+    doMatchMode,
     doOpenLookAheadNeg,
     doPlus,
     doOpenNonCaptureParen,
@@ -51,14 +52,11 @@ enum Regex_PatternParseAction {
     doNGPlus,
     doPatFinish,
     doBackslashD,
-    doIntervalMinValue,
-    doIntervalDigit,
     doPossesiveOpt,
     doBackslashG,
     doOpt,
     doOpenAtomicParen,
     doBackslashS,
-    doNumberExpectedError,
     doStringChar,
     doOpenLookAhead,
     doBackRef,
@@ -74,6 +72,7 @@ enum Regex_PatternParseAction {
     doBackslashb,
     doEndString,
     doBackslashd,
+    doNotImplementedError,
     doOpenLookBehindNeg,
     doSplitString,
     rbbiLastAction};
@@ -100,13 +99,13 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doStartString, 254, 13,0,  TRUE}     //  3      term
     , {doStartString, 130, 13,0,  TRUE}     //  4 
     , {doScanUnicodeSet, 91 /* [ */, 20,0,  TRUE}     //  5 
-    , {doNOP, 40 /* ( */, 27, 20, TRUE}     //  6 
+    , {doNOP, 40 /* ( */, 28, 20, TRUE}     //  6 
     , {doDotAny, 46 /* . */, 20,0,  TRUE}     //  7 
     , {doCaret, 94 /* ^ */, 3,0,  TRUE}     //  8 
     , {doDollar, 36 /* $ */, 3,0,  TRUE}     //  9 
-    , {doNOP, 92 /* \ */, 60,0,  TRUE}     //  10 
+    , {doNOP, 92 /* \ */, 67,0,  TRUE}     //  10 
     , {doNOP, 253, 2,0,  FALSE}     //  11 
-    , {doRuleError, 255, 80,0,  FALSE}     //  12 
+    , {doRuleError, 255, 87,0,  FALSE}     //  12 
     , {doStringChar, 254, 13,0,  TRUE}     //  13      string
     , {doStringChar, 130, 13,0,  TRUE}     //  14 
     , {doSplitString, 63 /* ? */, 20,0,  FALSE}     //  15 
@@ -114,67 +113,74 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doSplitString, 42 /* * */, 20,0,  FALSE}     //  17 
     , {doSplitString, 123 /* { */, 20,0,  FALSE}     //  18 
     , {doEndString, 255, 20,0,  FALSE}     //  19 
-    , {doNOP, 42 /* * */, 41,0,  TRUE}     //  20      expr-quant
-    , {doNOP, 43 /* + */, 44,0,  TRUE}     //  21 
-    , {doNOP, 63 /* ? */, 47,0,  TRUE}     //  22 
-    , {doNOP, 255, 24,0,  FALSE}     //  23 
-    , {doOrOperator, 124 /* | */, 3,0,  TRUE}     //  24      expr-cont
-    , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  25 
-    , {doNOP, 255, 3,0,  FALSE}     //  26 
-    , {doNOP, 63 /* ? */, 29,0,  TRUE}     //  27      open-paren
-    , {doOpenCaptureParen, 255, 3, 20, FALSE}     //  28 
-    , {doOpenNonCaptureParen, 58 /* : */, 3, 20, TRUE}     //  29      open-paren-extended
-    , {doOpenAtomicParen, 62 /* > */, 3, 20, TRUE}     //  30 
-    , {doOpenLookAhead, 61 /* = */, 3, 24, TRUE}     //  31 
-    , {doOpenLookAheadNeg, 33 /* ! */, 3, 24, TRUE}     //  32 
-    , {doNOP, 60 /* < */, 36,0,  TRUE}     //  33 
-    , {doNOP, 35 /* # */, 39,0,  TRUE}     //  34 
-    , {doBadOpenParenType, 255, 80,0,  FALSE}     //  35 
-    , {doOpenLookBehind, 61 /* = */, 3, 24, TRUE}     //  36      open-paren-lookbehind
-    , {doOpenLookBehindNeg, 33 /* ! */, 3, 24, TRUE}     //  37 
-    , {doBadOpenParenType, 255, 80,0,  FALSE}     //  38 
-    , {doNOP, 41 /* ) */, 3,0,  TRUE}     //  39      paren-comment
-    , {doNOP, 255, 39,0,  TRUE}     //  40 
-    , {doNGStar, 63 /* ? */, 24,0,  TRUE}     //  41      quant-star
-    , {doPossesiveStar, 43 /* + */, 24,0,  TRUE}     //  42 
-    , {doStar, 255, 24,0,  FALSE}     //  43 
-    , {doNGPlus, 63 /* ? */, 24,0,  TRUE}     //  44      quant-plus
-    , {doPossesivePlus, 43 /* + */, 24,0,  TRUE}     //  45 
-    , {doPlus, 255, 24,0,  FALSE}     //  46 
-    , {doNGOpt, 63 /* ? */, 24,0,  TRUE}     //  47      quant-opt
-    , {doPossesiveOpt, 43 /* + */, 24,0,  TRUE}     //  48 
-    , {doOpt, 255, 24,0,  FALSE}     //  49 
-    , {doNOP, 129, 50,0,  TRUE}     //  50      interval-open
-    , {doIntervalMinValue, 128, 53,0,  FALSE}     //  51 
-    , {doNumberExpectedError, 255, 80,0,  FALSE}     //  52 
-    , {doNOP, 129, 57,0,  TRUE}     //  53      interval-value
-    , {doNOP, 125 /* } */, 57,0,  FALSE}     //  54 
-    , {doIntervalDigit, 128, 53,0,  TRUE}     //  55 
-    , {doNumberExpectedError, 255, 80,0,  FALSE}     //  56 
-    , {doNOP, 129, 57,0,  TRUE}     //  57      interval-close
-    , {doTagValue, 125 /* } */, 24,0,  TRUE}     //  58 
-    , {doNumberExpectedError, 255, 80,0,  FALSE}     //  59 
-    , {doBackslashA, 65 /* A */, 3,0,  TRUE}     //  60      backslash
-    , {doBackslashB, 66 /* B */, 3,0,  TRUE}     //  61 
-    , {doBackslashb, 98 /* b */, 3,0,  TRUE}     //  62 
-    , {doBackslashd, 100 /* d */, 20,0,  TRUE}     //  63 
-    , {doBackslashD, 68 /* D */, 20,0,  TRUE}     //  64 
-    , {doBackslashG, 71 /* G */, 3,0,  TRUE}     //  65 
-    , {doNamedChar, 78 /* N */, 20,0,  TRUE}     //  66 
-    , {doProperty, 112 /* p */, 20,0,  FALSE}     //  67 
-    , {doProperty, 80 /* P */, 20,0,  FALSE}     //  68 
-    , {doEnterQuoteMode, 81 /* Q */, 3,0,  TRUE}     //  69 
-    , {doBackslashS, 83 /* S */, 20,0,  TRUE}     //  70 
-    , {doBackslashs, 115 /* s */, 20,0,  TRUE}     //  71 
-    , {doBackslashW, 87 /* W */, 20,0,  TRUE}     //  72 
-    , {doBackslashw, 119 /* w */, 20,0,  TRUE}     //  73 
-    , {doBackslashX, 88 /* X */, 20,0,  TRUE}     //  74 
-    , {doBackslashx, 120 /* x */, 20,0,  TRUE}     //  75 
-    , {doBackslashZ, 90 /* Z */, 3,0,  TRUE}     //  76 
-    , {doBackslashz, 122 /* z */, 3,0,  TRUE}     //  77 
-    , {doBackRef, 128, 20,0,  TRUE}     //  78 
-    , {doStartString, 255, 13,0,  TRUE}     //  79 
-    , {doExit, 255, 80,0,  TRUE}     //  80      errorDeath
+    , {doNOP, 42 /* * */, 56,0,  TRUE}     //  20      expr-quant
+    , {doNOP, 43 /* + */, 59,0,  TRUE}     //  21 
+    , {doNOP, 63 /* ? */, 62,0,  TRUE}     //  22 
+    , {doNOP, 123 /* { */, 65,0,  TRUE}     //  23 
+    , {doNOP, 255, 25,0,  FALSE}     //  24 
+    , {doOrOperator, 124 /* | */, 3,0,  TRUE}     //  25      expr-cont
+    , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  26 
+    , {doNOP, 255, 3,0,  FALSE}     //  27 
+    , {doNOP, 63 /* ? */, 30,0,  TRUE}     //  28      open-paren
+    , {doOpenCaptureParen, 255, 3, 20, FALSE}     //  29 
+    , {doOpenNonCaptureParen, 58 /* : */, 3, 20, TRUE}     //  30      open-paren-extended
+    , {doOpenAtomicParen, 62 /* > */, 3, 20, TRUE}     //  31 
+    , {doOpenLookAhead, 61 /* = */, 3, 25, TRUE}     //  32 
+    , {doOpenLookAheadNeg, 33 /* ! */, 3, 25, TRUE}     //  33 
+    , {doNOP, 60 /* < */, 42,0,  TRUE}     //  34 
+    , {doNOP, 35 /* # */, 45,0,  TRUE}     //  35 
+    , {doMatchMode, 105 /* i */, 48,0,  TRUE}     //  36 
+    , {doMatchMode, 120 /* x */, 48,0,  TRUE}     //  37 
+    , {doMatchMode, 115 /* s */, 48,0,  TRUE}     //  38 
+    , {doMatchMode, 109 /* m */, 48,0,  TRUE}     //  39 
+    , {doMatchMode, 45 /* - */, 48,0,  TRUE}     //  40 
+    , {doBadOpenParenType, 255, 87,0,  FALSE}     //  41 
+    , {doOpenLookBehind, 61 /* = */, 3, 25, TRUE}     //  42      open-paren-lookbehind
+    , {doOpenLookBehindNeg, 33 /* ! */, 3, 25, TRUE}     //  43 
+    , {doBadOpenParenType, 255, 87,0,  FALSE}     //  44 
+    , {doNOP, 41 /* ) */, 3,0,  TRUE}     //  45      paren-comment
+    , {doMismatchedParenErr, 253, 87,0,  FALSE}     //  46 
+    , {doNOP, 255, 45,0,  TRUE}     //  47 
+    , {doMatchMode, 105 /* i */, 48,0,  TRUE}     //  48      paren-flag
+    , {doMatchMode, 115 /* s */, 48,0,  TRUE}     //  49 
+    , {doMatchMode, 109 /* m */, 48,0,  TRUE}     //  50 
+    , {doMatchMode, 120 /* x */, 48,0,  TRUE}     //  51 
+    , {doMatchMode, 45 /* - */, 48,0,  TRUE}     //  52 
+    , {doNOP, 41 /* ) */, 3,0,  TRUE}     //  53 
+    , {doOpenNonCaptureParen, 58 /* : */, 3, 20, TRUE}     //  54 
+    , {doNOP, 255, 87,0,  FALSE}     //  55 
+    , {doNGStar, 63 /* ? */, 25,0,  TRUE}     //  56      quant-star
+    , {doPossesiveStar, 43 /* + */, 25,0,  TRUE}     //  57 
+    , {doStar, 255, 25,0,  FALSE}     //  58 
+    , {doNGPlus, 63 /* ? */, 25,0,  TRUE}     //  59      quant-plus
+    , {doPossesivePlus, 43 /* + */, 25,0,  TRUE}     //  60 
+    , {doPlus, 255, 25,0,  FALSE}     //  61 
+    , {doNGOpt, 63 /* ? */, 25,0,  TRUE}     //  62      quant-opt
+    , {doPossesiveOpt, 43 /* + */, 25,0,  TRUE}     //  63 
+    , {doOpt, 255, 25,0,  FALSE}     //  64 
+    , {doNOP, 129, 65,0,  TRUE}     //  65      interval-open
+    , {doNotImplementedError, 255, 87,0,  FALSE}     //  66 
+    , {doBackslashA, 65 /* A */, 3,0,  TRUE}     //  67      backslash
+    , {doBackslashB, 66 /* B */, 3,0,  TRUE}     //  68 
+    , {doBackslashb, 98 /* b */, 3,0,  TRUE}     //  69 
+    , {doBackslashd, 100 /* d */, 20,0,  TRUE}     //  70 
+    , {doBackslashD, 68 /* D */, 20,0,  TRUE}     //  71 
+    , {doBackslashG, 71 /* G */, 3,0,  TRUE}     //  72 
+    , {doNamedChar, 78 /* N */, 20,0,  TRUE}     //  73 
+    , {doProperty, 112 /* p */, 20,0,  FALSE}     //  74 
+    , {doProperty, 80 /* P */, 20,0,  FALSE}     //  75 
+    , {doEnterQuoteMode, 81 /* Q */, 3,0,  TRUE}     //  76 
+    , {doBackslashS, 83 /* S */, 20,0,  TRUE}     //  77 
+    , {doBackslashs, 115 /* s */, 20,0,  TRUE}     //  78 
+    , {doBackslashW, 87 /* W */, 20,0,  TRUE}     //  79 
+    , {doBackslashw, 119 /* w */, 20,0,  TRUE}     //  80 
+    , {doBackslashX, 88 /* X */, 20,0,  TRUE}     //  81 
+    , {doBackslashx, 120 /* x */, 20,0,  TRUE}     //  82 
+    , {doBackslashZ, 90 /* Z */, 3,0,  TRUE}     //  83 
+    , {doBackslashz, 122 /* z */, 3,0,  TRUE}     //  84 
+    , {doBackRef, 128, 20,0,  TRUE}     //  85 
+    , {doStartString, 255, 13,0,  TRUE}     //  86 
+    , {doExit, 255, 87,0,  TRUE}     //  87      errorDeath
  };
 static const char *RegexStateNames[] = {    0,
      "start",
@@ -199,6 +205,7 @@ static const char *RegexStateNames[] = {    0,
      "expr-quant",
     0,
     0,
+    0,
     0,
      "expr-cont",
     0,
@@ -211,11 +218,25 @@ static const char *RegexStateNames[] = {    0,
     0,
     0,
     0,
+    0,
+    0,
+    0,
+    0,
+    0,
     0,
      "open-paren-lookbehind",
     0,
     0,
      "paren-comment",
+    0,
+    0,
+     "paren-flag",
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
     0,
      "quant-star",
     0,
@@ -227,14 +248,6 @@ static const char *RegexStateNames[] = {    0,
     0,
     0,
      "interval-open",
-    0,
-    0,
-     "interval-value",
-    0,
-    0,
-    0,
-     "interval-close",
-    0,
     0,
      "backslash",
     0,
diff --git a/icu4c/source/i18n/regexcst.txt b/icu4c/source/i18n/regexcst.txt
index 5438cdacf67..1a39d5b4335 100644
--- a/icu4c/source/i18n/regexcst.txt
+++ b/icu4c/source/i18n/regexcst.txt
@@ -107,7 +107,8 @@ string:
 expr-quant:
     '*'                  n  quant-star                       
     '+'                  n  quant-plus                              
-    '?'                  n  quant-opt        
+    '?'                  n  quant-opt     
+    '{'                  n  interval-open
     default                 expr-cont 
     
     
@@ -136,6 +137,11 @@ open-paren-extended:
     '!'                  n  term            ^expr-cont              doOpenLookAheadNeg     #  (?!
     '<'                  n  open-paren-lookbehind
     '#'                  n  paren-comment
+    'i'                  n  paren-flag                              doMatchMode
+    'x'                  n  paren-flag                              doMatchMode
+    's'                  n  paren-flag                              doMatchMode
+    'm'                  n  paren-flag                              doMatchMode
+    '-'                  n  paren-flag                              doMatchMode
     default                 errorDeath                              doBadOpenParenType
     
 open-paren-lookbehind:
@@ -150,7 +156,21 @@ open-paren-lookbehind:
 #
 paren-comment:
     ')'                  n  term
+    eof		            errorDeath                              doMismatchedParenErr
     default              n  paren-comment
+
+#
+#  paren-flag    Scanned the flag portion of a (?ismx-ismx  flag-setting construct
+#                TODO:  this is not fully implemented yet.
+paren-flag:
+    'i'                  n  paren-flag                              doMatchMode
+    's'                  n  paren-flag                              doMatchMode
+    'm'                  n  paren-flag                              doMatchMode
+    'x'                  n  paren-flag                              doMatchMode
+    '-'                  n  paren-flag                              doMatchMode
+    ')'                  n  term
+    ':'                  n  term              ^expr-quant           doOpenNonCaptureParen
+    default                 errorDeath
     
     
 #
@@ -189,19 +209,8 @@ quant-opt:
 #
 interval-open:
     white_space          n  interval-open
-    digit_char              interval-value                          doIntervalMinValue
-    default                 errorDeath                              doNumberExpectedError
+    default                 errorDeath                              doNotImplementedError
     
-interval-value:
-    white_space          n  interval-close
-    '}'                     interval-close
-    digit_char           n  interval-value                          doIntervalDigit
-    default                 errorDeath                              doNumberExpectedError
-    
-interval-close:
-    white_space          n  interval-close
-    '}'                  n  expr-cont                               doTagValue
-    default                 errorDeath                              doNumberExpectedError
     
     
     
diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h
index c21bd05a9f2..5b8f1e0df43 100644
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@@ -4,9 +4,9 @@
 //
 //   file:  regeximp.h
 //
-//           ICU Regular Expressions, declarations of internal implementation types
-//           and constants that are common between the pattern compiler and the 
-//           runtime execution engine.
+//           ICU Regular Expressions,
+//               Definitions of constant values used in the compiled form of
+//               a regular expression pattern.
 //
 
 #ifndef _REGEXIMP_H
@@ -19,7 +19,7 @@
 //
 static const uint32_t     URX_UNUSED1       = 1;
 static const uint32_t     URX_END           = 2;
-static const uint32_t     URX_ONECHAR       = 3;
+static const uint32_t     URX_ONECHAR       = 3;    // Value field is the 21 bit unicode char to match
 static const uint32_t     URX_STRING        = 4;    // Value field is index of string start
 static const uint32_t     URX_STRING_LEN    = 5;    // Value field is string length (code units)
 static const uint32_t     URX_STATE_SAVE    = 6;    // Value field is pattern position to push
@@ -55,7 +55,7 @@ static const uint32_t     URX_DOLLAR        = 24;   // Also for \Z
 
                 
 //
-//  Access to Unicode Sets for composite properties
+//  Access to Unicode Sets for Perl-like composite character properties
 //     The sets are accessed by the match engine for things like \w (word boundary)
 //     
 static const uint32_t     URX_ISWORD_SET  = 1;
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp
index f5d9ec1bfc8..25193e7c3f1 100644
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -1,6 +1,9 @@
 //
 //  file:  rematch.cpp    
 //
+//         Contains the implementation of class RegexMatcher,
+//         which is one of the main API classes for the ICU regular expression package.
+//
 /*
 **********************************************************************
 *   Copyright (C) 2002 International Business Machines Corporation   *
@@ -9,6 +12,8 @@
 */
 
 #include "unicode/utypes.h"
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
 #include "unicode/regex.h"
 #include "unicode/uniset.h"
 #include "unicode/uchar.h"
@@ -443,10 +448,11 @@ int32_t RegexMatcher::start(int group, UErrorCode &err) const {
 //   isWordBoundary 
 //                     in perl, "xab..cd..", \b is true at positions 0,3,5,7
 //                     For us,
-//                       If the current char is a combining mark, \b is FALSE
-//                       Scan backwards to the first non-combining char
-//                       Pos is a boundary if the current and previous chars are
-//                            opposite in membership in \w set
+//                       If the current char is a combining mark,
+//                          \b is FALSE.
+//                       Else Scan backwards to the first non-combining char.
+//                            We are at a boundary if this char and the original char are
+//                               opposite in membership in \w set
 //
 //--------------------------------------------------------------------------------
 UBool RegexMatcher::isWordBoundary(int32_t pos) {
@@ -486,27 +492,6 @@ UBool RegexMatcher::isWordBoundary(int32_t pos) {
 }
 
 
-//--------------------------------------------------------------------------------
-//
-//    getCaptureText    We have encountered a '\' that might preceed a
-//                      capture group specification. 
-//                      If a valid capture group number follows the '\', 
-//                      return the indicies to the start & end of the captured
-//                      text, and update the patIdx to the position following the
-//                      \n sequence.
-//
-//                      This function is used during find and replace operations when
-//                      processing caputure references in the replacement text.
-//
-//--------------------------------------------------------------------------------
-UBool  RegexMatcher::getCaptureText(const UnicodeString &rep,
-                                int32_t &repIdx,
-                                int32_t &textStart,
-                                int32_t &textEnd)
-{
-    return FALSE;
-}
-
 //--------------------------------------------------------------------------------
 //
 //     backTrack    Within the match engine, this function is called when
@@ -915,10 +900,9 @@ breakFromLoop:
 
 
 
-
-
 const char RegexMatcher::fgClassID = 0;
 
 U_NAMESPACE_END
 
+#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
 
diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp
index b28c0a90a75..1fdf37f778c 100644
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@@ -9,6 +9,9 @@
 */
 
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
 #include "unicode/regex.h"
 #include "uassert.h"
 #include "uvector.h"
@@ -66,6 +69,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     fBadState         = other.fBadState;
     fNumCaptureGroups = other.fNumCaptureGroups;
     fMaxCaptureDigits = other.fMaxCaptureDigits;
+    fStaticSets       = other.fStaticSets;    
     if (fBadState) {
         return *this;
     }
@@ -110,6 +114,7 @@ void RegexPattern::init() {
     fBadState         = FALSE;
     fNumCaptureGroups = 0;
     fMaxCaptureDigits = 1;     // TODO:  calculate for real.
+    fStaticSets       = NULL;
     fMatcher          = NULL;
     
     UErrorCode status=U_ZERO_ERROR;
@@ -384,15 +389,6 @@ int32_t  RegexPattern::split(const UnicodeString &input,
 
 
 
-//---------------------------------------------------------------------
-//
-//   hashcode
-//
-//---------------------------------------------------------------------
-int32_t   RegexPattern::hashCode(void) const {
-    return 0;           // TODO:   Do something better here
-};
-
 
 //---------------------------------------------------------------------
 //
@@ -512,8 +508,8 @@ breakFromLoop:
     printf("\n\n");
 };
 
-
-
 const char RegexPattern::fgClassID = 0;
 
+
 U_NAMESPACE_END
+#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h
index 412210f0ed4..0b10329dfb6 100644
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@@ -9,6 +9,9 @@
 #define REGEX_H
 
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
 #include "unicode/uobject.h"
 #include "unicode/unistr.h"
 #include "unicode/parseerr.h"
@@ -69,9 +72,6 @@ public:
     RegexPattern  &operator =(const RegexPattern &other);
     virtual RegexPattern  *clone() const;
 
-    // TODO:  Do we really want a hashCode function on this class?
-    virtual int32_t         hashCode(void) const;
-    
     
    /**
     *     Compiles the given regular expression into a pattern 
@@ -428,10 +428,6 @@ private:
     //
     void         MatchAt(int32_t startIdx, UErrorCode &status);   
     inline  void backTrack(int32_t &inputIdx, int32_t &patIdx);
-    UBool        getCaptureText(const UnicodeString &rep,
-                                int32_t &repIdx,
-                                int32_t &textStart,
-                                int32_t &textEnd);
     UBool        isWordBoundary(int32_t pos);         // perform the \b test
 
 
@@ -448,7 +444,6 @@ private:
 
 };  
 
-
-
 U_NAMESPACE_END
+#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
 #endif
diff --git a/icu4c/source/test/intltest/itmajor.cpp b/icu4c/source/test/intltest/itmajor.cpp
index 08e009bb878..eb9b2e1d7c2 100644
--- a/icu4c/source/test/intltest/itmajor.cpp
+++ b/icu4c/source/test/intltest/itmajor.cpp
@@ -70,11 +70,13 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam
                 break;
 
         case 3: name = "regex";
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
                 if (exec) {
                     logln("TestSuite Regex---"); logln();
                     RegexTest test;
                     callTest( test, par );
                 }
+#endif
                 break;
 
         case 4: name = "format";
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp
index 492ac16a46a..f551d764de1 100644
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -11,6 +11,8 @@
 //
 
 #include "unicode/utypes.h"
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
 #include "unicode/uchar.h"
 #include "intltest.h"
 #include "regextst.h"
@@ -1195,8 +1197,38 @@ void RegexTest::Errors() {
     REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
 
     // Missing close parentheses
-    //REGEX_ERR("Comment (?# with no close", 1, 0, U_REGEX_INTERNAL_ERROR);
+    REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN);
+    REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN);
+    REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN);
+
+    // Extra close paren
+    REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN);
+    REGEX_ERR(")))))))", 1, 1, U_REGEX_RULE_SYNTAX);
+    REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN);
+
+    // Flag settings not yet implemented
+    REGEX_ERR("(?i:stuff*)", 1, 3, U_REGEX_UNIMPLEMENTED);
+    REGEX_ERR("(?-si) stuff", 1, 3, U_REGEX_UNIMPLEMENTED);
+
+    // Look-ahead, Look-behind
+    REGEX_ERR("abc(?=xyz).*", 1, 6, U_REGEX_UNIMPLEMENTED);    // look-ahead
+    REGEX_ERR("abc(?!xyz).*", 1, 6, U_REGEX_UNIMPLEMENTED);    // negated look-ahead
+    REGEX_ERR("abc(?<=xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED);   // look-behind
+    REGEX_ERR("abc(?<!xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED);   // negated look-behind
+    REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX);       // illegal construct
+
+    // Atomic Grouping
+    REGEX_ERR("abc(?>xyz)", 1, 6, U_REGEX_UNIMPLEMENTED);
+
+    // {Numeric Quantifiers}
+    REGEX_ERR("abc{4}", 1, 5, U_REGEX_UNIMPLEMENTED);
+
+
+    // Quantifiers are allowed only after something that can be quantified.
+    REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX);
+    REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);
+    REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);
 }
 
-
+#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
 
diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h
index 0718535afb0..fb249b6b95f 100644
--- a/icu4c/source/test/intltest/regextst.h
+++ b/icu4c/source/test/intltest/regextst.h
@@ -8,6 +8,8 @@
 #ifndef REGEXTST_H
 #define REGEXTST_H
 
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
 
 #include "intltest.h"
 #include "unicode/regex.h"
@@ -35,4 +37,6 @@ public:
     virtual void regex_err(const char *pat, int32_t errline, int32_t errcol,
                             UErrorCode expectedStatus, int line);
 };
+
+#endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
 #endif