From ce09d8a4bcee3cf83ad34a7cc1ac6759eca19bf0 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Tue, 24 Feb 2015 00:24:59 +0000
Subject: [PATCH] ICU-11393 Regex, add pattern escapes \R, \v, \V, \h and \H

X-SVN-Rev: 37057
---
 icu4c/source/i18n/regexcmp.cpp          | 113 ++++++
 icu4c/source/i18n/regexcst.h            | 461 +++++++++++++-----------
 icu4c/source/i18n/regexcst.pl           |   4 +-
 icu4c/source/i18n/regexcst.txt          |   9 +
 icu4c/source/i18n/regeximp.h            |  12 +-
 icu4c/source/i18n/rematch.cpp           | 189 ++++++++--
 icu4c/source/i18n/repattrn.cpp          |   3 +
 icu4c/source/test/testdata/regextst.txt |  72 ++++
 8 files changed, 611 insertions(+), 252 deletions(-)

diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp
index cd6ca2b2467..e518e84cd35 100644
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -1188,6 +1188,21 @@ UBool RegexCompile::doParseActions(int32_t action)
         appendOp(URX_BACKSLASH_G, 0);
         break;
 
+    case doBackslashH:
+        fixLiterals(FALSE);
+        appendOp(URX_BACKSLASH_H, 1);
+        break;
+
+    case doBackslashh:
+        fixLiterals(FALSE);
+        appendOp(URX_BACKSLASH_H, 0);
+        break;
+
+    case doBackslashR:
+        fixLiterals(FALSE);
+        appendOp(URX_BACKSLASH_R, 0);
+        break;
+
     case doBackslashS:
         fixLiterals(FALSE);
         appendOp(URX_STAT_SETREF_N, URX_ISSPACE_SET);
@@ -1198,6 +1213,16 @@ UBool RegexCompile::doParseActions(int32_t action)
         appendOp(URX_STATIC_SETREF, URX_ISSPACE_SET);
         break;
 
+    case doBackslashV:
+        fixLiterals(FALSE);
+        appendOp(URX_BACKSLASH_V, 1);
+        break;
+
+    case doBackslashv:
+        fixLiterals(FALSE);
+        appendOp(URX_BACKSLASH_V, 0);
+        break;
+
     case doBackslashW:
         fixLiterals(FALSE);
         appendOp(URX_STAT_SETREF_N, URX_ISWORD_SET);
@@ -1548,6 +1573,48 @@ UBool RegexCompile::doParseActions(int32_t action)
             break;
         }
 
+    case doSetBackslash_h:
+        {
+            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
+            UnicodeSet h;
+            h.applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, U_GC_ZS_MASK, *fStatus);
+            h.add((UChar32)9);   // Tab
+            set->addAll(h);
+            break;
+        }
+
+    case doSetBackslash_H:
+        {
+            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
+            UnicodeSet h;
+            h.applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, U_GC_ZS_MASK, *fStatus);
+            h.add((UChar32)9);   // Tab
+            h.complement();
+            set->addAll(h);
+            break;
+        }
+
+    case doSetBackslash_v:
+        {
+            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
+            set->add((UChar32)0x0a, (UChar32)0x0d);  // add range
+            set->add((UChar32)0x85);
+            set->add((UChar32)0x2028, (UChar32)0x2029);
+            break;
+        }
+
+    case doSetBackslash_V:
+        {
+            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
+            UnicodeSet v;
+            v.add((UChar32)0x0a, (UChar32)0x0d);  // add range
+            v.add((UChar32)0x85);
+            v.add((UChar32)0x2028, (UChar32)0x2029);
+            v.complement();
+            set->addAll(v);
+            break;
+        }
+
     case doSetBackslash_w:
         {
             UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
@@ -2749,6 +2816,43 @@ void   RegexCompile::matchStartType() {
             break;
 
 
+        case URX_BACKSLASH_H:
+            // Horiz white space
+            if (currentLen == 0) {
+                UnicodeSet s;
+                s.applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, U_GC_ZS_MASK, *fStatus);
+                s.add((UChar32)9);   // Tab
+                if (URX_VAL(op) != 0) {
+                    s.complement();
+                }
+                fRXPat->fInitialChars->addAll(s);
+                numInitialStrings += 2;
+            }
+            currentLen++;
+            atStart = FALSE;
+            break;
+
+
+        case URX_BACKSLASH_R:       // Any line ending sequence
+        case URX_BACKSLASH_V:       // Any line ending code point, with optional negation
+            if (currentLen == 0) {
+                UnicodeSet s;
+                s.add((UChar32)0x0a, (UChar32)0x0d);  // add range
+                s.add((UChar32)0x85);
+                s.add((UChar32)0x2028, (UChar32)0x2029);
+                if (URX_VAL(op) != 0) {
+                     // Complement option applies to URX_BACKSLASH_V only.
+                     s.complement();
+                }
+                fRXPat->fInitialChars->addAll(s);
+                numInitialStrings += 2;
+            }
+            currentLen++;
+            atStart = FALSE;
+            break;
+
+
+
         case URX_ONECHAR_I:
             // Case Insensitive Single Character.
             if (currentLen == 0) {
@@ -3137,6 +3241,9 @@ int32_t   RegexCompile::minMatchLength(int32_t start, int32_t end) {
         case URX_STAT_SETREF_N:
         case URX_SETREF:
         case URX_BACKSLASH_D:
+        case URX_BACKSLASH_H:
+        case URX_BACKSLASH_R:
+        case URX_BACKSLASH_V:
         case URX_ONECHAR_I:
         case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
         case URX_DOTANY_ALL:    // . matches one or two.
@@ -3418,6 +3525,9 @@ int32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
         case URX_STAT_SETREF_N:
         case URX_SETREF:
         case URX_BACKSLASH_D:
+        case URX_BACKSLASH_H:
+        case URX_BACKSLASH_R:
+        case URX_BACKSLASH_V:
         case URX_ONECHAR_I:
         case URX_DOTANY_ALL:
         case URX_DOTANY:
@@ -3746,6 +3856,9 @@ void RegexCompile::stripNOPs() {
         case URX_LOOP_C:
         case URX_DOLLAR_D:
         case URX_DOLLAR_MD:
+        case URX_BACKSLASH_H:
+        case URX_BACKSLASH_R:
+        case URX_BACKSLASH_V:
             // These instructions are unaltered by the relocation.
             fRXPat->fCompiledPat->setElementAt(op, dst);
             dst++;
diff --git a/icu4c/source/i18n/regexcst.h b/icu4c/source/i18n/regexcst.h
index e754be4bd1a..3e62485f7c9 100644
--- a/icu4c/source/i18n/regexcst.h
+++ b/icu4c/source/i18n/regexcst.h
@@ -16,108 +16,117 @@ U_NAMESPACE_BEGIN
 //
 // Character classes for regex pattern scanning.
 //
-    static const uint8_t kRuleSet_digit_char = 128;
-    static const uint8_t kRuleSet_ascii_letter = 129;
+    static const uint8_t kRuleSet_ascii_letter = 128;
+    static const uint8_t kRuleSet_digit_char = 129;
     static const uint8_t kRuleSet_rule_char = 130;
 
 
 enum Regex_PatternParseAction {
-    doIntervalUpperDigit,
-    doPossessiveOpt,
-    doOpenLookBehindNeg,
-    doDotAny,
-    doSetBackslash_D,
-    doSetLiteral,
-    doSetBackslash_S,
-    doEscapeError,
-    doSetBackslash_W,
-    doDollar,
-    doBackslashb,
-    doSetOpError,
-    doBackslashG,
-    doPatStart,
-    doMismatchedParenErr,
-    doPossessivePlus,
-    doBackslashX,
-    doSetBackslash_s,
-    doSetBackslash_w,
-    doBackslashW,
-    doBackslashw,
-    doSetMatchMode,
-    doOrOperator,
-    doOpenLookAheadNeg,
-    doOpenLookBehind,
-    doBackslashS,
-    doBeginMatchMode,
-    doNOP,
-    doSetProp,
-    doBackslashA,
-    doIntervalInit,
-    doOpenCaptureParen,
-    doNGPlus,
-    doIntervalError,
-    doSetDifference2,
-    doNGOpt,
-    doEscapedLiteralChar,
-    doSetNegate,
-    doSetBegin,
-    doMatchModeParen,
-    doLiteralChar,
-    doOpt,
-    doSetIntersection2,
-    doBadOpenParenType,
-    doSuppressComments,
-    doCloseParen,
-    doPatFinish,
-    doSetBeginUnion,
-    doSetBackslash_d,
-    doProperty,
-    doNGInterval,
-    doNGStar,
-    doOpenLookAhead,
-    doSetBeginIntersection1,
-    doBeginNamedCapture,
-    doInterval,
-    doMatchMode,
-    doSetNoCloseError,
-    doSetBeginDifference1,
-    doPlus,
-    doBackslashD,
-    doSetLiteralEscaped,
-    doContinueNamedCapture,
-    doSetPosixProp,
-    doBackslashz,
-    doSetNamedRange,
-    doPossessiveStar,
-    doBadModeFlag,
-    doContinueNamedBackRef,
-    doPerlInline,
-    doBackslashd,
-    doOpenNonCaptureParen,
-    doSetEnd,
-    doSetAddDash,
-    doSetFinish,
-    doCaret,
-    doConditionalExpr,
-    doExit,
-    doNamedChar,
-    doSetRange,
-    doPossessiveInterval,
-    doBackslashs,
-    doIntervalSame,
-    doEnterQuoteMode,
-    doOpenAtomicParen,
-    doSetNamedChar,
-    doRuleError,
-    doStar,
-    doSetAddAmp,
-    doBackslashB,
-    doCompleteNamedBackRef,
-    doBackslashZ,
-    doIntevalLowerDigit,
+    doSetBackslash_V,
+    doSetBackslash_h,
     doBeginNamedBackRef,
-    doBackRef,
+    doSetMatchMode,
+    doEnterQuoteMode,
+    doOpenCaptureParen,
+    doContinueNamedCapture,
+    doSetBackslash_d,
+    doBeginMatchMode,
+    doBackslashX,
+    doSetPosixProp,
+    doIntervalError,
+    doSetLiteralEscaped,
+    doSetBackslash_s,
+    doNOP,
+    doBackslashv,
+    doOpenLookBehind,
+    doPatStart,
+    doPossessiveInterval,
+    doOpenAtomicParen,
+    doOpenLookAheadNeg,
+    doBackslashd,
+    doBackslashZ,
+    doIntervalUpperDigit,
     doBadNamedCapture,
+    doSetDifference2,
+    doSetAddAmp,
+    doSetNamedChar,
+    doNamedChar,
+    doSetBackslash_H,
+    doBackslashb,
+    doBackslashz,
+    doSetBeginDifference1,
+    doOpenLookAhead,
+    doMatchModeParen,
+    doBackslashV,
+    doIntevalLowerDigit,
+    doCaret,
+    doSetEnd,
+    doSetNegate,
+    doBackslashS,
+    doOrOperator,
+    doBackslashB,
+    doBackslashw,
+    doBackslashR,
+    doRuleError,
+    doDotAny,
+    doMatchMode,
+    doSetBackslash_W,
+    doNGPlus,
+    doSetBackslash_D,
+    doPossessiveOpt,
+    doSetNamedRange,
+    doConditionalExpr,
+    doBackslashs,
+    doPossessiveStar,
+    doPlus,
+    doBadOpenParenType,
+    doCloseParen,
+    doNGInterval,
+    doSetProp,
+    doBackRef,
+    doSetBeginUnion,
+    doEscapeError,
+    doOpt,
+    doSetBeginIntersection1,
+    doPossessivePlus,
+    doBackslashD,
+    doOpenLookBehindNeg,
+    doSetBegin,
+    doSetIntersection2,
+    doCompleteNamedBackRef,
+    doSetRange,
+    doDollar,
+    doBackslashH,
+    doExit,
+    doNGOpt,
+    doOpenNonCaptureParen,
+    doBackslashA,
+    doSetBackslash_v,
+    doBackslashh,
+    doBadModeFlag,
+    doSetNoCloseError,
+    doIntervalSame,
+    doSetAddDash,
+    doBackslashW,
+    doPerlInline,
+    doSetOpError,
+    doSetLiteral,
+    doPatFinish,
+    doBeginNamedCapture,
+    doEscapedLiteralChar,
+    doLiteralChar,
+    doSuppressComments,
+    doMismatchedParenErr,
+    doNGStar,
+    doSetFinish,
+    doInterval,
+    doBackslashG,
+    doStar,
+    doSetBackslash_w,
+    doSetBackslash_S,
+    doProperty,
+    doContinueNamedBackRef,
+    doIntervalInit,
     rbbiLastAction};
 
 //-------------------------------------------------------------------------------
@@ -140,7 +149,7 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doPatStart, 255, 2,0,  FALSE}     //  1      start
     , {doLiteralChar, 254, 14,0,  TRUE}     //  2      term
     , {doLiteralChar, 130, 14,0,  TRUE}     //  3 
-    , {doSetBegin, 91 /* [ */, 118, 196, TRUE}     //  4 
+    , {doSetBegin, 91 /* [ */, 123, 205, TRUE}     //  4 
     , {doNOP, 40 /* ( */, 27,0,  TRUE}     //  5 
     , {doDotAny, 46 /* . */, 14,0,  TRUE}     //  6 
     , {doCaret, 94 /* ^ */, 14,0,  TRUE}     //  7 
@@ -149,7 +158,7 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  10 
     , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  11 
     , {doPatFinish, 253, 2,0,  FALSE}     //  12 
-    , {doRuleError, 255, 197,0,  FALSE}     //  13 
+    , {doRuleError, 255, 206,0,  FALSE}     //  13 
     , {doNOP, 42 /* * */, 68,0,  TRUE}     //  14      expr-quant
     , {doNOP, 43 /* + */, 71,0,  TRUE}     //  15 
     , {doNOP, 63 /* ? */, 74,0,  TRUE}     //  16 
@@ -179,15 +188,15 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doBeginMatchMode, 119 /* w */, 53,0,  FALSE}     //  40 
     , {doBeginMatchMode, 120 /* x */, 53,0,  FALSE}     //  41 
     , {doBeginMatchMode, 45 /* - */, 53,0,  FALSE}     //  42 
-    , {doConditionalExpr, 40 /* ( */, 197,0,  TRUE}     //  43 
-    , {doPerlInline, 123 /* { */, 197,0,  TRUE}     //  44 
-    , {doBadOpenParenType, 255, 197,0,  FALSE}     //  45 
+    , {doConditionalExpr, 40 /* ( */, 206,0,  TRUE}     //  43 
+    , {doPerlInline, 123 /* { */, 206,0,  TRUE}     //  44 
+    , {doBadOpenParenType, 255, 206,0,  FALSE}     //  45 
     , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  46      open-paren-lookbehind
     , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  47 
-    , {doBeginNamedCapture, 129, 64,0,  FALSE}     //  48 
-    , {doBadOpenParenType, 255, 197,0,  FALSE}     //  49 
+    , {doBeginNamedCapture, 128, 64,0,  FALSE}     //  48 
+    , {doBadOpenParenType, 255, 206,0,  FALSE}     //  49 
     , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  50      paren-comment
-    , {doMismatchedParenErr, 253, 197,0,  FALSE}     //  51 
+    , {doMismatchedParenErr, 253, 206,0,  FALSE}     //  51 
     , {doNOP, 255, 50,0,  TRUE}     //  52 
     , {doMatchMode, 105 /* i */, 53,0,  TRUE}     //  53      paren-flag
     , {doMatchMode, 100 /* d */, 53,0,  TRUE}     //  54 
@@ -199,11 +208,11 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doMatchMode, 45 /* - */, 53,0,  TRUE}     //  60 
     , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  61 
     , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  62 
-    , {doBadModeFlag, 255, 197,0,  FALSE}     //  63 
-    , {doContinueNamedCapture, 129, 64,0,  TRUE}     //  64      named-capture
-    , {doContinueNamedCapture, 128, 64,0,  TRUE}     //  65 
+    , {doBadModeFlag, 255, 206,0,  FALSE}     //  63 
+    , {doContinueNamedCapture, 128, 64,0,  TRUE}     //  64      named-capture
+    , {doContinueNamedCapture, 129, 64,0,  TRUE}     //  65 
     , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE}     //  66 
-    , {doBadNamedCapture, 255, 197,0,  FALSE}     //  67 
+    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  67 
     , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  68      quant-star
     , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  69 
     , {doStar, 255, 20,0,  FALSE}     //  70 
@@ -213,15 +222,15 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  74      quant-opt
     , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  75 
     , {doOpt, 255, 20,0,  FALSE}     //  76 
-    , {doNOP, 128, 79,0,  FALSE}     //  77      interval-open
-    , {doIntervalError, 255, 197,0,  FALSE}     //  78 
-    , {doIntevalLowerDigit, 128, 79,0,  TRUE}     //  79      interval-lower
+    , {doNOP, 129, 79,0,  FALSE}     //  77      interval-open
+    , {doIntervalError, 255, 206,0,  FALSE}     //  78 
+    , {doIntevalLowerDigit, 129, 79,0,  TRUE}     //  79      interval-lower
     , {doNOP, 44 /* , */, 83,0,  TRUE}     //  80 
     , {doIntervalSame, 125 /* } */, 86,0,  TRUE}     //  81 
-    , {doIntervalError, 255, 197,0,  FALSE}     //  82 
-    , {doIntervalUpperDigit, 128, 83,0,  TRUE}     //  83      interval-upper
+    , {doIntervalError, 255, 206,0,  FALSE}     //  82 
+    , {doIntervalUpperDigit, 129, 83,0,  TRUE}     //  83      interval-upper
     , {doNOP, 125 /* } */, 86,0,  TRUE}     //  84 
-    , {doIntervalError, 255, 197,0,  FALSE}     //  85 
+    , {doIntervalError, 255, 206,0,  FALSE}     //  85 
     , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  86      interval-type
     , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  87 
     , {doInterval, 255, 20,0,  FALSE}     //  88 
@@ -231,109 +240,118 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
     , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  92 
     , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  93 
     , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  94 
-    , {doNOP, 107 /* k */, 110,0,  TRUE}     //  95 
-    , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  96 
-    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  97 
-    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  98 
-    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  99 
-    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  100 
-    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  101 
-    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  102 
-    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  103 
-    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  104 
-    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  105 
-    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  106 
-    , {doBackRef, 128, 14,0,  TRUE}     //  107 
-    , {doEscapeError, 253, 197,0,  FALSE}     //  108 
-    , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  109 
-    , {doBeginNamedBackRef, 60 /* < */, 112,0,  TRUE}     //  110      named-backref
-    , {doBadNamedCapture, 255, 197,0,  FALSE}     //  111 
-    , {doContinueNamedBackRef, 129, 114,0,  TRUE}     //  112      named-backref-2
-    , {doBadNamedCapture, 255, 197,0,  FALSE}     //  113 
-    , {doContinueNamedBackRef, 129, 114,0,  TRUE}     //  114      named-backref-3
-    , {doContinueNamedBackRef, 128, 114,0,  TRUE}     //  115 
-    , {doCompleteNamedBackRef, 62 /* > */, 14,0,  TRUE}     //  116 
-    , {doBadNamedCapture, 255, 197,0,  FALSE}     //  117 
-    , {doSetNegate, 94 /* ^ */, 121,0,  TRUE}     //  118      set-open
-    , {doSetPosixProp, 58 /* : */, 123,0,  FALSE}     //  119 
-    , {doNOP, 255, 121,0,  FALSE}     //  120 
-    , {doSetLiteral, 93 /* ] */, 136,0,  TRUE}     //  121      set-open2
-    , {doNOP, 255, 126,0,  FALSE}     //  122 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  123      set-posix
-    , {doNOP, 58 /* : */, 126,0,  FALSE}     //  124 
-    , {doRuleError, 255, 197,0,  FALSE}     //  125 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  126      set-start
-    , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE}     //  127 
-    , {doNOP, 92 /* \ */, 186,0,  TRUE}     //  128 
-    , {doNOP, 45 /* - */, 132,0,  TRUE}     //  129 
-    , {doNOP, 38 /* & */, 134,0,  TRUE}     //  130 
-    , {doSetLiteral, 255, 136,0,  TRUE}     //  131 
-    , {doRuleError, 45 /* - */, 197,0,  FALSE}     //  132      set-start-dash
-    , {doSetAddDash, 255, 136,0,  FALSE}     //  133 
-    , {doRuleError, 38 /* & */, 197,0,  FALSE}     //  134      set-start-amp
-    , {doSetAddAmp, 255, 136,0,  FALSE}     //  135 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  136      set-after-lit
-    , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE}     //  137 
-    , {doNOP, 45 /* - */, 173,0,  TRUE}     //  138 
-    , {doNOP, 38 /* & */, 164,0,  TRUE}     //  139 
-    , {doNOP, 92 /* \ */, 186,0,  TRUE}     //  140 
-    , {doSetNoCloseError, 253, 197,0,  FALSE}     //  141 
-    , {doSetLiteral, 255, 136,0,  TRUE}     //  142 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  143      set-after-set
-    , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE}     //  144 
-    , {doNOP, 45 /* - */, 166,0,  TRUE}     //  145 
-    , {doNOP, 38 /* & */, 161,0,  TRUE}     //  146 
-    , {doNOP, 92 /* \ */, 186,0,  TRUE}     //  147 
-    , {doSetNoCloseError, 253, 197,0,  FALSE}     //  148 
-    , {doSetLiteral, 255, 136,0,  TRUE}     //  149 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  150      set-after-range
-    , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE}     //  151 
-    , {doNOP, 45 /* - */, 169,0,  TRUE}     //  152 
-    , {doNOP, 38 /* & */, 171,0,  TRUE}     //  153 
-    , {doNOP, 92 /* \ */, 186,0,  TRUE}     //  154 
-    , {doSetNoCloseError, 253, 197,0,  FALSE}     //  155 
-    , {doSetLiteral, 255, 136,0,  TRUE}     //  156 
-    , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE}     //  157      set-after-op
-    , {doSetOpError, 93 /* ] */, 197,0,  FALSE}     //  158 
-    , {doNOP, 92 /* \ */, 186,0,  TRUE}     //  159 
-    , {doSetLiteral, 255, 136,0,  TRUE}     //  160 
-    , {doSetBeginIntersection1, 91 /* [ */, 118, 143, TRUE}     //  161      set-set-amp
-    , {doSetIntersection2, 38 /* & */, 157,0,  TRUE}     //  162 
-    , {doSetAddAmp, 255, 136,0,  FALSE}     //  163 
-    , {doSetIntersection2, 38 /* & */, 157,0,  TRUE}     //  164      set-lit-amp
-    , {doSetAddAmp, 255, 136,0,  FALSE}     //  165 
-    , {doSetBeginDifference1, 91 /* [ */, 118, 143, TRUE}     //  166      set-set-dash
-    , {doSetDifference2, 45 /* - */, 157,0,  TRUE}     //  167 
-    , {doSetAddDash, 255, 136,0,  FALSE}     //  168 
-    , {doSetDifference2, 45 /* - */, 157,0,  TRUE}     //  169      set-range-dash
-    , {doSetAddDash, 255, 136,0,  FALSE}     //  170 
-    , {doSetIntersection2, 38 /* & */, 157,0,  TRUE}     //  171      set-range-amp
-    , {doSetAddAmp, 255, 136,0,  FALSE}     //  172 
-    , {doSetDifference2, 45 /* - */, 157,0,  TRUE}     //  173      set-lit-dash
-    , {doSetAddDash, 91 /* [ */, 136,0,  FALSE}     //  174 
-    , {doSetAddDash, 93 /* ] */, 136,0,  FALSE}     //  175 
-    , {doNOP, 92 /* \ */, 178,0,  TRUE}     //  176 
-    , {doSetRange, 255, 150,0,  TRUE}     //  177 
-    , {doSetOpError, 115 /* s */, 197,0,  FALSE}     //  178      set-lit-dash-escape
-    , {doSetOpError, 83 /* S */, 197,0,  FALSE}     //  179 
-    , {doSetOpError, 119 /* w */, 197,0,  FALSE}     //  180 
-    , {doSetOpError, 87 /* W */, 197,0,  FALSE}     //  181 
-    , {doSetOpError, 100 /* d */, 197,0,  FALSE}     //  182 
-    , {doSetOpError, 68 /* D */, 197,0,  FALSE}     //  183 
-    , {doSetNamedRange, 78 /* N */, 150,0,  FALSE}     //  184 
-    , {doSetRange, 255, 150,0,  TRUE}     //  185 
-    , {doSetProp, 112 /* p */, 143,0,  FALSE}     //  186      set-escape
-    , {doSetProp, 80 /* P */, 143,0,  FALSE}     //  187 
-    , {doSetNamedChar, 78 /* N */, 136,0,  FALSE}     //  188 
-    , {doSetBackslash_s, 115 /* s */, 150,0,  TRUE}     //  189 
-    , {doSetBackslash_S, 83 /* S */, 150,0,  TRUE}     //  190 
-    , {doSetBackslash_w, 119 /* w */, 150,0,  TRUE}     //  191 
-    , {doSetBackslash_W, 87 /* W */, 150,0,  TRUE}     //  192 
-    , {doSetBackslash_d, 100 /* d */, 150,0,  TRUE}     //  193 
-    , {doSetBackslash_D, 68 /* D */, 150,0,  TRUE}     //  194 
-    , {doSetLiteralEscaped, 255, 136,0,  TRUE}     //  195 
-    , {doSetFinish, 255, 14,0,  FALSE}     //  196      set-finish
-    , {doExit, 255, 197,0,  TRUE}     //  197      errorDeath
+    , {doBackslashh, 104 /* h */, 14,0,  TRUE}     //  95 
+    , {doBackslashH, 72 /* H */, 14,0,  TRUE}     //  96 
+    , {doNOP, 107 /* k */, 115,0,  TRUE}     //  97 
+    , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  98 
+    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  99 
+    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  100 
+    , {doBackslashR, 82 /* R */, 14,0,  TRUE}     //  101 
+    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  102 
+    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  103 
+    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  104 
+    , {doBackslashv, 118 /* v */, 14,0,  TRUE}     //  105 
+    , {doBackslashV, 86 /* V */, 14,0,  TRUE}     //  106 
+    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  107 
+    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  108 
+    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  109 
+    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  110 
+    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  111 
+    , {doBackRef, 129, 14,0,  TRUE}     //  112 
+    , {doEscapeError, 253, 206,0,  FALSE}     //  113 
+    , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  114 
+    , {doBeginNamedBackRef, 60 /* < */, 117,0,  TRUE}     //  115      named-backref
+    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  116 
+    , {doContinueNamedBackRef, 128, 119,0,  TRUE}     //  117      named-backref-2
+    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  118 
+    , {doContinueNamedBackRef, 128, 119,0,  TRUE}     //  119      named-backref-3
+    , {doContinueNamedBackRef, 129, 119,0,  TRUE}     //  120 
+    , {doCompleteNamedBackRef, 62 /* > */, 14,0,  TRUE}     //  121 
+    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  122 
+    , {doSetNegate, 94 /* ^ */, 126,0,  TRUE}     //  123      set-open
+    , {doSetPosixProp, 58 /* : */, 128,0,  FALSE}     //  124 
+    , {doNOP, 255, 126,0,  FALSE}     //  125 
+    , {doSetLiteral, 93 /* ] */, 141,0,  TRUE}     //  126      set-open2
+    , {doNOP, 255, 131,0,  FALSE}     //  127 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  128      set-posix
+    , {doNOP, 58 /* : */, 131,0,  FALSE}     //  129 
+    , {doRuleError, 255, 206,0,  FALSE}     //  130 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  131      set-start
+    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  132 
+    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  133 
+    , {doNOP, 45 /* - */, 137,0,  TRUE}     //  134 
+    , {doNOP, 38 /* & */, 139,0,  TRUE}     //  135 
+    , {doSetLiteral, 255, 141,0,  TRUE}     //  136 
+    , {doRuleError, 45 /* - */, 206,0,  FALSE}     //  137      set-start-dash
+    , {doSetAddDash, 255, 141,0,  FALSE}     //  138 
+    , {doRuleError, 38 /* & */, 206,0,  FALSE}     //  139      set-start-amp
+    , {doSetAddAmp, 255, 141,0,  FALSE}     //  140 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  141      set-after-lit
+    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  142 
+    , {doNOP, 45 /* - */, 178,0,  TRUE}     //  143 
+    , {doNOP, 38 /* & */, 169,0,  TRUE}     //  144 
+    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  145 
+    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  146 
+    , {doSetLiteral, 255, 141,0,  TRUE}     //  147 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  148      set-after-set
+    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  149 
+    , {doNOP, 45 /* - */, 171,0,  TRUE}     //  150 
+    , {doNOP, 38 /* & */, 166,0,  TRUE}     //  151 
+    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  152 
+    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  153 
+    , {doSetLiteral, 255, 141,0,  TRUE}     //  154 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  155      set-after-range
+    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  156 
+    , {doNOP, 45 /* - */, 174,0,  TRUE}     //  157 
+    , {doNOP, 38 /* & */, 176,0,  TRUE}     //  158 
+    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  159 
+    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  160 
+    , {doSetLiteral, 255, 141,0,  TRUE}     //  161 
+    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  162      set-after-op
+    , {doSetOpError, 93 /* ] */, 206,0,  FALSE}     //  163 
+    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  164 
+    , {doSetLiteral, 255, 141,0,  TRUE}     //  165 
+    , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE}     //  166      set-set-amp
+    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  167 
+    , {doSetAddAmp, 255, 141,0,  FALSE}     //  168 
+    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  169      set-lit-amp
+    , {doSetAddAmp, 255, 141,0,  FALSE}     //  170 
+    , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE}     //  171      set-set-dash
+    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  172 
+    , {doSetAddDash, 255, 141,0,  FALSE}     //  173 
+    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  174      set-range-dash
+    , {doSetAddDash, 255, 141,0,  FALSE}     //  175 
+    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  176      set-range-amp
+    , {doSetAddAmp, 255, 141,0,  FALSE}     //  177 
+    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  178      set-lit-dash
+    , {doSetAddDash, 91 /* [ */, 141,0,  FALSE}     //  179 
+    , {doSetAddDash, 93 /* ] */, 141,0,  FALSE}     //  180 
+    , {doNOP, 92 /* \ */, 183,0,  TRUE}     //  181 
+    , {doSetRange, 255, 155,0,  TRUE}     //  182 
+    , {doSetOpError, 115 /* s */, 206,0,  FALSE}     //  183      set-lit-dash-escape
+    , {doSetOpError, 83 /* S */, 206,0,  FALSE}     //  184 
+    , {doSetOpError, 119 /* w */, 206,0,  FALSE}     //  185 
+    , {doSetOpError, 87 /* W */, 206,0,  FALSE}     //  186 
+    , {doSetOpError, 100 /* d */, 206,0,  FALSE}     //  187 
+    , {doSetOpError, 68 /* D */, 206,0,  FALSE}     //  188 
+    , {doSetNamedRange, 78 /* N */, 155,0,  FALSE}     //  189 
+    , {doSetRange, 255, 155,0,  TRUE}     //  190 
+    , {doSetProp, 112 /* p */, 148,0,  FALSE}     //  191      set-escape
+    , {doSetProp, 80 /* P */, 148,0,  FALSE}     //  192 
+    , {doSetNamedChar, 78 /* N */, 141,0,  FALSE}     //  193 
+    , {doSetBackslash_s, 115 /* s */, 155,0,  TRUE}     //  194 
+    , {doSetBackslash_S, 83 /* S */, 155,0,  TRUE}     //  195 
+    , {doSetBackslash_w, 119 /* w */, 155,0,  TRUE}     //  196 
+    , {doSetBackslash_W, 87 /* W */, 155,0,  TRUE}     //  197 
+    , {doSetBackslash_d, 100 /* d */, 155,0,  TRUE}     //  198 
+    , {doSetBackslash_D, 68 /* D */, 155,0,  TRUE}     //  199 
+    , {doSetBackslash_h, 104 /* h */, 155,0,  TRUE}     //  200 
+    , {doSetBackslash_H, 72 /* H */, 155,0,  TRUE}     //  201 
+    , {doSetBackslash_v, 118 /* v */, 155,0,  TRUE}     //  202 
+    , {doSetBackslash_V, 86 /* V */, 155,0,  TRUE}     //  203 
+    , {doSetLiteralEscaped, 255, 141,0,  TRUE}     //  204 
+    , {doSetFinish, 255, 14,0,  FALSE}     //  205      set-finish
+    , {doExit, 255, 206,0,  TRUE}     //  206      errorDeath
  };
 static const char * const RegexStateNames[] = {    0,
      "start",
@@ -444,6 +462,11 @@ static const char * const RegexStateNames[] = {    0,
     0,
     0,
     0,
+    0,
+    0,
+    0,
+    0,
+    0,
     0,
      "named-backref",
     0,
@@ -530,6 +553,10 @@ static const char * const RegexStateNames[] = {    0,
     0,
     0,
     0,
+    0,
+    0,
+    0,
+    0,
     0,
      "set-finish",
      "errorDeath",
diff --git a/icu4c/source/i18n/regexcst.pl b/icu4c/source/i18n/regexcst.pl
index f1dc06af7ec..d52093629c5 100755
--- a/icu4c/source/i18n/regexcst.pl
+++ b/icu4c/source/i18n/regexcst.pl
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 #  ********************************************************************
 #  * COPYRIGHT:
-#  * Copyright (c) 2002-2007, International Business Machines Corporation and
+#  * Copyright (c) 2002-2015, International Business Machines Corporation and
 #  * others. All Rights Reserved.
 #  ********************************************************************
 #
@@ -206,7 +206,7 @@ print "//    This file contains the state table for the ICU Regular Expression P
 print "//    It is generated by the Perl script \"regexcst.pl\" from\n";
 print "//    the rule parser state definitions file \"regexcst.txt\".\n";
 print "//\n";
-print "//   Copyright (C) 2002-2007 International Business Machines Corporation \n";
+print "//   Copyright (C) 2002-2015 International Business Machines Corporation \n";
 print "//   and others. All rights reserved.  \n";
 print "//\n";
 print "//---------------------------------------------------------------------------------\n";
diff --git a/icu4c/source/i18n/regexcst.txt b/icu4c/source/i18n/regexcst.txt
index fe9bc6e74cb..1b88e446540 100644
--- a/icu4c/source/i18n/regexcst.txt
+++ b/icu4c/source/i18n/regexcst.txt
@@ -250,13 +250,18 @@ backslash:
    'd'                   n  expr-quant                              doBackslashd
    'D'                   n  expr-quant                              doBackslashD
    'G'                   n  term                                    doBackslashG
+   'h'                   n  expr-quant                              doBackslashh
+   'H'                   n  expr-quant                              doBackslashH
    'k'                   n  named-backref
    'N'                      expr-quant                              doNamedChar      #   \N{NAME}  named char
    'p'                      expr-quant                              doProperty       #   \p{Lu}  style property
    'P'                      expr-quant                              doProperty
+   'R'                   n  expr-quant                              doBackslashR
    'Q'                   n  term                                    doEnterQuoteMode
    'S'                   n  expr-quant                              doBackslashS
    's'                   n  expr-quant                              doBackslashs
+   'v'                   n  expr-quant                              doBackslashv
+   'V'                   n  expr-quant                              doBackslashV
    'W'                   n  expr-quant                              doBackslashW
    'w'                   n  expr-quant                              doBackslashw
    'X'                   n  expr-quant                              doBackslashX
@@ -472,6 +477,10 @@ set-escape:
    'W'                   n  set-after-range                         doSetBackslash_W
    'd'                   n  set-after-range                         doSetBackslash_d
    'D'                   n  set-after-range                         doSetBackslash_D
+   'h'                   n  set-after-range                         doSetBackslash_h
+   'H'                   n  set-after-range                         doSetBackslash_H
+   'v'                   n  set-after-range                         doSetBackslash_v
+   'V'                   n  set-after-range                         doSetBackslash_V
    default               n  set-after-lit                           doSetLiteralEscaped 
 
 #
diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h
index fdd9c76e6f4..52ea662633e 100644
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@@ -1,5 +1,5 @@
 //
-//   Copyright (C) 2002-2014 International Business Machines Corporation
+//   Copyright (C) 2002-2015 International Business Machines Corporation
 //   and others. All rights reserved.
 //
 //   file:  regeximp.h
@@ -173,7 +173,10 @@ enum {
      URX_BACKSLASH_BU  = 53,   // \b or \B in UREGEX_UWORD mode, using Unicode style
                                //   word boundaries.
      URX_DOLLAR_D      = 54,   // $ end of input test, in UNIX_LINES mode.
-     URX_DOLLAR_MD     = 55    // $ end of input test, in MULTI_LINE and UNIX_LINES mode.
+     URX_DOLLAR_MD     = 55,   // $ end of input test, in MULTI_LINE and UNIX_LINES mode.
+     URX_BACKSLASH_H   = 56,   // Value field:  0:  \h    1:  \H
+     URX_BACKSLASH_R   = 57,   // Any line break sequence.
+     URX_BACKSLASH_V   = 58    // Value field:  0:  \v    1:  \V
 
 };
 
@@ -235,7 +238,10 @@ enum {
         "LOOP_DOT_I",          \
         "BACKSLASH_BU",        \
         "DOLLAR_D",            \
-        "DOLLAR_MD"
+        "DOLLAR_MD",           \
+        "URX_BACKSLASH_H",     \
+        "URX_BACKSLASH_R",     \
+        "URX_BACKSLASH_V" 
 
 
 //
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp
index 41330332a50..341d29f13a8 100644
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -49,6 +49,15 @@ static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000;
 //   This constant determines that state saves per tick number.
 static const int32_t TIMER_INITIAL_VALUE = 10000;
 
+
+// Test for any Unicode line terminator: LF, VT, FF, CR (0x0a-0x0d), NEL (0x85), LS (0x2028) or PS (0x2029).
+static inline UBool isLineTerminator(UChar32 c) {
+    if (c & ~(0x0a | 0x0b | 0x0c | 0x0d | 0x85 | 0x2028 | 0x2029)) {   // fast reject: c has a bit that no terminator has (mask is ~0x20af)
+        return false;
+    }
+    return (c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029;   // exact test for the few values that pass the filter
+}
+
 //-----------------------------------------------------------------------------
 //
 //   Constructor and Destructor
@@ -837,20 +846,19 @@ UBool RegexMatcher::find(UErrorCode &status) {
                 }
             } else {
                 for (;;) {
-                    if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                        ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
-                            if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
-                                (void)UTEXT_NEXT32(fInputText);
-                                startPos = UTEXT_GETNATIVEINDEX(fInputText);
-                            }
-                            MatchAt(startPos, FALSE, status);
-                            if (U_FAILURE(status)) {
-                                return FALSE;
-                            }
-                            if (fMatch) {
-                                return TRUE;
-                            }
-                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
+                    if (isLineTerminator(c)) {
+                        if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
+                            (void)UTEXT_NEXT32(fInputText);
+                            startPos = UTEXT_GETNATIVEINDEX(fInputText);
+                        }
+                        MatchAt(startPos, FALSE, status);
+                        if (U_FAILURE(status)) {
+                            return FALSE;
+                        }
+                        if (fMatch) {
+                            return TRUE;
+                        }
+                        UTEXT_SETNATIVEINDEX(fInputText, startPos);
                     }
                     if (startPos >= testStartLimit) {
                         fMatch = FALSE;
@@ -1098,8 +1106,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) {
         } else {
             for (;;) {
                 c = inputBuf[startPos-1];
-                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
+                if (isLineTerminator(c)) {
                     if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
                         startPos++;
                     }
@@ -2927,9 +2934,9 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
                 //   end of input, succeed.
                 UChar32 c = UTEXT_NEXT32(fInputText);
                 if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
-                    if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
+                    if (isLineTerminator(c)) {
                         // If not in the middle of a CR/LF sequence
-                      if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) {
+                        if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) {
                             // At new-line at end of input. Success
                             fHitEnd = TRUE;
                             fRequireEnd = TRUE;
@@ -2985,7 +2992,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
                  // It makes no difference where the new-line is within the input.
                  UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
                  UChar32 c = UTEXT_CURRENT32(fInputText);
-                 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
+                 if (isLineTerminator(c)) {
                      // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
                      //  In multi-line mode, hitting a new-line just before the end of input does not
                      //   set the hitEnd or requireEnd flags
@@ -3034,8 +3041,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
                //   unless we are at the end of input
                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
                UChar32  c = UTEXT_PREVIOUS32(fInputText);
-               if ((fp->fInputIdx < fAnchorLimit) &&
-                   ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
+               if ((fp->fInputIdx < fAnchorLimit) && isLineTerminator(c)) {
                    //  It's a new-line.  ^ is true.  Success.
                    //  TODO:  what should be done with positions between a CR and LF?
                    break;
@@ -3116,6 +3122,68 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
             break;
 
 
+        case URX_BACKSLASH_H:            // Test for \h (opValue 0) or \H (opValue 1), horizontal white space.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
+                UChar32 c = UTEXT_NEXT32(fInputText);    // fetch one code point; the UText iterator is left positioned after it
+                int8_t ctype = u_charType(c);
+                UBool success = (ctype == U_SPACE_SEPARATOR || c == 9);  // SPACE_SEPARATOR || TAB
+                success ^= (UBool)(opValue != 0);        // flip sense for \H
+                if (success) {
+                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);    // matched: commit the position after the code point
+                } else {
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
+        case URX_BACKSLASH_R:            // Test for \R, any line break sequence. CR/LF is consumed as a single unit.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
+                UChar32 c = UTEXT_NEXT32(fInputText);
+                if (isLineTerminator(c)) {
+                    if (c == 0x0d && UTEXT_GETNATIVEINDEX(fInputText) < fActiveLimit && utext_current32(fInputText) == 0x0a) {
+                        utext_next32(fInputText);        // LF is taken only when it lies inside the active range, as in MatchChunkAt
+                    }
+                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);    // matched: commit the position after the line break
+                } else {
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
+        case URX_BACKSLASH_V:            // \v (opValue 0) or \V (opValue 1), any single line ending character.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
+                UChar32 c = UTEXT_NEXT32(fInputText);    // fetch one code point; the UText iterator is left positioned after it
+                UBool success = isLineTerminator(c);     // one code point only, so CR/LF is never consumed as a pair here
+                success ^= (UBool)(opValue != 0);        // flip sense for \V
+                if (success) {
+                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);    // matched: commit the position after the code point
+                } else {
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
         case URX_BACKSLASH_X:
             //  Match a Grapheme, as defined by Unicode TR 29.
             //  Differs slightly from Perl, which consumes combining marks independently
@@ -3343,8 +3411,7 @@ GC_Done:
 
                 // There is input left.  Advance over one char, unless we've hit end-of-line
                 UChar32 c = UTEXT_NEXT32(fInputText);
-                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
+                if (isLineTerminator(c)) {
                     // End of line in normal mode.   . does not match.
                         fp = (REStackFrame *)fStack->popFrame(fFrameSize);
                     break;
@@ -4101,7 +4168,7 @@ GC_Done:
                         if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
                             if ((c == 0x0a) ||             //  0x0a is newline in both modes.
                                (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
-                                    (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028 || c==0x2029) {
+                                    isLineTerminator(c))) {
                                 //  char is a line ending.  Exit the scanning loop.
                                 break;
                             }
@@ -4432,7 +4499,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
                 UChar32 c;
                 U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c);
 
-                if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
+                if (isLineTerminator(c)) {
                     if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
                         // At new-line at end of input. Success
                         fHitEnd = TRUE;
@@ -4486,7 +4553,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
                 // If we are positioned just before a new-line, succeed.
                 // It makes no difference where the new-line is within the input.
                 UChar32 c = inputBuf[fp->fInputIdx];
-                if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
+                if (isLineTerminator(c)) {
                     // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
                     //  In multi-line mode, hitting a new-line just before the end of input does not
                     //   set the hitEnd or requireEnd flags
@@ -4534,7 +4601,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
                 //   unless we are at the end of input
                 UChar  c = inputBuf[fp->fInputIdx - 1];
                 if ((fp->fInputIdx < fAnchorLimit) &&
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
+                    isLineTerminator(c)) {
                     //  It's a new-line.  ^ is true.  Success.
                     //  TODO:  what should be done with positions between a CR and LF?
                     break;
@@ -4611,6 +4678,69 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
             break;
 
 
+        case URX_BACKSLASH_H:            // Test for \h (opValue 0) or \H (opValue 1), horizontal white space.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UChar32 c;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);      // reads one code point and advances fp->fInputIdx past it
+                int8_t ctype = u_charType(c);
+                UBool success = (ctype == U_SPACE_SEPARATOR || c == 9);  // SPACE_SEPARATOR || TAB
+                success ^= (UBool)(opValue != 0);        // flip sense for \H
+                if (!success) {                          // on success the index was already advanced by U16_NEXT above
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
+        case URX_BACKSLASH_R:            // Test for \R, any line break sequence. CR/LF is consumed as a single unit.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UChar32 c;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);      // reads one code point and advances fp->fInputIdx past it
+                if (isLineTerminator(c)) {
+                    if (c == 0x0d && fp->fInputIdx < fActiveLimit) {     // look for a following LF only while still inside the active range
+                        // Check for CR/LF sequence. Consume both together when found.
+                        UChar c2;
+                        U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c2);   // tentatively step over the next code point
+                        if (c2 != 0x0a) {
+                            U16_PREV(inputBuf, 0, fp->fInputIdx, c2);    // not LF: step back so it is left unconsumed
+                        }
+                    }
+                } else {
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
+        case URX_BACKSLASH_V:         // \v (opValue 0) or \V (opValue 1). Any single code point line ending.
+            {
+                if (fp->fInputIdx >= fActiveLimit) {     // no input left in the active range: record the end hit, then backtrack
+                    fHitEnd = TRUE;
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
+                    break;
+                }
+                UChar32 c;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);      // reads one code point and advances fp->fInputIdx past it
+                UBool success = isLineTerminator(c);     // one code point only, so CR/LF is never consumed as a pair here
+                success ^= (UBool)(opValue != 0);        // flip sense for \V
+                if (!success) {                          // on success the index was already advanced by U16_NEXT above
+                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match: pop back to the previous stack frame
+                }
+            }
+            break;
+
+
+
         case URX_BACKSLASH_X:
         //  Match a Grapheme, as defined by Unicode TR 29.
         //  Differs slightly from Perl, which consumes combining marks independently
@@ -4820,8 +4950,7 @@ GC_Done:
                 // There is input left.  Advance over one char, unless we've hit end-of-line
                 UChar32  c;
                 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
-                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
+                if (isLineTerminator(c)) {
                     // End of line in normal mode.   . does not match.
                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
                     break;
@@ -5535,7 +5664,7 @@ GC_Done:
                         if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
                             if ((c == 0x0a) ||             //  0x0a is newline in both modes.
                                 (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
-                                   ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029))) {
+                                   isLineTerminator(c))) {
                                 //  char is a line ending.  Put the input pos back to the
                                 //    line ending char, and exit the scanning loop.
                                 U16_BACK_1(inputBuf, 0, ix);
diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp
index 14454e25f8f..58650d11374 100644
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@@ -742,6 +742,9 @@ void   RegexPattern::dumpOp(int32_t index) const {
     case URX_LBN_END:
     case URX_LOOP_C:
     case URX_LOOP_DOT_I:
+    case URX_BACKSLASH_H:
+    case URX_BACKSLASH_R:
+    case URX_BACKSLASH_V:
         // types with an integer operand field.
         printf("%d", val);
         break;
diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt
index e0f8b27d758..15d13bf156e 100644
--- a/icu4c/source/test/testdata/regextst.txt
+++ b/icu4c/source/test/testdata/regextst.txt
@@ -693,6 +693,78 @@
 "abc\jkl"                         "<0>abcjkl</0>"    # escape of a non-special letter is just itself.
 "abc[ \j]kl"                      "<0>abcjkl</0>"
 
+#
+# \R  all newline sequences.
+#
+"abc\Rxyz"                        "<0>abc\u000axyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u000bxyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u000cxyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u000dxyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u0085xyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u2028xyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u2029xyz</0>gh"
+"abc\Rxyz"                        "<0>abc\u000d\u000axyz</0>gh"
+
+"abc\R\nxyz"                      "abc\u000d\u000axyzgh"          # \R cannot match only the CR from a CR/LF sequence.
+"abc\r\nxyz"                      "<0>abc\u000d\u000axyz</0>gh"
+
+"abc\Rxyz"                        "abc\u0009xyz"                  # Assorted non-matches.
+"abc\Rxyz"                        "abc\u000exyz"
+"abc\Rxyz"                        "abc\u202axyz"
+
+# \v \V single character new line sequences.
+
+"abc\vxyz"                        "<0>abc\u000axyz</0>gh"
+"abc\vxyz"                        "<0>abc\u000bxyz</0>gh"
+"abc\vxyz"                        "<0>abc\u000cxyz</0>gh"
+"abc\vxyz"                        "<0>abc\u000dxyz</0>gh"
+"abc\vxyz"                        "<0>abc\u0085xyz</0>gh"
+"abc\vxyz"                        "<0>abc\u2028xyz</0>gh"
+"abc\vxyz"                        "<0>abc\u2029xyz</0>gh"
+"abc\vxyz"                        "abc\u000d\u000axyzgh"
+"abc\vxyz"                        "abc?xyzgh"
+
+"abc[\v]xyz"                      "<0>abc\u000axyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u000bxyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u000cxyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u000dxyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u0085xyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u2028xyz</0>gh"
+"abc[\v]xyz"                      "<0>abc\u2029xyz</0>gh"
+"abc[\v]xyz"                      "abc\u000d\u000axyzgh"
+"abc[\v]xyz"                      "abc?xyzgh"
+
+"abc\Vxyz"                        "abc\u000axyzgh"
+"abc\Vxyz"                        "abc\u000bxyzgh"
+"abc\Vxyz"                        "abc\u000cxyzgh"
+"abc\Vxyz"                        "abc\u000dxyzgh"
+"abc\Vxyz"                        "abc\u0085xyzgh"
+"abc\Vxyz"                        "abc\u2028xyzgh"
+"abc\Vxyz"                        "abc\u2029xyzgh"
+"abc\Vxyz"                        "abc\u000d\u000axyzgh"
+"abc\Vxyz"                        "<0>abc?xyz</0>gh"
+
+# \h \H horizontal white space. Defined as gc=space_separator plus ascii tab
+
+"abc\hxyz"                        "<0>abc xyz</0>gh"
+"abc\Hxyz"                        "abc xyzgh"
+"abc\hxyz"                        "<0>abc\u2003xyz</0>gh"
+"abc\Hxyz"                        "abc\u2003xyzgh"
+"abc\hxyz"                        "<0>abc\u0009xyz</0>gh"
+"abc\Hxyz"                        "abc\u0009xyzgh"
+"abc\hxyz"                        "abc?xyzgh"
+"abc\Hxyz"                        "<0>abc?xyz</0>gh"
+
+"abc[\h]xyz"                      "<0>abc xyz</0>gh"
+"abc[\H]xyz"                      "abc xyzgh"
+"abc[\h]xyz"                      "<0>abc\u2003xyz</0>gh"
+"abc[\H]xyz"                      "abc\u2003xyzgh"
+"abc[\h]xyz"                      "<0>abc\u0009xyz</0>gh"
+"abc[\H]xyz"                      "abc\u0009xyzgh"
+"abc[\h]xyz"                      "abc?xyzgh"
+"abc[\H]xyz"                      "<0>abc?xyz</0>gh"
+
+
 #
 # Bug xxxx
 #