diff --git a/icu4c/source/data/brkitr/char.txt b/icu4c/source/data/brkitr/char.txt
index 04272b32768..e48b99f1d35 100644
--- a/icu4c/source/data/brkitr/char.txt
+++ b/icu4c/source/data/brkitr/char.txt
@@ -15,9 +15,9 @@
 #
 $CR = \r;
 $LF = \n;
-$Control    = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
+$Control    = [[:Zl:] [:Zp:] [:Cc:] [:Cf:] - [:Grapheme_Extend = TRUE:]];   # Zl/Zp/Cc/Cf, minus anything that is Grapheme_Extend
 
-$Extend     = [[:Grapheme_Extend = TRUE:] - [$Control]];
+$Extend     = [[:Grapheme_Extend = TRUE:]];   # every Grapheme_Extend character; $Control is no longer subtracted from this set
 
 #
 # Korean Syllable Definitions
diff --git a/icu4c/source/data/brkitr/sent.txt b/icu4c/source/data/brkitr/sent.txt
index 0656be96c49..7ae215caaa6 100644
--- a/icu4c/source/data/brkitr/sent.txt
+++ b/icu4c/source/data/brkitr/sent.txt
@@ -14,7 +14,7 @@
 # Character categories as defined in TR 29
 #
 $Sep     = [\u000a \u000d \u0085 \u2028 \u2029];
-$Format  = [[:Format:]];
+$Format  = [[:Format:] - [:Grapheme_Extend:]];   # [:Format:] characters, excluding any that are Grapheme_Extend
 $Sp      = [[:Whitespace:] - $Sep];
 $Lower   = [[:Lowercase:]];
 $Upper   = [[:TitleCase_Letter:] [:Uppercase:]];
diff --git a/icu4c/source/data/brkitr/word.txt b/icu4c/source/data/brkitr/word.txt
index aaea0506db0..21de6ed71fa 100644
--- a/icu4c/source/data/brkitr/word.txt
+++ b/icu4c/source/data/brkitr/word.txt
@@ -54,8 +54,8 @@ $Numeric   = [:LineBreak = Numeric:];
 $CR      = \u000d;
 $LF      = \u000a;
 $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
-$Extend  = [[:Grapheme_Extend = TRUE:] - $Control];
-$Format  = [[:Cf:]];
+$Extend  = [[:Grapheme_Extend = TRUE:]];   # every Grapheme_Extend character; $Control is no longer subtracted. NOTE(review): $Control is not reduced in this file, so the two sets may overlap -- confirm that is intended.
+$Format  = [[:Cf:] - $Extend];   # Cf characters that are not in $Extend
 $Hiragana = [:Hiragana:];
 $Ideographic = [:IDEOGRAPHIC:];
 
diff --git a/icu4c/source/i18n/regexst.cpp b/icu4c/source/i18n/regexst.cpp
index 4a14f975517..1aa55126ec4 100644
--- a/icu4c/source/i18n/regexst.cpp
+++ b/icu4c/source/i18n/regexst.cpp
@@ -108,14 +108,18 @@ static const UChar gIsWordPattern[] = {
     static const UChar gGC_ControlPattern[] = {
 //    [     [     :     Z     l     :     ]     [     :     Z     p     :     ]    
     0x5b, 0x5b, 0x3a, 0x5A, 0x6c, 0x3a, 0x5d, 0x5b, 0x3a, 0x5A, 0x70, 0x3a, 0x5d, 
-//    [     :     C     c     :     ]     [     :     C     f     :     ]     ] 
-    0x5b, 0x3a, 0x43, 0x63, 0x3a, 0x5d, 0x5b, 0x3a, 0x43, 0x66, 0x3a, 0x5d, 0x5d, 0};
+//    [     :     C     c     :     ]     [     :     C     f     :     ]     -
+    0x5b, 0x3a, 0x43, 0x63, 0x3a, 0x5d, 0x5b, 0x3a, 0x43, 0x66, 0x3a, 0x5d, 0x2d,
+//    [     :     G     r     a     p     h     e     m     e     _
+    0x5b, 0x3a, 0x47, 0x72, 0x61, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x5f,
+//    E     x     t     e     n     d     :     ]     ]
+    0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x3a, 0x5d, 0x5d, 0};   // whole array spells "[[:Zl:][:Zp:][:Cc:][:Cf:]-[:Grapheme_Extend:]]", NUL-terminated
 
     static const UChar gGC_ExtendPattern[] = {
 //    [     \     p     {     G     r     a     p     h     e     m     e     _
     0x5b, 0x5c, 0x70, 0x7b, 0x47, 0x72, 0x61, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x5f,
-//    E     x     t     e     n     d     }     -     \     p     {     C     f     }    ]
-    0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x7d, 0x2d, 0x5c, 0x70, 0x7b, 0x43, 0x66, 0x7d, 0x5d, 0};
+//    E     x     t     e     n     d     }     ]
+    0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x7d, 0x5d, 0};   // whole array spells "[\p{Grapheme_Extend}]", NUL-terminated
 
     static const UChar gGC_LPattern[] = {
 //    [     \     p     {     H     a     n     g     u     l     _     S     y     l    
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp
index a39556e80fd..811a1e308d3 100644
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -2091,7 +2091,7 @@ RBBICharMonkey::RBBICharMonkey() {
     fMatcher = new RegexMatcher("\\X", 0, status);     // Pattern to match a grampheme cluster
 
     fCRLFSet    = new UnicodeSet("[\\r\\n]", status);
-    fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]]", status);
+    fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]-\\p{Grapheme_Extend}]", status);
     fExtendSet  = new UnicodeSet("[\\p{Grapheme_Extend}]", status);
     fHangulSet  = new UnicodeSet(
         "[\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=T}"
@@ -2200,7 +2200,7 @@ RBBIWordMonkey::RBBIWordMonkey() : fGCFMatcher(0),
     fMidNumLetSet  = new UnicodeSet("[\\u002e\\u003a]", status);
     fMidNumSet     = new UnicodeSet("[\\p{Line_Break=Infix_Numeric}]", status);
     fNumericSet    = new UnicodeSet("[\\p{Line_Break=Numeric}]", status);
-    fFormatSet     = new UnicodeSet("[\\p{Format}]", status);
+    fFormatSet     = new UnicodeSet("[\\p{Format}-\\p{Grapheme_Extend}]", status);
     fExtendSet     = new UnicodeSet("[\\p{Grapheme_Extend}]", status);
     fOtherSet      = new UnicodeSet();
     if(U_FAILURE(status)) {