ICU-5359 rbbi, port of C changes to Java, in progress.

X-SVN-Rev: 20230
2025-04-07 22:44:49 +00:00 · 2006-09-01 22:14:57 +00:00 · 2006-09-01 22:14:57 +00:00 · d7167854cb
commit d7167854cb
parent 16c599e73d
1 changed files with 180 additions and 132 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
+++ b/icu4j/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
@ -135,14 +135,17 @@ public class RBBITestMonkey extends TestFmwk {
        RBBIWordMonkey() {
            fSets          = new ArrayList();

-            fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}]");
-            fKatakanaSet     = new UnicodeSet("[\\p{Word_Break = Katakana}]");
+            fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}" +
+                                               "[\\p{Line_Break = Complex_Context}"  +
+                                                 "-\\p{Grapheme_Cluster_Break = Extend}" +
+                                                 "-\\p{Grapheme_Cluster_Break = Control}]]");
+            fKatakanaSet     = new UnicodeSet("[\\p{Word_Break = Katakana}-[\\uff9e\\uff9f]]");
            fMidLetterSet    = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
            fMidNumSet       = new UnicodeSet("[\\p{Word_Break = MidNum}]");
            fNumericSet      = new UnicodeSet("[\\p{Word_Break = Numeric}]");
            fFormatSet       = new UnicodeSet("[\\p{Word_Break = Format}]");
            fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
-            fExtendSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
+            fExtendSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}\\uff9e\\uff9f]");
            fOtherSet        = new UnicodeSet();

            fOtherSet.complement();
@ -200,21 +203,16 @@ public class RBBITestMonkey extends TestFmwk {
                p1 = p2;  c1 = c2;
                p2 = p3;  c2 = c3;
                
-                // Advancd p3 by    (GC Format*)   Rules 3, 4
-                p3 = nextGC(fText, p3);
-                if (p3 == -1 || p3 >= fText.length()) {
-                    p3 = fText.length();
-                    c3 = 0;
-                } else {
-                    c3 = UTF16.charAt(fText, p3);
-                    while (fFormatSet.contains(c3)) {
-                        p3 = moveIndex32(fText, p3, 1);
-                        c3 = 0;
-                        if (p3 < fText.length()) {
-                            c3 = UTF16.charAt(fText, p3);   
-                        }
+                // Advancd p3 by    X(Extend | Format)*   Rule 4
+                do {
+                    p3 = moveIndex32(fText, p3, 1);
+                    c3 = -1;
+                    if (p3>=fText.length()) {
+                        break;
                    }
-                }
+                    c3 = UTF16.charAt(fText, p3);
+                 }
+                while (fFormatSet.contains(c3) || fExtendSet.contains(c3));

                if (p1 == p2) {
                    // Still warming up the loop.  (won't work with zero length strings, but we don't care)
@ -225,6 +223,14 @@ public class RBBITestMonkey extends TestFmwk {
                    break;
                }

+                // Rule (3)   CR x LF
+                //     No Extend or Format characters may appear between the CR and LF,
+                //     which requires the additional check for p2 immediately following p1.
+                //
+                if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
+                    continue;
+                }
+                
                // Rule (5).   ALetter x ALetter
                if (fALetterSet.contains(c1) &&
                        fALetterSet.contains(c2))  {
@ -417,9 +423,14 @@ public class RBBITestMonkey extends TestFmwk {
            fSets.add(fBA);
            fSets.add(fBB);
            fSets.add(fHY);
+            fSets.add(fH2);
+            fSets.add(fH3);
            fSets.add(fCL);
            fSets.add(fEX);
            fSets.add(fIN);
+            fSets.add(fJL);
+            fSets.add(fJT);
+            fSets.add(fJV);
            fSets.add(fNS);
            fSets.add(fOP);
            fSets.add(fQU);
@ -433,6 +444,7 @@ public class RBBITestMonkey extends TestFmwk {
            fSets.add(fID);
            fSets.add(fWJ);
            fSets.add(fSA);
+            fSets.add(fSG);
            
        }
        
@ -489,14 +501,14 @@ public class RBBITestMonkey extends TestFmwk {
                    break;
                }
                
-                // Rule LB 7 - adjust for combining sequences.
+                // Rule LB 9 - adjust for combining sequences.
                //             We do this rule out-of-order because the adjustment does
                //             not effect the way that rules LB 3 through LB 6 match,
                //             and doing it here rather than after LB 6 is substantially
                //             simpler when combining sequences do occur.
                
                
-                // LB 7b        Keep combining sequences together.
+                // LB 9         Keep combining sequences together.
                //              advance over any CM class chars at "pos", 
                //              result is "nextPos" for the following loop iteration.
                thisChar  = UTF16.charAt(fText, pos);
@ -514,7 +526,10 @@ public class RBBITestMonkey extends TestFmwk {
                    }
                }
                
-                // LB 7c     Treat unattached combining chars as AL
+                // LB 9 Treat X CM* as if it were X
+                //        No explicit action required.
+                
+                // LB 10     Treat any remaining combining mark as AL
                if (fCM.contains(thisChar)) {
                    thisChar = 'A';   
                }
@ -527,12 +542,12 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 3a  Always break after hard line breaks,
+                // LB 4  Always break after hard line breaks,
                if (fBK.contains(prevChar)) {
                    break;
                }
                
-                // LB 3b  Break after CR, LF, NL, but not inside CR LF
+                // LB 5  Break after CR, LF, NL, but not inside CR LF
                if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
                    continue;
                }
@ -542,14 +557,14 @@ public class RBBITestMonkey extends TestFmwk {
                    break;
                }
                
-                // LB 3c  Don't break before hard line breaks
+                // LB 6  Don't break before hard line breaks
                if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
                        fLF.contains(thisChar) || fNL.contains(thisChar) ) {
                    continue;
                }
                
                
-                // LB 4  Don't break before spaces or zero-width space.
+                // LB 7  Don't break before spaces or zero-width space.
                if (fSP.contains(thisChar)) {
                    continue;
                }
@ -558,16 +573,33 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 5  Break after zero width space
+                // LB 8  Break after zero width space
                if (fZW.contains(prevChar)) {
                    break;
                }
                
-                //  LB 7  Already done, at top of loop.
+                //  LB 9, 10  Already done, at top of loop.
                //
                
                
-                // LB 8  Don't break before closings.
+                // LB 11
+                //    x  WJ
+                //    WJ  x
+                if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
+                    continue;
+                }
+                
+                
+                // LB 12
+                //        (!SP) x GL
+                //        GL x
+                if ((!fSP.contains(prevChar)) && fGL.contains(thisChar) || 
+                        fGL.contains(prevChar)) {
+                    continue;
+                }
+                
+                
+                // LB 13  Don't break before closings.
                //       NU x CL  and NU x IS are not matched here so that they will
                //       fall into LB 17 and the more general number regular expression.
                //
@ -578,13 +610,15 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 9  Don't break after OP SP*
+                // LB 14  Don't break after OP SP*
                //       Scan backwards, checking for this sequence.
                //       The OP char could include combining marks, so we acually check for
                //           OP CM* SP* x
                tPos = prevPos;
-                while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                    tPos=moveIndex32(fText, tPos, -1);
+                if (fSP.contains(prevChar)) {
+                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
+                        tPos=moveIndex32(fText, tPos, -1);
+                    }
                }
                while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
                    tPos=moveIndex32(fText, tPos, -1);
@ -593,9 +627,10 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 10 Do not break withing "[ 
+                // LB 15 Do not break withing "[ 
                //       QU CM* SP* x OP
                if (fOP.contains(thisChar)) {
+                    // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
@ -608,7 +643,7 @@ public class RBBITestMonkey extends TestFmwk {
                    }
                }               
                
-                // LB 11   CL SP* x NS
+                // LB 16   CL SP* x NS
                if (fNS.contains(thisChar)) {
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
@ -623,7 +658,7 @@ public class RBBITestMonkey extends TestFmwk {
                }               
                
                               
-                // LB 11a        B2 SP* x B2
+                // LB 17        B2 SP* x B2
                if (fB2.contains(thisChar)) {
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
@ -637,39 +672,24 @@ public class RBBITestMonkey extends TestFmwk {
                    }
                }               
                
-                // LB 11b
-                //    x  WJ
-                //    WJ  x
-                if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
-                    continue;
-                }
-                
-                // LB 12    break after space
+                // LB 18    break after space
                if (fSP.contains(prevChar)) {
                    break;
                }
                
-                // LB 13
-                //        x GL
-                //        GL x
-                if (fGL.contains(thisChar) || fGL.contains(prevChar)) {
-                    continue;
-                }
-                
-                
-                // LB 14
+                // LB 19
                //    x   QU
                //    QU  x
                if (fQU.contains(thisChar) || fQU.contains(prevChar)) {
                    continue;
                }
                
-                // LB 14a  Break around a CB
+                // LB 20  Break around a CB
                if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
                    break;
                }
                
-                // LB 15
+                // LB 21
                if (fBA.contains(thisChar) ||
                        fHY.contains(thisChar) ||
                        fNS.contains(thisChar) ||
@ -677,7 +697,7 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 16
+                // LB 22
                if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
                        fID.contains(prevChar) && fIN.contains(thisChar) ||
                        fIN.contains(prevChar) && fIN.contains(thisChar) ||
@ -686,7 +706,7 @@ public class RBBITestMonkey extends TestFmwk {
                }
                
                
-                // LB 17    ID x PO    (Note:  Leading CM behaves like ID)
+                // LB 23    ID x PO    (Note:  Leading CM behaves like ID)
                //          AL x NU
                //          NU x AL
                if (fID.contains(prevChar) && fPO.contains(thisChar) ||
@ -695,7 +715,18 @@ public class RBBITestMonkey extends TestFmwk {
                    continue;
                }
                
-                // LB 18    Numbers
+                // LB 24  Do not break between prefix and letters or ideographs.
+                //        PR x ID
+                //        PR x AL
+                //        PO x AL
+                if (fPR.contains(prevChar) && fID.contains(thisChar) ||
+                    fPR.contains(prevChar) && fAL.contains(thisChar) ||
+                    fPO.contains(prevChar) && fAL.contains(thisChar))  {
+                    continue;
+                }
+                
+                
+                // LB 25    Numbers
                matchVals = LBNumberCheck(fText, prevPos, matchVals);
                if (matchVals[0] != -1) {
                    // Matched a number.  But could have been just a single digit, which would
@ -718,15 +749,9 @@ public class RBBITestMonkey extends TestFmwk {
                        continue;
                    }
                }
-                if (fPR.contains(prevChar) && fAL.contains(thisChar)) {
-                    continue;   
-                }
-                if (fPR.contains(prevChar) && fID.contains(thisChar)) {
-                    continue;
-                }
                
                
-                // LB 18b  Do not break Korean Syllables
+                // LB 26  Do not break Korean Syllables
                if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
                                                fJV.contains(thisChar) ||
                                                fH2.contains(thisChar) ||
@ -744,7 +769,7 @@ public class RBBITestMonkey extends TestFmwk {
                        continue;
                }

-                // LB 18c  more Korean
+                // LB 27 Treat a Korean Syllable Block the same as ID
                if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
                    fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
                    fIN.contains(thisChar)) {
@ -762,19 +787,31 @@ public class RBBITestMonkey extends TestFmwk {

                
                
-                // LB 19
+                // LB 28 Do not break between alphabetics
                if (fAL.contains(prevChar) && fAL.contains(thisChar)) {
                    continue;
                }
                
-                // LB 19b
+                // LB 29  Do not break between numeric punctuation and alphabetics
                if (fIS.contains(prevChar) && fAL.contains(thisChar)) {
                    continue;
                }
                
-                // LB 20    Break everywhere else
-                break;
-                
+                // LB 30  Do not break between letters, numbers or oridnary symbols and 
+                //        opening or closing punctuation.
+                //        (AL | NU) x OP
+                //        CL x (AL | NU)
+                if ((fAL.contains(prevChar) || fNU.contains(prevChar)) &&
+                        fOP.contains(thisChar)) {
+                    continue;
+                }
+                if (fCL.contains(prevChar) &&
+                        (fAL.contains(thisChar) || fNU.contains(thisChar))) {
+                    continue;
+                }
+             
+                // LB 31    Break everywhere else
+                break;            
            }
            
            return pos;
@ -783,8 +820,8 @@ public class RBBITestMonkey extends TestFmwk {
        
        
        // Match the following regular expression in the input text.
-        //     (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PO CM*)?
-        //      0  1       3    3    3        7     7    7    7    7      9   9     11 11    (match states)
+        //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PR | PO) CM*)?
+        //      0    0   1       3    3    4              7    7    7    7      9   9     11   11    (match states)
        //  retVals array  [0]  index of the start of the match, or -1 if no match
        //                 [1]  index of first char following the match.
        //  Can not use Java regex because need supplementary character support,
@ -803,7 +840,8 @@ public class RBBITestMonkey extends TestFmwk {
                int cLBType = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
                switch (matchState) {
                    case 0:   
-                        if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
+                        if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC ||
+                            cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
                            matchState = 1;  
                            break;
                        }
@ -851,9 +889,9 @@ public class RBBITestMonkey extends TestFmwk {
                            break;
                        }
                        break matchLoop;   /* No Match  */
-                        //     (PR CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PO CM*)?
-                        //      0  1       3    3    4    7   7     7    7    7    7      9   9     11 11    (match states)
-
+                        //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PR | PO) CM*)?
+                        //      0    0   1       3    3    4              7    7    7    7      9   9     11   11    (match states)
+                 
                    case 7:
                        if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                            matchState = 7;
@ -879,6 +917,11 @@ public class RBBITestMonkey extends TestFmwk {
                            matchState = 11;
                            break;                           
                        }
+                        if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
+                            matchState = 11;
+                            break;                           
+                        }
+
                        break matchLoop;    // Match Complete.
                    case 9:
                        if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
@ -889,6 +932,10 @@ public class RBBITestMonkey extends TestFmwk {
                            matchState = 11;
                            break;                           
                        }
+                        if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
+                            matchState = 11;
+                            break;                           
+                        }
                        break matchLoop;    // Match Complete.
                    case 11:
                        if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
@ -937,6 +984,7 @@ public class RBBITestMonkey extends TestFmwk {
        UnicodeSet           fSTermSet;
        UnicodeSet           fCloseSet;
        UnicodeSet           fOtherSet;
+        UnicodeSet           fExtendSet;

 
        
@ -948,11 +996,12 @@ public class RBBITestMonkey extends TestFmwk {
            fSpSet           = new UnicodeSet("[\\p{Sentence_Break = Sp}]");
            fLowerSet        = new UnicodeSet("[\\p{Sentence_Break = Lower}]");
            fUpperSet        = new UnicodeSet("[\\p{Sentence_Break = Upper}]");
-            fOLetterSet      = new UnicodeSet("[\\p{Sentence_Break = OLetter}]");
+            fOLetterSet      = new UnicodeSet("[\\p{Sentence_Break = OLetter}-[\\uff9e\\uff9f]]");
            fNumericSet      = new UnicodeSet("[\\p{Sentence_Break = Numeric}]");
            fATermSet        = new UnicodeSet("[\\p{Sentence_Break = ATerm}]");
            fSTermSet        = new UnicodeSet("[\\p{Sentence_Break = STerm}]");
            fCloseSet        = new UnicodeSet("[\\p{Sentence_Break = Close}]");
+            fExtendSet       = new UnicodeSet("[\\p{Grapheme_Extend}\\uff9e\\uff9f]");
            fOtherSet        = new UnicodeSet();


@ -967,6 +1016,7 @@ public class RBBITestMonkey extends TestFmwk {
            fOtherSet.removeAll(fATermSet);
            fOtherSet.removeAll(fSTermSet);
            fOtherSet.removeAll(fCloseSet);
+            fOtherSet.removeAll(fExtendSet);

            fSets.add(fSepSet);
            fSets.add(fFormatSet);
@ -980,6 +1030,7 @@ public class RBBITestMonkey extends TestFmwk {
            fSets.add(fSTermSet);
            fSets.add(fCloseSet);
            fSets.add(fOtherSet);
+            fSets.add(fExtendSet);
        }
        
        
@ -991,64 +1042,40 @@ public class RBBITestMonkey extends TestFmwk {
            fText = s;        
        }   

- /*
-        //
-        //   moveIndex32.  Utility to move an index, needed to avoid
-        //                 onewanted exceptions, and to simplify porting from C.
-        //
-        static int moveIndex32(StringBuffer s, int from, int delta) {
-            int   result;
-            try {
-                result = UTF16.moveCodePointOffset(s, from, delta);
-            } 
-            catch(StringIndexOutOfBoundsException e) {
-                result = delta < 0? 0: s.length();
-            }
-            return result;
-        }
-        */
        
        //      moveBack()   Find the "significant" code point preceding the index i.
-        //      Skips over format chars, and 2nd-nth chars of grapheme clusters.
-        //      The incoming parameter i must be on a boundary already.
+        //      Skips over ($Extend | $Format)*
+        // 
        private int moveBack(int i) {
-            int        testPos;
            
            if (i <= 0) {
                return -1;
            }
-
-            // We are looking for the index of the first chunk that immediately
-            // precedes the incoming index.  
-            testPos = i;
-            for (;;) {
-                testPos = moveIndex32(fText, testPos, -1); 
-                int endPos = moveForward(testPos);
-                if (endPos < i) {
-                    return endPos;
-                }
-                if (testPos == 0) {
-                    return 0;
-                }
+            
+            int      c;
+            int      j = i;
+            do {
+                j = moveIndex32(fText, j, -1);
+                c = UTF16.charAt(fText, j);
            }
+            while (j>0 &&(fFormatSet.contains(c) || fExtendSet.contains(c)));
+            return j;
        }
        
        
        int moveForward(int i) {
-            int  result = fText.length();
-            if (i < fText.length()) {
-                result = nextGC(fText, i); 
-                if (i < 0) {
-                    i = fText.length();
-                } else {
-                    if (!fSepSet.contains(cAt(i))) {
-                        while (result<fText.length() && fFormatSet.contains(cAt(result))) {
-                            result = moveIndex32(fText, result, 1);
-                        }
-                    }
-                }
+            if (i>=fText.length()) {
+                return fText.length();
            }
-            return result;
+            int   c;
+            int   j = i;
+            do {
+                j = moveIndex32(fText, j, 1);
+                c = cAt(j);
+            }
+            while (c>=0 && (fFormatSet.contains(c) || fExtendSet.contains(c)));
+            return j;
+           
        }
        
        int cAt(int pos) {
@ -1081,9 +1108,21 @@ public class RBBITestMonkey extends TestFmwk {
                p0 = p1;  c0 = c1;
                p1 = p2;  c1 = c2;
                p2 = p3;  c2 = c3;
-                // Advancd p3 by  a grapheme cluster.   Rules 3, 4
+                
+                // Advancd p3 by  X(Extend | Format)*   Rule 4
                p3 = moveForward(p3);
                c3 = cAt(p3);
+                
+                // Rule (3) CR x LF
+                if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
+                    continue;
+                }
+                
+                // Rule (4)    Sep  <break>
+                if (fSepSet.contains(c1)) {
+                    p2 = p1+1;   // Separators don't combine with Extend or Format
+                    break;
+                }               

                if (p2 >= fText.length()) {
                    // Reached end of string.  Always a break position.
@ -1094,10 +1133,6 @@ public class RBBITestMonkey extends TestFmwk {
                    // Still warming up the loop.  (won't work with zero length strings, but we don't care)
                    continue;
                }
-                // Rule (3).   Sep  <break>
-                if (fSepSet.contains(c1)) {
-                    break;
-                }

                // Rule (6).   ATerm x Numeric
                if (fATermSet.contains(c1) &&  fNumericSet.contains(c2))  {
@ -1110,6 +1145,8 @@ public class RBBITestMonkey extends TestFmwk {
                }

                // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower
+                //           Note:  Sterm | ATerm are added to the negated part of the expression by a 
+                //                  note to the Unicode 5.0 documents.
                int p8 = p1;
                while (p8>0 && fSpSet.contains(cAt(p8))) {
                    p8 = moveBack(p8);
@ -1123,9 +1160,8 @@ public class RBBITestMonkey extends TestFmwk {
                        c = cAt(p8);
                        if (c==-1 || fOLetterSet.contains(c) || fUpperSet.contains(c) ||
                            fLowerSet.contains(c) || fSepSet.contains(c) ||
-                            fATermSet.contains(c) || fSTermSet.contains(c))   // This last line deviates from
-                                                                                //  the TR.  The TR is wacky.
-                        {
+                            fATermSet.contains(c) || fSTermSet.contains(c))  
+                         {
                            break;
                        }
                        p8 = moveForward(p8);
@ -1134,6 +1170,21 @@ public class RBBITestMonkey extends TestFmwk {
                        continue;
                    }
                }
+                
+                // Rule 8a  (STerm | ATerm) Close* Sp* x (Sterm | ATerm)
+                if (fSTermSet.contains(c2) || fATermSet.contains(c2)) {
+                    p8 = p1;
+                    while (fSpSet.contains(cAt(p8))) {
+                        p8 = moveBack(p8);
+                    }
+                    while (fCloseSet.contains(cAt(p8))) {
+                        p8 = moveBack(p8);
+                    }
+                    c = cAt(p8);
+                    if (fSTermSet.contains(c) || fATermSet.contains(c)) {
+                        continue;
+                    }
+                }


                // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep)
@ -1186,9 +1237,6 @@ public class RBBITestMonkey extends TestFmwk {
    }

 
-
-    
-    
    /**
     * Move an index into a string by n code points.
     *   Similar to UTF16.moveCodePointOffset, but without the exceptions, which were
@ -1496,7 +1544,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
    //--------------------------------------------------------------------------------------------
    
    int  dotsOnLine = 0;
-    while (loopCount < numIterations || numIterations == -1) {
+     while (loopCount < numIterations || numIterations == -1) {
        if (numIterations == -1 && loopCount % 10 == 0) {
            // If test is running in an infinite loop, display a periodic tic so
            //   we can tell that it is making progress.