From 003c9da518a8ad3f0f2192b204382e606757bba2 Mon Sep 17 00:00:00 2001
From: Mark Davis <mark@macchiato.com>
Date: Thu, 10 Sep 2015 14:30:28 +0000
Subject: [PATCH] ICU-11738 Updated to handle string ranges.

X-SVN-Rev: 37943
---
 .../src/com/ibm/icu/impl/StringRange.java     | 282 ++++++++++++++++++
 .../core/src/com/ibm/icu/text/UnicodeSet.java | 135 ++++++---
 .../ibm/icu/dev/test/lang/UnicodeSetTest.java |  36 ++-
 3 files changed, 406 insertions(+), 47 deletions(-)
 create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java
new file mode 100644
index 00000000000..9b52085d81f
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java
@@ -0,0 +1,282 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2015, Google, Inc., International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.CharSequences;
+import com.ibm.icu.util.ICUException;
+
+@SuppressWarnings("deprecation")
+public class StringRange {
+    private static final boolean DEBUG = false;
+
+    public interface Adder { // callback that compact(...) invokes once per emitted string or string range
+        /**
+         * @param start the single string, or the first string of the range
+         * @param end   may be null, for adding single string; otherwise the end of the range (possibly only its suffix)
+         */
+        void add(String start, String end);
+    }
+
+    public static final Comparator<int[]> COMPARE_INT_ARRAYS = new Comparator<int[]>() {
+        public int compare(int[] o1, int[] o2) { // lexicographic by element; a proper prefix sorts first
+            int minIndex = Math.min(o1.length, o2.length);
+            for (int i = 0; i < minIndex; ++i) {
+                // Compare explicitly: o1[i] - o2[i] can overflow and report the wrong sign for arbitrary ints.
+                if (o1[i] != o2[i]) {
+                    return o1[i] < o2[i] ? -1 : 1;
+                }
+            }
+            return o1.length - o2.length;
+        }
+    };
+
+    /**
+     * Compact the set of strings into single strings and string ranges, reporting each one to the adder.
+     * @param source the strings to compact; nothing is emitted when it is empty
+     * @param adder adds each pair to the output. See the {@link Adder} interface.
+     * @param shorterPairs use abc-d instead of abc-abd
+     * @param moreCompact use a more compact form, at the expense of more processing. If false, source must be sorted.
+     */
+    public static void compact(Set<String> source, Adder adder, boolean shorterPairs, boolean moreCompact) {
+        if (!moreCompact) {
+            String start = null;
+            String end = null;
+            int lastCp = 0;
+            int prefixLen = 0;
+            for (String s : source) {
+                if (start != null) { // We have something queued up
+                    if (s.length() > prefixLen && s.regionMatches(0, start, 0, prefixLen)) {
+                        int currentCp = s.codePointAt(prefixLen);
+                        if (currentCp == 1+lastCp && s.length() == prefixLen + Character.charCount(currentCp)) {
+                            end = s;
+                            lastCp = currentCp;
+                            continue;
+                        }
+                    }
+                    // We failed to find continuation. Add what we have and restart.
+                    // A single string passes end == null; with shorterPairs only the part of
+                    // end after the shared prefix is passed.
+                    adder.add(start, (end == null || !shorterPairs) ? end : end.substring(prefixLen));
+                }
+                // new possible range
+                start = s;
+                end = null;
+                lastCp = s.codePointBefore(s.length());
+                prefixLen = s.length() - Character.charCount(lastCp);
+            }
+            if (start != null) { // flush the last queued item; start is still null when source is empty
+                adder.add(start, (end == null || !shorterPairs) ? end : end.substring(prefixLen));
+            }
+        } else {
+            // not a fast algorithm, but ok for now
+            // TODO rewire to use the first (slower) algorithm to generate the ranges, then compact them from there.
+            // first sort by lengths
+            Relation<Integer,Ranges> lengthToArrays = Relation.of(new TreeMap<Integer,Set<Ranges>>(), TreeSet.class);
+            for (String s : source) {
+                Ranges item = new Ranges(s);
+                lengthToArrays.put(item.size(), item);
+            }
+            // then compact items of each length and emit compacted sets
+            for (Entry<Integer, Set<Ranges>> entry : lengthToArrays.keyValuesSet()) {
+                LinkedList<Ranges> compacted = compact(entry.getKey(), entry.getValue());
+                for (Ranges ranges : compacted) {
+                    adder.add(ranges.start(), ranges.end(shorterPairs));
+                }
+            }
+        }
+    }
+    
+    /**
+     * Faster but not as good compaction. Only looks at final codepoint.
+     * @param source the strings to compact; must be sorted, because this always takes the non-moreCompact path
+     * @param adder adds each pair to the output. See the {@link Adder} interface.
+     * @param shorterPairs use abc-d instead of abc-abd
+     */
+    public static void compact(Set<String> source, Adder adder, boolean shorterPairs) {
+        compact(source,adder,shorterPairs,false);
+    }
+
+    private static LinkedList<Ranges> compact(int size, Set<Ranges> inputRanges) {
+        // Work on a copy; each pass folds an item into its predecessor when Ranges.merge accepts it.
+        LinkedList<Ranges> ranges = new LinkedList<Ranges>(inputRanges);
+        for (int pivot = size - 1; pivot >= 0; --pivot) { // last position first
+            Iterator<Ranges> it = ranges.iterator();
+            Ranges last = it.hasNext() ? it.next() : null;
+            while (it.hasNext()) {
+                Ranges item = it.next();
+                if (last.merge(pivot, item)) {
+                    it.remove(); // item was absorbed into last
+                } else {
+                    last = item; // go to next
+                }
+            }
+        }
+        return ranges;
+    }
+
+    static final class Range implements Comparable<Range>{ // inclusive code point range min..max
+        int min;
+        int max;
+        public Range(int min, int max) {
+            this.min = min;
+            this.max = max;
+        }
+        @Override
+        public boolean equals(Object obj) { // false, rather than an exception, for null or a non-Range
+            return obj instanceof Range && compareTo((Range)obj) == 0;
+        }
+        public int compareTo(Range that) { // orders by min, then by max
+            // Explicit comparisons: subtraction could overflow for arbitrary int bounds.
+            if (min != that.min) {
+                return min < that.min ? -1 : 1;
+            }
+            return max == that.max ? 0 : max < that.max ? -1 : 1;
+        }
+        @Override
+        public int hashCode() {
+            return min * 37 + max;
+        }
+        @Override
+        public String toString() {
+            StringBuilder result = new StringBuilder().appendCodePoint(min);
+            return min == max ? result.toString() : result.append('~').appendCodePoint(max).toString();
+        }
+    }
+
+    /**
+     * One string, or a block of strings with the same number of code points, kept as one inclusive code point range per position.
+     */
+    static final class Ranges implements Comparable<Ranges> {
+        private final Range[] ranges;
+        public Ranges(String s) { // starts out covering just s: every position holds the range cp..cp
+            int[] codePoints = CharSequences.codePoints(s);
+            ranges = new Range[codePoints.length];
+            int index = 0;
+            for (int cp : codePoints) {
+                ranges[index++] = new Range(cp, cp);
+            }
+        }
+        /**
+         * Absorbs other into this item if the two are adjacent at position pivot and equal at
+         * every other position. Returns false, leaving this item unchanged, otherwise.
+         */
+        public boolean merge(int pivot, Ranges other) {
+            for (int i = ranges.length-1; i >= 0; --i) {
+                boolean compatible = i == pivot
+                    ? ranges[i].max == other.ranges[i].min-1 // adjacent at the pivot
+                        : ranges[i].equals(other.ranges[i]);
+                if (!compatible) {
+                    return false;
+                }
+            }
+            if (DEBUG) System.out.print("Merging: " + this + ", " + other);
+            ranges[pivot].max = other.ranges[pivot].max;
+            if (DEBUG) System.out.println(" => " + this);
+            return true;
+        }
+
+        /** Returns the string made of the minimum code point of every position. */
+        public String start() {
+            StringBuilder result = new StringBuilder();
+            for (Range range : ranges) {
+                result.appendCodePoint(range.min);
+            }
+            return result.toString();
+        }
+        /** Returns the string made of the maximum code points, or null when no position has min != max. */
+        public String end(boolean mostCompact) {
+            int firstDiff = firstDifference();
+            if (firstDiff == ranges.length) {
+                return null;
+            }
+            StringBuilder result = new StringBuilder();
+            int from = mostCompact ? firstDiff : 0; // mostCompact skips the leading positions where min == max
+            for (int i = from; i < ranges.length; ++i) {
+                result.appendCodePoint(ranges[i].max);
+            }
+            return result.toString();
+        }
+        public int firstDifference() { // index of the first position with min != max, or the length if none
+            int i = 0;
+            while (i < ranges.length && ranges[i].min == ranges[i].max) {
+                ++i;
+            }
+            return i;
+        }
+        public Integer size() {
+            return ranges.length;
+        }
+        public int compareTo(Ranges other) { // fewer positions first, then position by position
+            int diff = ranges.length - other.ranges.length;
+            for (int i = 0; diff == 0 && i < ranges.length; ++i) {
+                diff = ranges[i].compareTo(other.ranges[i]);
+            }
+            return diff;
+        }
+        @Override
+        public String toString() {
+            String first = start();
+            String last = end(false);
+            if (last == null) {
+                return first;
+            }
+            return first + "~" + last;
+        }
+    }
+
+    /**
+     * Adds every string of the range start..end to output and returns output; each code point position of end runs from the matching code point of start up to that of end.
+     */
+    public static Collection<String> expand(String start, String end, boolean requireSameLength, Collection<String> output) {
+        if (start == null || end == null) {
+            throw new ICUException("Range must have 2 valid strings");
+        }
+        int[] startCps = CharSequences.codePoints(start);
+        int[] endCps = CharSequences.codePoints(end);
+        int prefixCount = startCps.length - endCps.length; // extra leading code points of start, copied as-is
+        String problem = requireSameLength && prefixCount != 0 ? "Range must have equal-length strings"
+            : prefixCount < 0 ? "Range must have start-length ≥ end-length"
+                : endCps.length == 0 ? "Range must have end-length > 0" : null;
+        if (problem != null) {
+            throw new ICUException(problem);
+        }
+        StringBuilder prefix = new StringBuilder();
+        for (int i = 0; i < prefixCount; ++i) {
+            prefix.appendCodePoint(startCps[i]);
+        }
+        add(0, prefixCount, startCps, endCps, prefix, output);
+        return output;
+    }
+    
+    // Recursive step of expand: tries every code point allowed at position endIndex of the end string.
+    private static void add(int endIndex, int startOffset, int[] starts, int[] ends, StringBuilder builder, Collection<String> output) {
+        final int low = starts[endIndex+startOffset];
+        final int high = ends[endIndex];
+        if (low > high) {
+            throw new ICUException("Range must have xᵢ ≤ yᵢ for each index i");
+        }
+        final int savedLength = builder.length();
+        for (int cp = low; cp <= high; ++cp) {
+            builder.appendCodePoint(cp);
+            if (endIndex + 1 < ends.length) { // more positions follow: fill them in recursively
+                add(endIndex + 1, startOffset, starts, ends, builder, output);
+            } else { // last position: the string is complete
+                output.add(builder.toString());
+            }
+            builder.setLength(savedLength); // remove this code point before trying the next one
+        }
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
index 87374f2ba9f..1f4abe58d24 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@@ -20,6 +20,7 @@ import com.ibm.icu.impl.Norm2AllModes;
 import com.ibm.icu.impl.PatternProps;
 import com.ibm.icu.impl.RuleCharacterIterator;
 import com.ibm.icu.impl.SortedSetRelation;
+import com.ibm.icu.impl.StringRange;
 import com.ibm.icu.impl.UBiDiProps;
 import com.ibm.icu.impl.UCaseProps;
 import com.ibm.icu.impl.UCharacterProperty;
@@ -772,19 +773,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
             T result, boolean escapeUnprintable, boolean includeStrings) {
         try {
             result.append('[');
-    
+
             int count = getRangeCount();
-    
+
             // If the set contains at least 2 intervals and includes both
             // MIN_VALUE and MAX_VALUE, then the inverse representation will
             // be more economical.
             if (count > 1 &&
                     getRangeStart(0) == MIN_VALUE &&
                     getRangeEnd(count-1) == MAX_VALUE) {
-    
+
                 // Emit the inverse
                 result.append('^');
-    
+
                 for (int i = 1; i < count; ++i) {
                     int start = getRangeEnd(i-1)+1;
                     int end = getRangeStart(i)-1;
@@ -797,7 +798,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     }
                 }
             }
-    
+
             // Default; emit the ranges as pairs
             else {
                 for (int i = 0; i < count; ++i) {
@@ -812,7 +813,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     }
                 }
             }
-    
+
             if (includeStrings && strings.size() > 0) {
                 for (String s : strings) {
                     result.append('{');
@@ -2431,6 +2432,21 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
         return this;
     }
 
+    // Named values for the parser state variables lastItem, mode and setMode, to make the code easier to follow
+
+    static final int LAST0_START = 0, // lastItem: none
+            LAST1_RANGE = 1, // lastItem: a char is pending in lastChar (it may become the start of a range)
+            LAST2_SET = 2; // lastItem: a set
+
+    static final int MODE0_NONE = 0, // mode: before the opening '['
+            MODE1_INBRACKET = 1, // mode: between '[' and ']'
+            MODE2_OUTBRACKET = 2; // mode: after the closing ']'
+
+    static final int SETMODE0_NONE = 0, // setMode: no set follows
+            SETMODE1_UNICODESET = 1, // setMode: a nested [...] set follows
+            SETMODE2_PROPERTYPAT = 2, // setMode: a property pattern follows
+            SETMODE3_PREPARSED = 3; // setMode: a set that was already parsed and looked up in the symbol table
+
+
     /**
      * Parse the pattern from the given RuleCharacterIterator.  The
      * iterator is advanced over the parsed pattern.
@@ -2465,14 +2481,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
 
         // mode: 0=before [, 1=between [...], 2=after ]
         // lastItem: 0=none, 1=char, 2=set
-        int lastItem = 0, lastChar = 0, mode = 0;
+        int lastItem = LAST0_START, lastChar = 0, mode = MODE0_NONE;
         char op = 0;
 
         boolean invert = false;
 
         clear();
+        String lastString = null;
 
-        while (mode != 2 && !chars.atEnd()) {
+        while (mode != MODE2_OUTBRACKET && !chars.atEnd()) {
             //Eclipse stated the following is "dead code"
             /*
             if (false) {
@@ -2491,9 +2508,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
             // -------- Check for property pattern
 
             // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
-            int setMode = 0;
+            int setMode = SETMODE0_NONE;
             if (resemblesPropertyPattern(chars, opts)) {
-                setMode = 2;
+                setMode = SETMODE2_PROPERTYPAT;
             }
 
             // -------- Parse '[' of opening delimiter OR nested set.
@@ -2511,12 +2528,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                 literal = chars.isEscaped();
 
                 if (c == '[' && !literal) {
-                    if (mode == 1) {
+                    if (mode == MODE1_INBRACKET) {
                         chars.setPos(backup); // backup
-                        setMode = 1;
+                        setMode = SETMODE1_UNICODESET;
                     } else {
                         // Handle opening '[' delimiter
-                        mode = 1;
+                        mode = MODE1_INBRACKET;
                         patBuf.append('[');
                         backup = chars.getPos(backup); // prepare to backup
                         c = chars.next(opts);
@@ -2543,7 +2560,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     if (m != null) {
                         try {
                             nested = (UnicodeSet) m;
-                            setMode = 3;
+                            setMode = SETMODE3_PREPARSED;
                         } catch (ClassCastException e) {
                             syntaxError(chars, "Syntax error");
                         }
@@ -2556,14 +2573,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
             // previously been parsed and was looked up in the symbol
             // table.
 
-            if (setMode != 0) {
-                if (lastItem == 1) {
+            if (setMode != SETMODE0_NONE) {
+                if (lastItem == LAST1_RANGE) {
                     if (op != 0) {
                         syntaxError(chars, "Char expected after operator");
                     }
                     add_unchecked(lastChar, lastChar);
                     _appendToPat(patBuf, lastChar, false);
-                    lastItem = op = 0;
+                    lastItem = LAST0_START;
+                    op = 0;
                 }
 
                 if (op == '-' || op == '&') {
@@ -2575,24 +2593,24 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     nested = scratch;
                 }
                 switch (setMode) {
-                case 1:
+                case SETMODE1_UNICODESET:
                     nested.applyPattern(chars, symbols, patBuf, options);
                     break;
-                case 2:
+                case SETMODE2_PROPERTYPAT:
                     chars.skipIgnored(opts);
                     nested.applyPropertyPattern(chars, patBuf, symbols);
                     break;
-                case 3: // `nested' already parsed
+                case SETMODE3_PREPARSED: // `nested' already parsed
                     nested._toPattern(patBuf, false);
                     break;
                 }
 
                 usePat = true;
 
-                if (mode == 0) {
+                if (mode == MODE0_NONE) {
                     // Entire pattern is a category; leave parse loop
                     set(nested);
-                    mode = 2;
+                    mode = MODE2_OUTBRACKET;
                     break;
                 }
 
@@ -2609,12 +2627,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                 }
 
                 op = 0;
-                lastItem = 2;
+                lastItem = LAST2_SET;
 
                 continue;
             }
 
-            if (mode == 0) {
+            if (mode == MODE0_NONE) {
                 syntaxError(chars, "Missing '['");
             }
 
@@ -2625,7 +2643,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
             if (!literal) {
                 switch (c) {
                 case ']':
-                    if (lastItem == 1) {
+                    if (lastItem == LAST1_RANGE) {
                         add_unchecked(lastChar, lastChar);
                         _appendToPat(patBuf, lastChar, false);
                     }
@@ -2637,11 +2655,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                         syntaxError(chars, "Trailing '&'");
                     }
                     patBuf.append(']');
-                    mode = 2;
+                    mode = MODE2_OUTBRACKET;
                     continue;
                 case '-':
                     if (op == 0) {
-                        if (lastItem != 0) {
+                        if (lastItem != LAST0_START) {
+                            op = (char) c;
+                            continue;
+                        } else if (lastString != null) {
                             op = (char) c;
                             continue;
                         } else {
@@ -2651,15 +2672,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                             literal = chars.isEscaped();
                             if (c == ']' && !literal) {
                                 patBuf.append("-]");
-                                mode = 2;
+                                mode = MODE2_OUTBRACKET;
                                 continue;
                             }
                         }
                     }
-                    syntaxError(chars, "'-' not after char or set");
+                    syntaxError(chars, "'-' not after char, string, or set");
                     break;
                 case '&':
-                    if (lastItem == 2 && op == 0) {
+                    if (lastItem == LAST2_SET && op == 0) {
                         op = (char) c;
                         continue;
                     }
@@ -2669,14 +2690,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     syntaxError(chars, "'^' not after '['");
                     break;
                 case '{':
-                    if (op != 0) {
+                    if (op != 0 && op != '-') {
                         syntaxError(chars, "Missing operand after operator");
                     }
-                    if (lastItem == 1) {
+                    if (lastItem == LAST1_RANGE) {
                         add_unchecked(lastChar, lastChar);
                         _appendToPat(patBuf, lastChar, false);
                     }
-                    lastItem = 0;
+                    lastItem = LAST0_START;
                     if (buf == null) {
                         buf = new StringBuilder();
                     } else {
@@ -2698,9 +2719,27 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     // We have new string. Add it to set and continue;
                     // we don't need to drop through to the further
                     // processing
-                    add(buf.toString());
+                    String curString = buf.toString();
+                    if (op == '-') {
+                        int lastSingle = CharSequences.getSingleCodePoint(lastString == null ? "" : lastString);
+                        int curSingle = CharSequences.getSingleCodePoint(curString);
+                        if (lastSingle != Integer.MAX_VALUE && curSingle != Integer.MAX_VALUE) {
+                            add(lastSingle,curSingle);
+                        } else {
+                            try {
+                                StringRange.expand(lastString, curString, true, strings);
+                            } catch (Exception e) {
+                                syntaxError(chars, e.getMessage());
+                            }
+                        }
+                        lastString = null;
+                        op = 0;
+                    } else {
+                        add(curString);
+                        lastString = curString;
+                    }
                     patBuf.append('{');
-                    _appendToPat(patBuf, buf.toString(), false);
+                    _appendToPat(patBuf, curString, false);
                     patBuf.append('}');
                     continue;
                 case SymbolTable.SYMBOL_REF:
@@ -2720,14 +2759,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                         break; // literal '$'
                     }
                     if (anchor && op == 0) {
-                        if (lastItem == 1) {
+                        if (lastItem == LAST1_RANGE) {
                             add_unchecked(lastChar, lastChar);
                             _appendToPat(patBuf, lastChar, false);
                         }
                         add_unchecked(UnicodeMatcher.ETHER);
                         usePat = true;
                         patBuf.append(SymbolTable.SYMBOL_REF).append(']');
-                        mode = 2;
+                        mode = MODE2_OUTBRACKET;
                         continue;
                     }
                     syntaxError(chars, "Unquoted '$'");
@@ -2742,12 +2781,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
             // ("a").
 
             switch (lastItem) {
-            case 0:
-                lastItem = 1;
+            case LAST0_START:
+                if (op == '-' && lastString != null) {
+                    syntaxError(chars, "Invalid range");
+                }
+                lastItem = LAST1_RANGE;
                 lastChar = c;
+                lastString = null;
                 break;
-            case 1:
+            case LAST1_RANGE:
                 if (op == '-') {
+                    if (lastString != null) {
+                        syntaxError(chars, "Invalid range");
+                    }
                     if (lastChar >= c) {
                         // Don't allow redundant (a-a) or empty (b-a) ranges;
                         // these are most likely typos.
@@ -2757,24 +2803,25 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                     _appendToPat(patBuf, lastChar, false);
                     patBuf.append(op);
                     _appendToPat(patBuf, c, false);
-                    lastItem = op = 0;
+                    lastItem = LAST0_START;
+                    op = 0;
                 } else {
                     add_unchecked(lastChar, lastChar);
                     _appendToPat(patBuf, lastChar, false);
                     lastChar = c;
                 }
                 break;
-            case 2:
+            case LAST2_SET:
                 if (op != 0) {
                     syntaxError(chars, "Set expected after operator");
                 }
                 lastChar = c;
-                lastItem = 1;
+                lastItem = LAST1_RANGE;
                 break;
             }
         }
 
-        if (mode != 2) {
+        if (mode != MODE2_OUTBRACKET) {
             syntaxError(chars, "Missing ']'");
         }
 
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
index 9e8ccd6c9d9..a1697d755b6 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@@ -1,6 +1,6 @@
 /*
  *******************************************************************************
- * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */
@@ -2611,7 +2611,7 @@ public class UnicodeSetTest extends TestFmwk {
         assertEquals("CharSequence remove", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").remove(new StringBuilder("abc")) );
         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").complement(new StringBuilder("abc")) );
         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{abc}{qr}]"), new UnicodeSet("[a-cA{qr}]").complement(new StringBuilder("abc")) );
-        
+
         assertEquals("CharSequence addAll", new UnicodeSet("[a-cABC]"), new UnicodeSet("[a-cA]").addAll(new StringBuilder("ABC")) );
         assertEquals("CharSequence retainAll", new UnicodeSet("[a-c]"), new UnicodeSet("[a-cA]").retainAll(new StringBuilder("abcB")) );
         assertEquals("CharSequence removeAll", new UnicodeSet("[Aab]"), new UnicodeSet("[a-cA]").removeAll(new StringBuilder("cC")) );
@@ -2621,7 +2621,7 @@ public class UnicodeSetTest extends TestFmwk {
         assertEquals("CharSequence contains", true, new UnicodeSet("[a-cA{ab}]"). contains(new StringBuilder("ab")) ); 
         assertEquals("CharSequence containsNone", false, new UnicodeSet("[a-cA]"). containsNone(new StringBuilder("ab"))  );
         assertEquals("CharSequence containsSome", true, new UnicodeSet("[a-cA{ab}]"). containsSome(new StringBuilder("ab"))  );
-        
+
         // spanning
         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), SpanCondition.SIMPLE) );
         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), 1, SpanCondition.SIMPLE) );
@@ -2636,4 +2636,34 @@ public class UnicodeSetTest extends TestFmwk {
         assertEquals("CharSequence findLastIn", -1, new UnicodeSet("[a-cA]"). findLastIn(new StringBuilder("abc"), 1, true) );
         assertEquals("CharSequence add", "c", new UnicodeSet("[abA]"). stripFrom(new StringBuilder("abc"), true));
     }
+
+    public void TestAStringRange() {
+        // Each row: the pattern under test, then either an equivalent pattern or the expected error message.
+        String[][] tests = {
+                {"[{ax}-{bz}]", "[{ax}{ay}{az}{bx}{by}{bz}]"},
+                {"[{a}-{c}]", "[a-c]"},
+                //{"[a-{c}]", "[a-c]"}, // don't handle these yet: enable once we do
+                //{"[{a}-c]", "[a-c]"}, // don't handle these yet: enable once we do
+                {"[{ax}-{by}-{cz}]", "Error: '-' not after char, string, or set at \"[{ax}-{by}-{|cz}]\""},
+                {"[{a}-{bz}]", "Error: Range must have equal-length strings at \"[{a}-{bz}|]\""},
+                {"[{ax}-{b}]", "Error: Range must have equal-length strings at \"[{ax}-{b}|]\""},
+                {"[{ax}-bz]", "Error: Invalid range at \"[{ax}-b|z]\""},
+                {"[ax-{bz}]", "Error: Range must have 2 valid strings at \"[ax-{bz}|]\""},
+                {"[{bx}-{az}]", "Error: Range must have xᵢ ≤ yᵢ for each index i at \"[{bx}-{az}|]\""},
+        };
+        for (int i = 0; i < tests.length; ++i) {
+            String source = tests[i][0];
+            String expected = tests[i][1];
+            if (expected.startsWith("[")) {
+                expected = new UnicodeSet(expected).toPattern(false); // compare in toPattern form
+            }
+            String actual;
+            try {
+                actual = new UnicodeSet(source).toPattern(false);
+            } catch (Exception e) {
+                actual = e.getMessage();
+            }
+            assertEquals("StringRange " + i, expected, actual);
+        }
+    }
 }