ICU-4374 first checkin of DateTimePatternGenerator

X-SVN-Rev: 20081
2025-04-08 06:53:45 +00:00 · 2006-08-17 22:03:30 +00:00 · 2006-08-17 22:03:30 +00:00 · 1fd2123188
commit 1fd2123188
parent 72184a0b2c
8 changed files with 3556 additions and 15 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
@ -0,0 +1,211 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006, Google, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.test.format;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.PatternTokenizer;
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.DateTimePatternGenerator;
+import com.ibm.icu.text.SimpleDateFormat;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Random;
+import java.util.ResourceBundle;
+
+public class DateTimeGeneratorTest extends TestFmwk {
+    // When true, TestBasic logs Java source for test-data entries instead of comparing against the expected strings.
+    public static boolean GENERATE_TEST_DATA = false;
+    // Number of random patterns TestPatternParser checks in each escaping mode.
+    public static int RANDOM_COUNT = 1000;
+    // Enables the extra token dumps and the re-run-on-failure debugging code in CheckPattern.
+    public static boolean DEBUG = false;
+    
+    /**
+     * Command-line entry point: creates the test object and hands the arguments to its {@code run} method.
+     * @param args command-line arguments, passed through unchanged
+     * @throws Exception anything propagated by {@code run}
+     */
+    public static void main(String[] args) throws Exception {
+        DateTimeGeneratorTest test = new DateTimeGeneratorTest();
+        test.run(args);
+    }
+    
+    /**
+     * Checks that PatternTokenizer normalization is idempotent, first with quote escaping and
+     * then with backslash escaping. Each mode runs the fixed patterns in patternTestData followed
+     * by RANDOM_COUNT random strings.
+     */
+    public void TestPatternParser() {
+        final StringBuffer scratch = new StringBuffer();
+        final PatternTokenizer tokenizer = new PatternTokenizer();
+        tokenizer.setIgnorableCharacters(new UnicodeSet("[-]"));
+        tokenizer.setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"));
+        tokenizer.setEscapeCharacters(new UnicodeSet("[b#]"));
+        tokenizer.setUsingQuote(true);
+        final String[] randomSet = {"abcdef", "$12!@#-", "'\\"};
+        for (int pass = 0; pass < 2; ++pass) {
+            if (pass == 0) {
+                logln("Using Quote");
+            } else {
+                logln("Using Backslash");
+                tokenizer.setUsingQuote(false);
+                tokenizer.setUsingSlash(true);
+            }
+            // fixed patterns first, then the random ones, exactly as many as RANDOM_COUNT
+            for (int k = 0; k < patternTestData.length; ++k) {
+                CheckPattern(scratch, tokenizer, (String) patternTestData[k]);
+            }
+            for (int k = 0; k < RANDOM_COUNT; ++k) {
+                CheckPattern(scratch, tokenizer, getRandomString(randomSet, 0, 10));
+            }
+        }
+    }
+    
+    // Seeded with a fixed value (-1), so the same sequence of random test patterns is produced on every run.
+    Random random = new java.util.Random(-1);
+    
+    /**
+     * Builds a random string whose length is picked from minLen..maxLen (inclusive).
+     * Each character is drawn by choosing one of the strings in randomList and then one char of it.
+     * @param randomList non-empty strings supplying the candidate characters
+     * @param minLen smallest length to generate
+     * @param maxLen largest length to generate
+     * @return the generated string
+     */
+    private String getRandomString(String[] randomList, int minLen, int maxLen) {
+        StringBuffer result = new StringBuffer();
+        int len = random.nextInt(maxLen + 1 - minLen) + minLen;
+        // Count from 0: len already includes minLen, so starting the loop at minLen
+        // would produce only len - minLen characters.
+        for (int i = 0; i < len; ++i) {
+            String source = randomList[random.nextInt(randomList.length)]; // don't bother with surrogates
+            char ch = source.charAt(random.nextInt(source.length()));
+            UTF16.append(result, ch);
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Normalizes patternTest, normalizes the result again, and reports an error when the two
+     * results differ. With DEBUG set, the failing case is replayed so it can be stepped through.
+     * @param buffer scratch buffer handed to showItems
+     * @param pp tokenizer under test; its pattern and start position are overwritten
+     * @param patternTest pattern to check
+     */
+    private void CheckPattern(StringBuffer buffer, PatternTokenizer pp, String patternTest) {
+        pp.setPattern(patternTest);
+        if (DEBUG && isVerbose()) {
+            showItems(buffer, pp, patternTest);
+        }
+        pp.setStart(0);
+        String once = pp.normalize();
+        logln("input:\t<" + patternTest + ">" + "\tnormalized:\t<" + once + ">");
+        pp.setPattern(once);
+        String twice = pp.normalize();
+        if (once.equals(twice)) {
+            return;
+        }
+        errln("Normalization not idempotent:\t" + patternTest + "\tnormalized: " + once +  "\tnormalized2: " + twice);
+        if (!DEBUG) {
+            return;
+        }
+        // replay the failing case so it can be followed in a debugger
+        pp.setPattern(patternTest);
+        pp.setStart(0);
+        once = pp.normalize();
+        pp.setPattern(once);
+        showItems(buffer, pp, once);
+        twice = pp.normalize();
+    }
+
+    /**
+     * Logs every token the tokenizer produces from its current position, one per line, with the
+     * status name, the raw token text and (for non-syntax tokens) the re-quoted literal.
+     * Calling next() moves the tokenizer's start position, so callers reset it afterwards.
+     * @param buffer scratch buffer, cleared before each token
+     * @param pp tokenizer to drain
+     * @param patternTest pattern text, used only for the log header
+     */
+    private void showItems(StringBuffer buffer, PatternTokenizer pp, String patternTest) {
+        logln("input:\t<" + patternTest + ">");
+        while (true) {
+            buffer.setLength(0);
+            int status = pp.next(buffer);
+            // status codes are static constants; reference them through the class, not the instance
+            if (status == PatternTokenizer.DONE) {
+                break;
+            }
+            String lit = "";
+            if (status != PatternTokenizer.SYNTAX) {
+                lit = "\t<" + pp.quoteLiteral(buffer) + ">";
+            }
+            logln("\t" + statusName[status] + "\t<" + buffer + ">" + lit);
+        }
+    }
+    
+    // Display names indexed by the PatternTokenizer status codes (DONE = 0 through UNKNOWN = 5).
+    static final String[] statusName = {"DONE", "SYNTAX", "LITERAL", "BROKEN_QUOTE", "BROKEN_ESCAPE", "UNKNOWN"};
+    
+    /**
+     * Walks dateTestData in order. A ULocale entry selects the locale and its pattern generator,
+     * a Date entry selects the date to format, and a String[] {skeleton, expected} entry formats
+     * the current date with the best pattern for the skeleton and compares it with the expected
+     * text. A bare String entry is a skeleton without expectation and is only used to log
+     * generated data. With GENERATE_TEST_DATA set, entries are logged as Java source instead of
+     * being verified.
+     */
+    public void TestBasic() {
+        ULocale uLocale = null;
+        DateTimePatternGenerator dtfg = null;
+        Date date = null;
+        for (int i = 0; i < dateTestData.length; ++i) {
+            if (dateTestData[i] instanceof ULocale) {
+                uLocale = (ULocale) dateTestData[i];
+                dtfg = DateTimePatternGenerator.getInstance(uLocale);
+                if (GENERATE_TEST_DATA) logln("new ULocale(\"" + uLocale.toString() + "\"),");
+            } else if (dateTestData[i] instanceof Date) {
+                date = (Date) dateTestData[i];
+                // The Date constructor takes the day of the month, which is getDate();
+                // getDay() is the day of the week and would generate a different date.
+                if (GENERATE_TEST_DATA) logln("new Date(" + date.getYear() + ", " + date.getMonth() + ", " + date.getDate() + ", " + date.getHours() + ", " + date.getMinutes() + ", " + date.getSeconds()+ "),");
+            } else if (dateTestData[i] instanceof String) {
+                String testSkeleton = (String) dateTestData[i];
+                String pattern = dtfg.getBestPattern(testSkeleton);
+                SimpleDateFormat sdf = new SimpleDateFormat(pattern, uLocale);
+                String formatted = sdf.format(date);
+                if (GENERATE_TEST_DATA) logln("new String[] {\"" + testSkeleton + "\", \"" + Utility.escape(formatted) + "\"},");
+                //logln(uLocale + "\t" + testSkeleton + "\t" + pattern + "\t" + sdf.format(date));
+            } else {
+                String[] testPair = (String[]) dateTestData[i];
+                String testSkeleton = testPair[0];
+                String testFormatted = testPair[1];
+                String pattern = dtfg.getBestPattern(testSkeleton);
+                SimpleDateFormat sdf = new SimpleDateFormat(pattern, uLocale);
+                String formatted = sdf.format(date);
+                if (GENERATE_TEST_DATA) {
+                    logln("new String[] {\"" + testSkeleton + "\", \"" + Utility.escape(formatted) + "\"},");
+                } else if (!formatted.equals(testFormatted)) {
+                    errln(uLocale + "\tformatted string doesn't match test case: " + testSkeleton + "\t generated: " +  pattern + "\t expected: " + testFormatted + "\t got: " + formatted);
+                    // replay the failing case for stepping in a debugger; only when debugging is on
+                    if (DEBUG) {
+                        pattern = dtfg.getBestPattern(testSkeleton);
+                        sdf = new SimpleDateFormat(pattern, uLocale);
+                        formatted = sdf.format(date);
+                    }
+                }
+                //logln(uLocale + "\t" + testSkeleton + "\t" + pattern + "\t" + sdf.format(date));
+            }
+        }
+    }
+    
+    // Fixed patterns that TestPatternParser runs through CheckPattern in both escaping modes.
+    static final Object[] patternTestData = {
+        "'$f''#c",
+        "'' 'a",
+        "'.''.'",
+        "\\u0061\\\\",
+        "mm.dd 'dd ' x",
+        "'' ''",
+    };
+    
+    // Can be generated by using GENERATE_TEST_DATA. Must be reviewed before adding.
+    // Layout: a Date sets the date to format, a ULocale switches locale, and each
+    // String[] is {skeleton, expected formatted text} for the current locale and date.
+    static final Object[] dateTestData = {
+        // January 13, 1999 23:58:59 (the third argument is the day of the month);
+        // every expected string below shows day 13.
+        new Date(99, 0, 13, 23, 58, 59),
+        new ULocale("en_US"),
+        new String[] {"yM", "1/1999"},
+        new String[] {"yMMM", "Jan 1999"},
+        new String[] {"yMd", "1/13/1999"},
+        new String[] {"yMMMd", "Jan/13/1999"},
+        new String[] {"Md", "1/13"},
+        new String[] {"MMMd", "Jan 13"},
+        new String[] {"yQQQ", "Q1 1999"},
+        new String[] {"hhmm", "11:58 PM"},
+        new String[] {"HHmm", "23:58"},
+        new String[] {"mmss", "58:59"},
+        new ULocale("zh_Hans_CN"),
+        new String[] {"yM", "1999-1"},
+        new String[] {"yMMM", "1999-\u4E00\u6708"},
+        new String[] {"yMd", "1999\u5E741\u670813\u65E5"},
+        new String[] {"yMMMd", "1999\u5E74\u4E00\u6708\u670813\u65E5"},
+        new String[] {"Md", "1-13"},
+        new String[] {"MMMd", "\u4E00\u6708-13"},
+        new String[] {"yQQQ", "1\u5B63 1999"},
+        new String[] {"hhmm", "\u4E0B\u534811:58"},
+        new String[] {"HHmm", "\u4E0B\u534811:58"},
+        new String[] {"mmss", "58:59"},
+        new ULocale("de_DE"),
+        new String[] {"yM", "1.1999"},
+        new String[] {"yMMM", "Jan 1999"},
+        new String[] {"yMd", "13.1.1999"},
+        new String[] {"yMMMd", "13. Jan 1999"},
+        new String[] {"Md", "13.1"},
+        new String[] {"MMMd", "13. Jan"},
+        new String[] {"yQQQ", "Q1 1999"},
+        new String[] {"hhmm", "11:58 nachm."},
+        new String[] {"HHmm", "23:58"},
+        new String[] {"mmss", "58:59"},
+        new ULocale("fi"),
+        new String[] {"yM", "1.1999"},
+        new String[] {"yMMM", "tammita 1999"},
+        new String[] {"yMd", "13.1.1999"},
+        new String[] {"yMMMd", "13. tammita 1999"},
+        new String[] {"Md", "13.1"},
+        new String[] {"MMMd", "13. tammita"},
+        new String[] {"yQQQ", "1. nelj. 1999"},
+        new String[] {"hhmm", "23.58"},
+        new String[] {"HHmm", "23.58"},
+        new String[] {"mmss", "58.59"},
+    };
+}
--- a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
@ -64,6 +64,7 @@ public class BagFormatter {
    private boolean hexValue = false;
    private static final String NULL_VALUE = "_NULL_VALUE_";
    private int fullTotal = -1;
+    private boolean showTotal = true;
    private String lineSeparator = "\r\n";
    private Tabber tabber = new Tabber.MonoTabber();

@ -331,14 +332,14 @@ public class BagFormatter {
        return getName(s, false);
    }

-    class NameLabel extends UnicodeLabel {
+    public static class NameLabel extends UnicodeLabel {
        UnicodeProperty nameProp;
        UnicodeSet control;
        UnicodeSet private_use;
        UnicodeSet noncharacter;
        UnicodeSet surrogate;

-        NameLabel(UnicodeProperty.Factory source) {
+        public NameLabel(UnicodeProperty.Factory source) {
            nameProp = source.getProperty("Name");
            control = source.getSet("gc=Cc");
            private_use = source.getSet("gc=Co");
@ -526,11 +527,13 @@ public class BagFormatter {

        protected void doAfter(Object container, Object o) {
            if (fullTotal != -1 && fullTotal != counter) {
-                output.print(lineSeparator);
-                output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator);
-                output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator);
+                if (showTotal) {
+                    output.print(lineSeparator);
+                    output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator);
+                    output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator);
+                }
                fullTotal = -1;
-            } else {
+            } else if (showTotal) {
                output.print(lineSeparator);
                output.print("# Total code points: " + nf.format(counter) + lineSeparator);
            }
@ -553,7 +556,7 @@ public class BagFormatter {
                String thing = o.toString();
                String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);
                if (value.length() != 0) value = "\t; " + value;
-                String label = getLabelSource(true).getValue(thing, ",", true);
+                String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);
                if (label.length() != 0) label = " " + label;
                output.print(
                    tabber.process(
@ -1092,5 +1095,13 @@ public class BagFormatter {
    public void setTabber(Tabber tabber) {
        this.tabber = tabber;
    }
+
+    /**
+     * Returns the current value of the showTotal flag.
+     * @return the showTotal flag
+     */
+    public boolean isShowTotal() {
+        return showTotal;
+    }
+
+    /**
+     * Sets the showTotal flag.
+     * @param showTotal new value of the flag
+     */
+    public void setShowTotal(boolean showTotal) {
+        this.showTotal = showTotal;
+    }
 }
 //#endif
--- a/icu4j/src/com/ibm/icu/impl/CollectionUtilities.java
+++ b/icu4j/src/com/ibm/icu/impl/CollectionUtilities.java
@ -1,7 +1,7 @@
 //##header
 /*
 *******************************************************************************
- * Copyright (C) 1996-2005, International Business Machines Corporation and    *
+ * Copyright (C) 1996-2006, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -104,12 +104,23 @@ public final class CollectionUtilities {
 		Iterator it = c.iterator();
 		if (!it.hasNext()) return null;
 		Object bestSoFar = it.next();
-		while (it.hasNext()) {
-			Object item = it.next();
-			if (comp.compare(item, bestSoFar) == direction) {
-				bestSoFar = item;
-			}
-		}
+        if (direction < 0) {
+    		while (it.hasNext()) {
+    			Object item = it.next();
+                int compValue = comp.compare(item, bestSoFar);
+    			if (comp.compare(item, bestSoFar) < 0) {
+    				bestSoFar = item;
+                }
+    		}
+        } else {
+            while (it.hasNext()) {
+                Object item = it.next();
+                int compValue = comp.compare(item, bestSoFar);
+                if (comp.compare(item, bestSoFar) > 0) {
+                    bestSoFar = item;
+                }
+            }
+        }
 		return bestSoFar;
 	}
 	
@ -326,7 +337,7 @@ public final class CollectionUtilities {
        return pp.toPattern(uset);
    }
    
-    static class MultiComparator implements Comparator {
+    public static class MultiComparator implements Comparator {
        private Comparator[] comparators;
    
        public MultiComparator (Comparator[] comparators) {
--- a/icu4j/src/com/ibm/icu/impl/PatternTokenizer.java
+++ b/icu4j/src/com/ibm/icu/impl/PatternTokenizer.java
@ -0,0 +1,370 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006, Google, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
+import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
+
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A simple parsing class for patterns and rules. Handles '...' quotations, \\uxxxx and \\Uxxxxxxxx, and simple syntax.
+ * The '' (two quotes) is treated as a single quote, inside or outside a quote
+ * <ul>
+ * <li>Any ignorable characters are ignored in parsing.</li>
+ * <li>Any syntax characters are broken into separate tokens</li>
+ * <li>Quote characters can be specified: '...', "...", and \x </li>
+ * <li>Other characters are treated as literals</li>
+ * </ul>
+ */
+public class PatternTokenizer {
+    // settings used in the interpretation of the pattern
+    // (the sets are stored as private copies; see the setters)
+    private UnicodeSet ignorableCharacters = new UnicodeSet();
+    private UnicodeSet syntaxCharacters = new UnicodeSet();
+    private UnicodeSet escapeCharacters = new UnicodeSet();
+    private boolean usingSlash = false;
+    private boolean usingQuote = false;
+    
+    // transient data, set when needed. Null it out for any changes in the above fields.
+    // Built lazily by quoteLiteral from the syntax and ignorable sets plus the active quote/slash characters.
+    private transient UnicodeSet needingQuoteCharacters = null;
+    
+    // data about the current pattern being parsed. start gets moved as we go along.
+    // next() reads pattern in the range [start, limit).
+    private int start;
+    private int limit;
+    private CharSequence pattern;
+    
+    /**
+     * Returns a copy of the set of characters that are skipped during parsing.
+     * @return a clone of the ignorable-character set
+     */
+    public UnicodeSet getIgnorableCharacters() {
+        final UnicodeSet copy = (UnicodeSet) ignorableCharacters.clone();
+        return copy;
+    }
+    /**
+     * Sets the characters to be ignored in parsing, eg new UnicodeSet("[:pattern_whitespace:]").
+     * The set is cloned, and the cached set of characters needing quotes is discarded.
+     * @param ignorableCharacters characters to skip while parsing
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setIgnorableCharacters(UnicodeSet ignorableCharacters) {
+        final UnicodeSet copy = (UnicodeSet) ignorableCharacters.clone();
+        this.ignorableCharacters = copy;
+        needingQuoteCharacters = null;
+        return this;
+    }
+    /**
+     * Returns a copy of the set of characters treated as syntax.
+     * @return a clone of the syntax-character set
+     */
+    public UnicodeSet getSyntaxCharacters() {
+        final UnicodeSet copy = (UnicodeSet) syntaxCharacters.clone();
+        return copy;
+    }
+    /**
+     * Sets the characters to be interpreted as syntax characters in parsing, eg new UnicodeSet("[:pattern_syntax:]").
+     * The set is cloned, and the cached set of characters needing quotes is discarded.
+     * @param syntaxCharacters characters that form single-character syntax tokens
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setSyntaxCharacters(UnicodeSet syntaxCharacters) {
+        final UnicodeSet copy = (UnicodeSet) syntaxCharacters.clone();
+        this.syntaxCharacters = copy;
+        needingQuoteCharacters = null;
+        return this;
+    }
+    /**
+     * Returns a copy of the set of characters written as \\u or \\U escapes by quoteLiteral.
+     * @return a clone of the escape-character set
+     */
+    public UnicodeSet getEscapeCharacters() {
+        final UnicodeSet copy = (UnicodeSet) escapeCharacters.clone();
+        return copy;
+    }
+    /**
+     * Set characters to be escaped in literals, in quoteLiteral and normalize, eg new UnicodeSet("[^\\u0020-\\u007E]").
+     * The set is cloned before it is stored.
+     * @param escapeCharacters characters to emit as \\u / \\U escapes
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setEscapeCharacters(UnicodeSet escapeCharacters) {
+        final UnicodeSet copy = (UnicodeSet) escapeCharacters.clone();
+        this.escapeCharacters = copy;
+        return this;
+    }
+    /**
+     * Returns whether single quotes are used for quoting.
+     * @return the usingQuote setting
+     */
+    public boolean isUsingQuote() {
+        return usingQuote;
+    }
+    /**
+     * Turns single-quote quoting on or off and discards the cached set of characters needing quotes.
+     * @param usingQuote true to treat ' as a quoting character
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setUsingQuote(boolean usingQuote) {
+        needingQuoteCharacters = null;
+        this.usingQuote = usingQuote;
+        return this;
+    }
+    /**
+     * Returns whether a backslash is used to quote the following character.
+     * @return the usingSlash setting
+     */
+    public boolean isUsingSlash() {
+        return usingSlash;
+    }
+    /**
+     * Turns backslash quoting on or off and discards the cached set of characters needing quotes.
+     * @param usingSlash true to let a backslash quote the character after it
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setUsingSlash(boolean usingSlash) {
+        needingQuoteCharacters = null;
+        this.usingSlash = usingSlash;
+        return this;
+    }
+    //    public UnicodeSet getQuoteCharacters() {
+//  return (UnicodeSet) quoteCharacters.clone();
+//  }
+//  public PatternTokenizer setQuoteCharacters(UnicodeSet quoteCharacters) {
+//  this.quoteCharacters = (UnicodeSet) quoteCharacters.clone();
+//  needingQuoteCharacters = null;
+//  return this;
+//  }
+    /**
+     * Returns the index at which parsing stops (exclusive).
+     * @return the current limit
+     */
+    public int getLimit() {
+        return limit;
+    }
+    /**
+     * Sets the index at which parsing stops (exclusive). The value is stored without validation.
+     * @param limit new limit
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setLimit(int limit) {
+        this.limit = limit;
+        return this;
+    }
+    /**
+     * Returns the index at which the next call to next() begins.
+     * @return the current start position
+     */
+    public int getStart() {
+        return start;
+    }
+    /**
+     * Sets the index at which the next call to next() begins. The value is stored without validation.
+     * @param start new start position
+     * @return this tokenizer, to allow call chaining
+     */
+    public PatternTokenizer setStart(int start) {
+        this.start = start;
+        return this;
+    }
+    /**
+     * Installs a new pattern and resets the parse range to the whole pattern
+     * (start = 0, limit = pattern length).
+     * @param pattern text to tokenize; must not be null
+     * @return this tokenizer, to allow call chaining
+     * @throws IllegalArgumentException if pattern is null
+     */
+    public PatternTokenizer setPattern(CharSequence pattern) {
+        if (pattern != null) {
+            this.start = 0;
+            this.limit = pattern.length();
+            this.pattern = pattern;
+            return this;
+        }
+        throw new IllegalArgumentException("Inconsistent arguments");
+    }
+    
+    /** The quote character recognized when usingQuote is set. */
+    public static final char SINGLE_QUOTE = '\'';
+    /** The character that introduces \\u / \\U escapes and, when usingSlash is set, quotes the next character. */
+    public static final char BACK_SLASH = '\\';
+    // State markers used by quoteLiteral: no quote open / an opening quote has been written.
+    // Declared final because they are constants and must never be reassigned.
+    private static final int NO_QUOTE = -1, IN_QUOTE = -2;
+    /**
+     * Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes.
+     * Characters in the escape set are always written as \\u or \\U escapes. Other characters that need
+     * protection are backslash-prefixed when usingSlash is set, otherwise wrapped in single quotes when
+     * usingQuote is set, otherwise written as \\u or \\U escapes.
+     * @param string the literal text to quote
+     * @return the quoted form of the text
+     */
+    public String quoteLiteral(CharSequence string) {
+        // lazily build the set of characters that cannot appear bare in a literal
+        if (needingQuoteCharacters == null) {
+            needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters); // .addAll(quoteCharacters)
+            if (usingSlash) needingQuoteCharacters.add(BACK_SLASH);
+            if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE);
+        }
+        StringBuffer result = new StringBuffer();
+        // NO_QUOTE, or IN_QUOTE once an opening quote has been written and not yet closed
+        int quotedChar = NO_QUOTE;
+        int cp;
+        // iterate by code point, not by char
+        for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
+            cp = UTF16.charAt(string, i);
+            if (escapeCharacters.contains(cp)) {
+                // we may have to fix up previous characters
+                // (close a pending quote before writing the escape)
+                if (quotedChar == IN_QUOTE) {
+                    result.append(SINGLE_QUOTE);
+                    quotedChar = NO_QUOTE;
+                }
+                appendEscaped(result, cp);
+                continue;
+            }
+            
+            if (needingQuoteCharacters.contains(cp)) {
+                // if we have already started a quote
+                if (quotedChar == IN_QUOTE) {
+                    UTF16.append(result, cp);
+                    if (usingQuote && cp == SINGLE_QUOTE) { // double it
+                        result.append(SINGLE_QUOTE);
+                    }
+                    continue;
+                }
+                // otherwise not already in quote
+                // backslash quoting is tried before single-quote quoting
+                if (usingSlash) {
+                    result.append(BACK_SLASH);
+                    UTF16.append(result, cp);
+                    continue;
+                }
+                if (usingQuote) {
+                    if (cp == SINGLE_QUOTE) { // double it and continue
+                        result.append(SINGLE_QUOTE);
+                        result.append(SINGLE_QUOTE);
+                        continue;
+                    }
+                    // open a quote; it stays open until a character that needs no quoting (or the end)
+                    result.append(SINGLE_QUOTE);
+                    UTF16.append(result, cp);
+                    quotedChar = IN_QUOTE;
+                    continue;
+                }
+                // we have no choice but to use \\u or \\U
+                appendEscaped(result, cp);
+                continue;
+            }
+            // otherwise cp doesn't need quoting
+            // we may have to fix up previous characters
+            if (quotedChar == IN_QUOTE) {
+                result.append(SINGLE_QUOTE);
+                quotedChar = NO_QUOTE;
+            }
+            UTF16.append(result, cp);
+        }
+        // all done. 
+        // we may have to fix up previous characters
+        if (quotedChar == IN_QUOTE) {
+            result.append(SINGLE_QUOTE);
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Appends a code point as a hex escape: \\u plus 4 hex digits for values up to 0xFFFF,
+     * \\U plus 8 hex digits for anything larger.
+     * @param result buffer to append to
+     * @param cp code point to escape
+     */
+    private void appendEscaped(StringBuffer result, int cp) {
+        final boolean supplementary = cp > 0xFFFF;
+        if (supplementary) {
+            result.append("\\U");
+            result.append(Utility.hex(cp,8));
+            return;
+        }
+        result.append("\\u");
+        result.append(Utility.hex(cp,4));
+    }
+    
+    /**
+     * Re-serializes the pattern from the current start position: syntax tokens are copied as they
+     * are, every other token is passed through quoteLiteral. The start position is restored
+     * before returning.
+     * @return the normalized pattern text
+     */
+    public String normalize() {
+        final int savedStart = start;
+        final StringBuffer out = new StringBuffer();
+        final StringBuffer token = new StringBuffer();
+        for (int status = next(token); status != DONE; status = next(token)) {
+            if (status == SYNTAX) {
+                out.append(token);
+            } else {
+                out.append(quoteLiteral(token));
+            }
+            // empty the token buffer before the next token is read
+            token.setLength(0);
+        }
+        start = savedStart;
+        return out.toString();
+    }
+    
+    // Status codes returned by next():
+    //   DONE          - nothing left between start and limit
+    //   SYNTAX        - a single syntax character
+    //   LITERAL       - literal text
+    //   BROKEN_QUOTE  - the input ended inside a quote
+    //   BROKEN_ESCAPE - a hex escape hit a non-hex character, or the input ended inside an escape
+    //   UNKNOWN       - neither a syntax nor a literal character was seen
+    public static final int DONE = 0, SYNTAX = 1, LITERAL = 2, BROKEN_QUOTE = 3, BROKEN_ESCAPE = 4, UNKNOWN = 5;
+    
+    // Internal quoting states of the scanner in next().
+    private static final int AFTER_QUOTE = -1, NONE = 0, START_QUOTE = 1, NORMAL_QUOTE = 2, SLASH_START = 3, HEX = 4;
+    
+    /**
+     * Reads the next token beginning at the current start position, appends its unquoted text to
+     * buffer, and advances start past what was consumed.
+     * <p>A syntax character seen before any literal text is returned alone as SYNTAX; a syntax
+     * character seen after literal text ends that literal and is left for the next call.
+     * The buffer is not cleared here; the callers in this class clear it before each call.
+     * @param buffer receives the text of the token
+     * @return DONE, SYNTAX, LITERAL, BROKEN_QUOTE, BROKEN_ESCAPE or UNKNOWN
+     */
+    public int next(StringBuffer buffer) {
+        if (start >= limit) return DONE;
+        int status = UNKNOWN;
+        int lastQuote = UNKNOWN;
+        int quoteStatus = NONE;
+        // remaining hex digits and accumulated value of a \\u / \\U escape
+        int hexCount = 0;
+        int hexValue = 0;
+        int cp;
+        main:
+            for (int i = start; i < limit; i += UTF16.getCharCount(cp)) {
+                cp = UTF16.charAt(pattern, i);
+                // if we are in a quote, then handle it.
+                switch (quoteStatus) {
+                case SLASH_START:
+                    // previous character was a backslash (entered whether or not usingSlash is set)
+                    switch (cp) {
+                    case 'u':
+                        quoteStatus = HEX;
+                        hexCount = 4;
+                        hexValue = 0;
+                        continue main;
+                    case 'U': 
+                        quoteStatus = HEX;
+                        hexCount = 8;
+                        hexValue = 0;
+                        continue main;
+                    default:
+                        if (usingSlash) {
+                            // the backslash quotes this character: take it literally
+                            UTF16.append(buffer, cp);
+                            quoteStatus = NONE;
+                            continue main;
+                        } else {
+                            // backslash is not a quoting character: keep it as literal text
+                            buffer.append(BACK_SLASH);
+                            quoteStatus = NONE;
+                        }
+                    }
+                    break; // fall through to NONE
+                case HEX:
+                    // add cp to the value, then subtract the base of its digit range
+                    hexValue <<= 4;
+                    hexValue += cp;
+                    switch (cp) {
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                        hexValue -= '0'; break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                        hexValue -= 'a' - 10; break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                        hexValue -= 'A' - 10; break;
+                    default:
+                        // not a hex digit: the next call resumes at this character
+                        start = i;
+                    return BROKEN_ESCAPE;
+                    }
+                    --hexCount;
+                    if (hexCount == 0) {
+                        quoteStatus = NONE;
+                        UTF16.append(buffer, hexValue);
+                    }
+                    continue main;
+                case AFTER_QUOTE:
+                    // see if we get another quote character
+                    // if we just ended a quote BUT the following character is the lastQuote character, then we have a situation like '...''...', so we restart the quote
+                    if (cp == lastQuote) {
+                        UTF16.append(buffer, cp);
+                        quoteStatus = NORMAL_QUOTE;
+                        continue main;
+                    }
+                    quoteStatus = NONE;
+                    break; // fall through to NONE
+                case START_QUOTE:
+                    // if we are at the very start of a quote, and we hit another quote mark then we emit a literal quote character and end the quote
+                    if (cp == lastQuote) {
+                        UTF16.append(buffer, cp);
+                        quoteStatus = NONE; // get out of quote, with no trace remaining
+                        continue;                            
+                    }
+                    // otherwise get into quote
+                    UTF16.append(buffer, cp);
+                    quoteStatus = NORMAL_QUOTE;
+                    continue main;
+                case NORMAL_QUOTE: 
+                    if (cp == lastQuote) {
+                        quoteStatus = AFTER_QUOTE; // get out of quote
+                        continue main;
+                    }
+                    UTF16.append(buffer, cp);
+                    continue main;
+                }
+                
+                // from here on we are outside any quote or escape
+                if (ignorableCharacters.contains(cp)) {
+                    continue;
+                }
+                // do syntax characters
+                if (syntaxCharacters.contains(cp)) {
+                    if (status == UNKNOWN) {
+                        UTF16.append(buffer, cp);
+                        start = i + UTF16.getCharCount(cp);
+                        return SYNTAX;
+                    } else { // LITERAL, so back up and break
+                        start = i;
+                        return status;
+                    }
+                }
+                // otherwise it is a literal; keep on going
+                status = LITERAL;
+                if (cp == BACK_SLASH) {
+                    quoteStatus = SLASH_START;
+                    continue;
+                } else if (usingQuote && cp == SINGLE_QUOTE) {
+                    lastQuote = cp;
+                    quoteStatus = START_QUOTE;
+                    continue;
+                }
+                // normal literals
+                UTF16.append(buffer, cp);
+            }
+        // handle final cleanup
+        // (the input ran out; report an unfinished escape or quote)
+        start = limit;
+        switch (quoteStatus) {
+        case HEX:
+            status = BROKEN_ESCAPE;
+            break;
+        case SLASH_START:
+            if (usingSlash) {
+                status = BROKEN_ESCAPE;
+            } else {
+                // a trailing backslash is plain literal text when backslash quoting is off
+                buffer.append(BACK_SLASH);
+            }
+            break;
+        case START_QUOTE: case NORMAL_QUOTE:
+            status = BROKEN_QUOTE;
+            break;
+        }
+        return status;
+    }
+    
+    
+}
--- a/icu4j/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/src/com/ibm/icu/impl/Utility.java
@ -1826,6 +1826,23 @@ public final class Utility {
        return result.toString();
    }
    
+    
+    /**
+     * Concatenates {@code count} copies of a string.
+     * @param s the string to repeat
+     * @param count number of copies wanted
+     * @return the empty string when count is zero or negative, s itself when count is 1,
+     *         otherwise s repeated count times
+     */
+    public static String repeat(String s, int count) {
+        if (count == 1) {
+            return s;
+        }
+        if (count < 1) {
+            return "";
+        }
+        StringBuffer copies = new StringBuffer();
+        int remaining = count;
+        while (remaining-- > 0) {
+            copies.append(s);
+        }
+        return copies.toString();
+    }
+
+    
    // !!! 1.3 compatibiliy
    public static int indexOf(StringBuffer buf, String s) {
 //#ifndef FOUNDATION
--- a/icu4j/src/com/ibm/icu/impl/data/DateData.java
+++ b/icu4j/src/com/ibm/icu/impl/data/DateData.java
--- a/icu4j/src/com/ibm/icu/text/DateTimePatternGenerator.java
+++ b/icu4j/src/com/ibm/icu/text/DateTimePatternGenerator.java
--- a/icu4j/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/src/com/ibm/icu/text/UTF16.java
@ -228,6 +228,71 @@ public final class UTF16
            }
        return single; // return unmatched surrogate
    }
+    
+    /**
+     * Extracts a single UTF-32 value from a character sequence.
+     * Used when iterating forwards or backwards (with
+     * <code>UTF16.getCharCount()</code>), as well as for random access. If a
+     * validity check is required, use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">
+     * UCharacter.isLegal()</a></code> on the return value.
+     * If the char retrieved is part of a surrogate pair, its supplementary
+     * character is returned. If a complete supplementary character is
+     * not found, the unpaired surrogate is returned.
+     * @param source character sequence holding UTF-16 code units
+     * @param offset16 UTF-16 offset of any code unit of the character
+     * @return UTF-32 value of the code point that contains the char at
+     *         offset16. The boundaries of that codepoint are the same as in
+     *         <code>bounds32()</code>.
+     * @exception IndexOutOfBoundsException thrown if offset16 is out of
+     *            bounds.
+     * @stable ICU 2.1
+     */
+    public static int charAt(CharSequence source, int offset16)
+    {
+        final char unit = source.charAt(offset16);
+        // anything below the surrogate range is a complete code point on its own
+        return unit < UTF16.LEAD_SURROGATE_MIN_VALUE
+            ? unit
+            : _charAt(source, offset16, unit);
+    }
+    
+    /**
+     * Slow path of charAt for a code unit at or above the start of the surrogate range.
+     * Looks one unit forward (for a lead surrogate) or one unit back (for a trail surrogate)
+     * for the other half of the pair.
+     * @param source character sequence being read
+     * @param offset16 offset of single within source
+     * @param single the code unit found at offset16
+     * @return the supplementary code point when a matching pair is found, otherwise single
+     */
+    private static int _charAt(CharSequence source, int offset16, char single)
+    {
+        // above the surrogate block: a plain character
+        if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+            return single;
+        }
+
+        final boolean isLead = single <= UTF16.LEAD_SURROGATE_MAX_VALUE;
+        if (isLead) {
+            // a lead surrogate needs a trail surrogate right after it
+            final int following = offset16 + 1;
+            if (following != source.length()) {
+                final char trail = source.charAt(following);
+                if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
+                    trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(single,
+                                                                  trail);
+                }
+            }
+            return single; // unmatched lead surrogate
+        }
+
+        // a trail surrogate needs a lead surrogate right before it
+        final int preceding = offset16 - 1;
+        if (preceding >= 0) {
+            final char lead = source.charAt(preceding);
+            if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
+                lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+                return UCharacterProperty.getRawSupplementary(lead,
+                                                              single);
+            }
+        }
+        return single; // unmatched trail surrogate
+    }

    /**
     * Extract a single UTF-32 value from a string.