mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-4374 first checkin of DateTimePatternGenerator
X-SVN-Rev: 20081
This commit is contained in:
parent
72184a0b2c
commit
1fd2123188
8 changed files with 3556 additions and 15 deletions
211
icu4j/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
Normal file
211
icu4j/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
Normal file
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006, Google, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.test.format;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.PatternTokenizer;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.DateTimePatternGenerator;
|
||||
import com.ibm.icu.text.SimpleDateFormat;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.Random;
|
||||
import java.util.ResourceBundle;
|
||||
|
||||
public class DateTimeGeneratorTest extends TestFmwk {
|
||||
public static boolean GENERATE_TEST_DATA = false;
|
||||
public static int RANDOM_COUNT = 1000;
|
||||
public static boolean DEBUG = false;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new DateTimeGeneratorTest().run(args);
|
||||
}
|
||||
|
||||
public void TestPatternParser() {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
PatternTokenizer pp = new PatternTokenizer()
|
||||
.setIgnorableCharacters(new UnicodeSet("[-]"))
|
||||
.setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"))
|
||||
.setEscapeCharacters(new UnicodeSet("[b#]"))
|
||||
.setUsingQuote(true);
|
||||
logln("Using Quote");
|
||||
for (int i = 0; i < patternTestData.length; ++i) {
|
||||
String patternTest = (String) patternTestData[i];
|
||||
CheckPattern(buffer, pp, patternTest);
|
||||
}
|
||||
String[] randomSet = {"abcdef", "$12!@#-", "'\\"};
|
||||
for (int i = 0; i < RANDOM_COUNT; ++i) {
|
||||
String patternTest = getRandomString(randomSet, 0, 10);
|
||||
CheckPattern(buffer, pp, patternTest);
|
||||
}
|
||||
logln("Using Backslash");
|
||||
pp.setUsingQuote(false).setUsingSlash(true);
|
||||
for (int i = 0; i < patternTestData.length; ++i) {
|
||||
String patternTest = (String) patternTestData[i];
|
||||
CheckPattern(buffer, pp, patternTest);
|
||||
}
|
||||
for (int i = 0; i < RANDOM_COUNT; ++i) {
|
||||
String patternTest = getRandomString(randomSet, 0, 10);
|
||||
CheckPattern(buffer, pp, patternTest);
|
||||
}
|
||||
}
|
||||
|
||||
Random random = new java.util.Random(-1);
|
||||
|
||||
private String getRandomString(String[] randomList, int minLen, int maxLen) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
int len = random.nextInt(maxLen + 1 - minLen) + minLen;
|
||||
for (int i = minLen; i < len; ++ i) {
|
||||
String source = randomList[random.nextInt(randomList.length)]; // don't bother with surrogates
|
||||
char ch = source.charAt(random.nextInt(source.length()));
|
||||
UTF16.append(result, ch);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private void CheckPattern(StringBuffer buffer, PatternTokenizer pp, String patternTest) {
|
||||
pp.setPattern(patternTest);
|
||||
if (DEBUG && isVerbose()) {
|
||||
showItems(buffer, pp, patternTest);
|
||||
}
|
||||
String normalized = pp.setStart(0).normalize();
|
||||
logln("input:\t<" + patternTest + ">" + "\tnormalized:\t<" + normalized + ">");
|
||||
String doubleNormalized = pp.setPattern(normalized).normalize();
|
||||
if (!normalized.equals(doubleNormalized)) {
|
||||
errln("Normalization not idempotent:\t" + patternTest + "\tnormalized: " + normalized + "\tnormalized2: " + doubleNormalized);
|
||||
// allow for debugging at the point of failure
|
||||
if (DEBUG) {
|
||||
pp.setPattern(patternTest);
|
||||
normalized = pp.setStart(0).normalize();
|
||||
pp.setPattern(normalized);
|
||||
showItems(buffer, pp, normalized);
|
||||
doubleNormalized = pp.normalize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void showItems(StringBuffer buffer, PatternTokenizer pp, String patternTest) {
|
||||
logln("input:\t<" + patternTest + ">");
|
||||
while (true) {
|
||||
buffer.setLength(0);
|
||||
int status = pp.next(buffer);
|
||||
if (status == pp.DONE) break;
|
||||
String lit = "";
|
||||
if (status != pp.SYNTAX ) {
|
||||
lit = "\t<" + pp.quoteLiteral(buffer) + ">";
|
||||
}
|
||||
logln("\t" + statusName[status] + "\t<" + buffer + ">" + lit);
|
||||
}
|
||||
}
|
||||
|
||||
static final String[] statusName = {"DONE", "SYNTAX", "LITERAL", "BROKEN_QUOTE", "BROKEN_ESCAPE", "UNKNOWN"};
|
||||
|
||||
public void TestBasic() {
|
||||
ULocale uLocale = null;
|
||||
DateTimePatternGenerator dtfg = null;
|
||||
Date date = null;
|
||||
for (int i = 0; i < dateTestData.length; ++i) {
|
||||
if (dateTestData[i] instanceof ULocale) {
|
||||
uLocale = (ULocale) dateTestData[i];
|
||||
dtfg = DateTimePatternGenerator.getInstance(uLocale);
|
||||
if (GENERATE_TEST_DATA) logln("new ULocale(\"" + uLocale.toString() + "\"),");
|
||||
} else if (dateTestData[i] instanceof Date) {
|
||||
date = (Date) dateTestData[i];
|
||||
if (GENERATE_TEST_DATA) logln("new Date(" + date.getYear() + ", " + date.getMonth() + ", " + date.getDay() + ", " + date.getHours() + ", " + date.getMinutes() + ", " + date.getSeconds()+ "),");
|
||||
} else if (dateTestData[i] instanceof String) {
|
||||
String testSkeleton = (String) dateTestData[i];
|
||||
String pattern = dtfg.getBestPattern(testSkeleton);
|
||||
SimpleDateFormat sdf = new SimpleDateFormat(pattern, uLocale);
|
||||
String formatted = sdf.format(date);
|
||||
if (GENERATE_TEST_DATA) logln("new String[] {\"" + testSkeleton + "\", \"" + Utility.escape(formatted) + "\"},");
|
||||
//logln(uLocale + "\t" + testSkeleton + "\t" + pattern + "\t" + sdf.format(date));
|
||||
} else {
|
||||
String[] testPair = (String[]) dateTestData[i];
|
||||
String testSkeleton = testPair[0];
|
||||
String testFormatted = testPair[1];
|
||||
String pattern = dtfg.getBestPattern(testSkeleton);
|
||||
SimpleDateFormat sdf = new SimpleDateFormat(pattern, uLocale);
|
||||
String formatted = sdf.format(date);
|
||||
if (GENERATE_TEST_DATA) {
|
||||
logln("new String[] {\"" + testSkeleton + "\", \"" + Utility.escape(formatted) + "\"},");
|
||||
} else if (!formatted.equals(testFormatted)) {
|
||||
errln(uLocale + "\tformatted string doesn't match test case: " + testSkeleton + "\t generated: " + pattern + "\t expected: " + testFormatted + "\t got: " + formatted);
|
||||
if (true) { // debug
|
||||
pattern = dtfg.getBestPattern(testSkeleton);
|
||||
sdf = new SimpleDateFormat(pattern, uLocale);
|
||||
formatted = sdf.format(date);
|
||||
}
|
||||
}
|
||||
//logln(uLocale + "\t" + testSkeleton + "\t" + pattern + "\t" + sdf.format(date));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final Object[] patternTestData = {
|
||||
"'$f''#c",
|
||||
"'' 'a",
|
||||
"'.''.'",
|
||||
"\\u0061\\\\",
|
||||
"mm.dd 'dd ' x",
|
||||
"'' ''",
|
||||
};
|
||||
|
||||
// can be generated by using GENERATE_TEST_DATA. Must be reviewed before adding
|
||||
static final Object[] dateTestData = {
|
||||
new Date(99, 0, 3, 23, 58, 59),
|
||||
new ULocale("en_US"),
|
||||
new String[] {"yM", "1/1999"},
|
||||
new String[] {"yMMM", "Jan 1999"},
|
||||
new String[] {"yMd", "1/13/1999"},
|
||||
new String[] {"yMMMd", "Jan/13/1999"},
|
||||
new String[] {"Md", "1/13"},
|
||||
new String[] {"MMMd", "Jan 13"},
|
||||
new String[] {"yQQQ", "Q1 1999"},
|
||||
new String[] {"hhmm", "11:58 PM"},
|
||||
new String[] {"HHmm", "23:58"},
|
||||
new String[] {"mmss", "58:59"},
|
||||
new ULocale("zh_Hans_CN"),
|
||||
new String[] {"yM", "1999-1"},
|
||||
new String[] {"yMMM", "1999-\u4E00\u6708"},
|
||||
new String[] {"yMd", "1999\u5E741\u670813\u65E5"},
|
||||
new String[] {"yMMMd", "1999\u5E74\u4E00\u6708\u670813\u65E5"},
|
||||
new String[] {"Md", "1-13"},
|
||||
new String[] {"MMMd", "\u4E00\u6708-13"},
|
||||
new String[] {"yQQQ", "1\u5B63 1999"},
|
||||
new String[] {"hhmm", "\u4E0B\u534811:58"},
|
||||
new String[] {"HHmm", "\u4E0B\u534811:58"},
|
||||
new String[] {"mmss", "58:59"},
|
||||
new ULocale("de_DE"),
|
||||
new String[] {"yM", "1.1999"},
|
||||
new String[] {"yMMM", "Jan 1999"},
|
||||
new String[] {"yMd", "13.1.1999"},
|
||||
new String[] {"yMMMd", "13. Jan 1999"},
|
||||
new String[] {"Md", "13.1"},
|
||||
new String[] {"MMMd", "13. Jan"},
|
||||
new String[] {"yQQQ", "Q1 1999"},
|
||||
new String[] {"hhmm", "11:58 nachm."},
|
||||
new String[] {"HHmm", "23:58"},
|
||||
new String[] {"mmss", "58:59"},
|
||||
new ULocale("fi"),
|
||||
new String[] {"yM", "1.1999"},
|
||||
new String[] {"yMMM", "tammita 1999"},
|
||||
new String[] {"yMd", "13.1.1999"},
|
||||
new String[] {"yMMMd", "13. tammita 1999"},
|
||||
new String[] {"Md", "13.1"},
|
||||
new String[] {"MMMd", "13. tammita"},
|
||||
new String[] {"yQQQ", "1. nelj. 1999"},
|
||||
new String[] {"hhmm", "23.58"},
|
||||
new String[] {"HHmm", "23.58"},
|
||||
new String[] {"mmss", "58.59"},
|
||||
};
|
||||
}
|
|
@ -64,6 +64,7 @@ public class BagFormatter {
|
|||
private boolean hexValue = false;
|
||||
private static final String NULL_VALUE = "_NULL_VALUE_";
|
||||
private int fullTotal = -1;
|
||||
private boolean showTotal = true;
|
||||
private String lineSeparator = "\r\n";
|
||||
private Tabber tabber = new Tabber.MonoTabber();
|
||||
|
||||
|
@ -331,14 +332,14 @@ public class BagFormatter {
|
|||
return getName(s, false);
|
||||
}
|
||||
|
||||
class NameLabel extends UnicodeLabel {
|
||||
public static class NameLabel extends UnicodeLabel {
|
||||
UnicodeProperty nameProp;
|
||||
UnicodeSet control;
|
||||
UnicodeSet private_use;
|
||||
UnicodeSet noncharacter;
|
||||
UnicodeSet surrogate;
|
||||
|
||||
NameLabel(UnicodeProperty.Factory source) {
|
||||
public NameLabel(UnicodeProperty.Factory source) {
|
||||
nameProp = source.getProperty("Name");
|
||||
control = source.getSet("gc=Cc");
|
||||
private_use = source.getSet("gc=Co");
|
||||
|
@ -526,11 +527,13 @@ public class BagFormatter {
|
|||
|
||||
protected void doAfter(Object container, Object o) {
|
||||
if (fullTotal != -1 && fullTotal != counter) {
|
||||
output.print(lineSeparator);
|
||||
output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator);
|
||||
output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator);
|
||||
if (showTotal) {
|
||||
output.print(lineSeparator);
|
||||
output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator);
|
||||
output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator);
|
||||
}
|
||||
fullTotal = -1;
|
||||
} else {
|
||||
} else if (showTotal) {
|
||||
output.print(lineSeparator);
|
||||
output.print("# Total code points: " + nf.format(counter) + lineSeparator);
|
||||
}
|
||||
|
@ -553,7 +556,7 @@ public class BagFormatter {
|
|||
String thing = o.toString();
|
||||
String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);
|
||||
if (value.length() != 0) value = "\t; " + value;
|
||||
String label = getLabelSource(true).getValue(thing, ",", true);
|
||||
String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);
|
||||
if (label.length() != 0) label = " " + label;
|
||||
output.print(
|
||||
tabber.process(
|
||||
|
@ -1092,5 +1095,13 @@ public class BagFormatter {
|
|||
public void setTabber(Tabber tabber) {
|
||||
this.tabber = tabber;
|
||||
}
|
||||
|
||||
public boolean isShowTotal() {
|
||||
return showTotal;
|
||||
}
|
||||
|
||||
public void setShowTotal(boolean showTotal) {
|
||||
this.showTotal = showTotal;
|
||||
}
|
||||
}
|
||||
//#endif
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//##header
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2005, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -104,12 +104,23 @@ public final class CollectionUtilities {
|
|||
Iterator it = c.iterator();
|
||||
if (!it.hasNext()) return null;
|
||||
Object bestSoFar = it.next();
|
||||
while (it.hasNext()) {
|
||||
Object item = it.next();
|
||||
if (comp.compare(item, bestSoFar) == direction) {
|
||||
bestSoFar = item;
|
||||
}
|
||||
}
|
||||
if (direction < 0) {
|
||||
while (it.hasNext()) {
|
||||
Object item = it.next();
|
||||
int compValue = comp.compare(item, bestSoFar);
|
||||
if (comp.compare(item, bestSoFar) < 0) {
|
||||
bestSoFar = item;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while (it.hasNext()) {
|
||||
Object item = it.next();
|
||||
int compValue = comp.compare(item, bestSoFar);
|
||||
if (comp.compare(item, bestSoFar) > 0) {
|
||||
bestSoFar = item;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bestSoFar;
|
||||
}
|
||||
|
||||
|
@ -326,7 +337,7 @@ public final class CollectionUtilities {
|
|||
return pp.toPattern(uset);
|
||||
}
|
||||
|
||||
static class MultiComparator implements Comparator {
|
||||
public static class MultiComparator implements Comparator {
|
||||
private Comparator[] comparators;
|
||||
|
||||
public MultiComparator (Comparator[] comparators) {
|
||||
|
|
370
icu4j/src/com/ibm/icu/impl/PatternTokenizer.java
Normal file
370
icu4j/src/com/ibm/icu/impl/PatternTokenizer.java
Normal file
|
@ -0,0 +1,370 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006, Google, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
|
||||
import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A simple parsing class for patterns and rules. Handles '...' quotations, \\uxxxx and \\Uxxxxxxxx, and simple syntax.
|
||||
* The '' (two quotes) is treated as a single quote, inside or outside a quote
|
||||
* <ul>
|
||||
* <li>Any ignorable characters are ignored in parsing.</li>
|
||||
* <li>Any syntax characters are broken into separate tokens</li>
|
||||
* <li>Quote characters can be specified: '...', "...", and \x </li>
|
||||
* <li>Other characters are treated as literals</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class PatternTokenizer {
|
||||
// settings used in the interpretation of the pattern
|
||||
private UnicodeSet ignorableCharacters = new UnicodeSet();
|
||||
private UnicodeSet syntaxCharacters = new UnicodeSet();
|
||||
private UnicodeSet escapeCharacters = new UnicodeSet();
|
||||
private boolean usingSlash = false;
|
||||
private boolean usingQuote = false;
|
||||
|
||||
// transient data, set when needed. Null it out for any changes in the above fields.
|
||||
private transient UnicodeSet needingQuoteCharacters = null;
|
||||
|
||||
// data about the current pattern being parsed. start gets moved as we go along.
|
||||
private int start;
|
||||
private int limit;
|
||||
private CharSequence pattern;
|
||||
|
||||
public UnicodeSet getIgnorableCharacters() {
|
||||
return (UnicodeSet) ignorableCharacters.clone();
|
||||
}
|
||||
/**
|
||||
* Sets the characters to be ignored in parsing, eg new UnicodeSet("[:pattern_whitespace:]");
|
||||
* @param ignorableCharacters
|
||||
* @return
|
||||
*/
|
||||
public PatternTokenizer setIgnorableCharacters(UnicodeSet ignorableCharacters) {
|
||||
this.ignorableCharacters = (UnicodeSet) ignorableCharacters.clone();
|
||||
needingQuoteCharacters = null;
|
||||
return this;
|
||||
}
|
||||
public UnicodeSet getSyntaxCharacters() {
|
||||
return (UnicodeSet) syntaxCharacters.clone();
|
||||
}
|
||||
/**
|
||||
* Sets the characters to be interpreted as syntax characters in parsing, eg new UnicodeSet("[:pattern_syntax:]")
|
||||
* @param syntaxCharacters
|
||||
* @return
|
||||
*/
|
||||
public PatternTokenizer setSyntaxCharacters(UnicodeSet syntaxCharacters) {
|
||||
this.syntaxCharacters = (UnicodeSet) syntaxCharacters.clone();
|
||||
needingQuoteCharacters = null;
|
||||
return this;
|
||||
}
|
||||
public UnicodeSet getEscapeCharacters() {
|
||||
return (UnicodeSet) escapeCharacters.clone();
|
||||
}
|
||||
/**
|
||||
* Set characters to be escaped in literals, in quoteLiteral and normalize, eg new UnicodeSet("[^\\u0020-\\u007E]");
|
||||
* @param escapeCharacters
|
||||
* @return
|
||||
*/
|
||||
public PatternTokenizer setEscapeCharacters(UnicodeSet escapeCharacters) {
|
||||
this.escapeCharacters = (UnicodeSet) escapeCharacters.clone();
|
||||
return this;
|
||||
}
|
||||
public boolean isUsingQuote() {
|
||||
return usingQuote;
|
||||
}
|
||||
public PatternTokenizer setUsingQuote(boolean usingQuote) {
|
||||
this.usingQuote = usingQuote;
|
||||
needingQuoteCharacters = null;
|
||||
return this;
|
||||
}
|
||||
public boolean isUsingSlash() {
|
||||
return usingSlash;
|
||||
}
|
||||
public PatternTokenizer setUsingSlash(boolean usingSlash) {
|
||||
this.usingSlash = usingSlash;
|
||||
needingQuoteCharacters = null;
|
||||
return this;
|
||||
}
|
||||
// public UnicodeSet getQuoteCharacters() {
|
||||
// return (UnicodeSet) quoteCharacters.clone();
|
||||
// }
|
||||
// public PatternTokenizer setQuoteCharacters(UnicodeSet quoteCharacters) {
|
||||
// this.quoteCharacters = (UnicodeSet) quoteCharacters.clone();
|
||||
// needingQuoteCharacters = null;
|
||||
// return this;
|
||||
// }
|
||||
public int getLimit() {
|
||||
return limit;
|
||||
}
|
||||
public PatternTokenizer setLimit(int limit) {
|
||||
this.limit = limit;
|
||||
return this;
|
||||
}
|
||||
public int getStart() {
|
||||
return start;
|
||||
}
|
||||
public PatternTokenizer setStart(int start) {
|
||||
this.start = start;
|
||||
return this;
|
||||
}
|
||||
public PatternTokenizer setPattern(CharSequence pattern) {
|
||||
if (pattern == null) {
|
||||
throw new IllegalArgumentException("Inconsistent arguments");
|
||||
}
|
||||
this.start = 0;
|
||||
this.limit = pattern.length();
|
||||
this.pattern = pattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
public static final char SINGLE_QUOTE = '\'';
|
||||
public static final char BACK_SLASH = '\\';
|
||||
private static int NO_QUOTE = -1, IN_QUOTE = -2;
|
||||
/**
|
||||
* Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes.
|
||||
* @param string
|
||||
* @return
|
||||
*/
|
||||
public String quoteLiteral(CharSequence string) {
|
||||
if (needingQuoteCharacters == null) {
|
||||
needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters); // .addAll(quoteCharacters)
|
||||
if (usingSlash) needingQuoteCharacters.add(BACK_SLASH);
|
||||
if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE);
|
||||
}
|
||||
StringBuffer result = new StringBuffer();
|
||||
int quotedChar = NO_QUOTE;
|
||||
int cp;
|
||||
for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(string, i);
|
||||
if (escapeCharacters.contains(cp)) {
|
||||
// we may have to fix up previous characters
|
||||
if (quotedChar == IN_QUOTE) {
|
||||
result.append(SINGLE_QUOTE);
|
||||
quotedChar = NO_QUOTE;
|
||||
}
|
||||
appendEscaped(result, cp);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (needingQuoteCharacters.contains(cp)) {
|
||||
// if we have already started a quote
|
||||
if (quotedChar == IN_QUOTE) {
|
||||
UTF16.append(result, cp);
|
||||
if (usingQuote && cp == SINGLE_QUOTE) { // double it
|
||||
result.append(SINGLE_QUOTE);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// otherwise not already in quote
|
||||
if (usingSlash) {
|
||||
result.append(BACK_SLASH);
|
||||
UTF16.append(result, cp);
|
||||
continue;
|
||||
}
|
||||
if (usingQuote) {
|
||||
if (cp == SINGLE_QUOTE) { // double it and continue
|
||||
result.append(SINGLE_QUOTE);
|
||||
result.append(SINGLE_QUOTE);
|
||||
continue;
|
||||
}
|
||||
result.append(SINGLE_QUOTE);
|
||||
UTF16.append(result, cp);
|
||||
quotedChar = IN_QUOTE;
|
||||
continue;
|
||||
}
|
||||
// we have no choice but to use \\u or \\U
|
||||
appendEscaped(result, cp);
|
||||
continue;
|
||||
}
|
||||
// otherwise cp doesn't need quoting
|
||||
// we may have to fix up previous characters
|
||||
if (quotedChar == IN_QUOTE) {
|
||||
result.append(SINGLE_QUOTE);
|
||||
quotedChar = NO_QUOTE;
|
||||
}
|
||||
UTF16.append(result, cp);
|
||||
}
|
||||
// all done.
|
||||
// we may have to fix up previous characters
|
||||
if (quotedChar == IN_QUOTE) {
|
||||
result.append(SINGLE_QUOTE);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private void appendEscaped(StringBuffer result, int cp) {
|
||||
if (cp <= 0xFFFF) {
|
||||
result.append("\\u").append(Utility.hex(cp,4));
|
||||
} else {
|
||||
result.append("\\U").append(Utility.hex(cp,8));
|
||||
}
|
||||
}
|
||||
|
||||
public String normalize() {
|
||||
int oldStart = start;
|
||||
StringBuffer result = new StringBuffer();
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
while (true) {
|
||||
buffer.setLength(0);
|
||||
int status = next(buffer);
|
||||
if (status == DONE) {
|
||||
start = oldStart;
|
||||
return result.toString();
|
||||
}
|
||||
if (status != SYNTAX) {
|
||||
result.append(quoteLiteral(buffer));
|
||||
} else {
|
||||
result.append(buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static final int DONE = 0, SYNTAX = 1, LITERAL = 2, BROKEN_QUOTE = 3, BROKEN_ESCAPE = 4, UNKNOWN = 5;
|
||||
|
||||
private static final int AFTER_QUOTE = -1, NONE = 0, START_QUOTE = 1, NORMAL_QUOTE = 2, SLASH_START = 3, HEX = 4;
|
||||
|
||||
public int next(StringBuffer buffer) {
|
||||
if (start >= limit) return DONE;
|
||||
int status = UNKNOWN;
|
||||
int lastQuote = UNKNOWN;
|
||||
int quoteStatus = NONE;
|
||||
int hexCount = 0;
|
||||
int hexValue = 0;
|
||||
int cp;
|
||||
main:
|
||||
for (int i = start; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(pattern, i);
|
||||
// if we are in a quote, then handle it.
|
||||
switch (quoteStatus) {
|
||||
case SLASH_START:
|
||||
switch (cp) {
|
||||
case 'u':
|
||||
quoteStatus = HEX;
|
||||
hexCount = 4;
|
||||
hexValue = 0;
|
||||
continue main;
|
||||
case 'U':
|
||||
quoteStatus = HEX;
|
||||
hexCount = 8;
|
||||
hexValue = 0;
|
||||
continue main;
|
||||
default:
|
||||
if (usingSlash) {
|
||||
UTF16.append(buffer, cp);
|
||||
quoteStatus = NONE;
|
||||
continue main;
|
||||
} else {
|
||||
buffer.append(BACK_SLASH);
|
||||
quoteStatus = NONE;
|
||||
}
|
||||
}
|
||||
break; // fall through to NONE
|
||||
case HEX:
|
||||
hexValue <<= 4;
|
||||
hexValue += cp;
|
||||
switch (cp) {
|
||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
||||
hexValue -= '0'; break;
|
||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
||||
hexValue -= 'a' - 10; break;
|
||||
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
||||
hexValue -= 'A' - 10; break;
|
||||
default:
|
||||
start = i;
|
||||
return BROKEN_ESCAPE;
|
||||
}
|
||||
--hexCount;
|
||||
if (hexCount == 0) {
|
||||
quoteStatus = NONE;
|
||||
UTF16.append(buffer, hexValue);
|
||||
}
|
||||
continue main;
|
||||
case AFTER_QUOTE:
|
||||
// see if we get another quote character
|
||||
// if we just ended a quote BUT the following character is the lastQuote character, then we have a situation like '...''...', so we restart the quote
|
||||
if (cp == lastQuote) {
|
||||
UTF16.append(buffer, cp);
|
||||
quoteStatus = NORMAL_QUOTE;
|
||||
continue main;
|
||||
}
|
||||
quoteStatus = NONE;
|
||||
break; // fall through to NONE
|
||||
case START_QUOTE:
|
||||
// if we are at the very start of a quote, and we hit another quote mark then we emit a literal quote character and end the quote
|
||||
if (cp == lastQuote) {
|
||||
UTF16.append(buffer, cp);
|
||||
quoteStatus = NONE; // get out of quote, with no trace remaining
|
||||
continue;
|
||||
}
|
||||
// otherwise get into quote
|
||||
UTF16.append(buffer, cp);
|
||||
quoteStatus = NORMAL_QUOTE;
|
||||
continue main;
|
||||
case NORMAL_QUOTE:
|
||||
if (cp == lastQuote) {
|
||||
quoteStatus = AFTER_QUOTE; // get out of quote
|
||||
continue main;
|
||||
}
|
||||
UTF16.append(buffer, cp);
|
||||
continue main;
|
||||
}
|
||||
|
||||
if (ignorableCharacters.contains(cp)) {
|
||||
continue;
|
||||
}
|
||||
// do syntax characters
|
||||
if (syntaxCharacters.contains(cp)) {
|
||||
if (status == UNKNOWN) {
|
||||
UTF16.append(buffer, cp);
|
||||
start = i + UTF16.getCharCount(cp);
|
||||
return SYNTAX;
|
||||
} else { // LITERAL, so back up and break
|
||||
start = i;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
// otherwise it is a literal; keep on going
|
||||
status = LITERAL;
|
||||
if (cp == BACK_SLASH) {
|
||||
quoteStatus = SLASH_START;
|
||||
continue;
|
||||
} else if (usingQuote && cp == SINGLE_QUOTE) {
|
||||
lastQuote = cp;
|
||||
quoteStatus = START_QUOTE;
|
||||
continue;
|
||||
}
|
||||
// normal literals
|
||||
UTF16.append(buffer, cp);
|
||||
}
|
||||
// handle final cleanup
|
||||
start = limit;
|
||||
switch (quoteStatus) {
|
||||
case HEX:
|
||||
status = BROKEN_ESCAPE;
|
||||
break;
|
||||
case SLASH_START:
|
||||
if (usingSlash) {
|
||||
status = BROKEN_ESCAPE;
|
||||
} else {
|
||||
buffer.append(BACK_SLASH);
|
||||
}
|
||||
break;
|
||||
case START_QUOTE: case NORMAL_QUOTE:
|
||||
status = BROKEN_QUOTE;
|
||||
break;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1826,6 +1826,23 @@ public final class Utility {
|
|||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Utility to duplicate a string count times
|
||||
* @param s
|
||||
* @param count
|
||||
*/
|
||||
public static String repeat(String s, int count) {
|
||||
if (count <= 0) return "";
|
||||
if (count == 1) return s;
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
result.append(s);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
// !!! 1.3 compatibility
|
||||
public static int indexOf(StringBuffer buf, String s) {
|
||||
//#ifndef FOUNDATION
|
||||
|
|
1624
icu4j/src/com/ibm/icu/impl/data/DateData.java
Normal file
1624
icu4j/src/com/ibm/icu/impl/data/DateData.java
Normal file
File diff suppressed because it is too large
Load diff
1232
icu4j/src/com/ibm/icu/text/DateTimePatternGenerator.java
Normal file
1232
icu4j/src/com/ibm/icu/text/DateTimePatternGenerator.java
Normal file
File diff suppressed because it is too large
Load diff
|
@ -228,6 +228,71 @@ public final class UTF16
|
|||
}
|
||||
return single; // return unmatched surrogate
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a single UTF-32 value from a string.
|
||||
* Used when iterating forwards or backwards (with
|
||||
* <code>UTF16.getCharCount()</code>, as well as random access. If a
|
||||
* validity check is required, use
|
||||
* <code><a href="../lang/UCharacter.html#isLegal(char)">
|
||||
* UCharacter.isLegal()</a></code> on the return value.
|
||||
* If the char retrieved is part of a surrogate pair, its supplementary
|
||||
* character will be returned. If a complete supplementary character is
|
||||
* not found the incomplete character will be returned
|
||||
* @param source array of UTF-16 chars
|
||||
* @param offset16 UTF-16 offset to the start of the character.
|
||||
* @return UTF-32 value for the UTF-32 value that contains the char at
|
||||
* offset16. The boundaries of that codepoint are the same as in
|
||||
* <code>bounds32()</code>.
|
||||
* @exception IndexOutOfBoundsException thrown if offset16 is out of
|
||||
* bounds.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
public static int charAt(CharSequence source, int offset16)
|
||||
{
|
||||
char single = source.charAt(offset16);
|
||||
if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) {
|
||||
return single;
|
||||
}
|
||||
return _charAt(source, offset16, single);
|
||||
}
|
||||
|
||||
private static int _charAt(CharSequence source, int offset16, char single)
|
||||
{
|
||||
if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
|
||||
return single;
|
||||
}
|
||||
|
||||
// Convert the UTF-16 surrogate pair if necessary.
|
||||
// For simplicity in usage, and because the frequency of pairs is
|
||||
// low, look both directions.
|
||||
|
||||
if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
|
||||
++ offset16;
|
||||
if (source.length() != offset16) {
|
||||
char trail = source.charAt(offset16);
|
||||
if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
|
||||
trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
|
||||
return UCharacterProperty.getRawSupplementary(single,
|
||||
trail);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
-- offset16;
|
||||
if (offset16 >= 0) {
|
||||
// single is a trail surrogate so
|
||||
char lead = source.charAt(offset16);
|
||||
if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
|
||||
lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
|
||||
return UCharacterProperty.getRawSupplementary(lead,
|
||||
single);
|
||||
}
|
||||
}
|
||||
}
|
||||
return single; // return unmatched surrogate
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a single UTF-32 value from a string.
|
||||
|
|
Loading…
Add table
Reference in a new issue