diff --git a/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java b/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java index b6981d4796f..9de3e20da53 100644 --- a/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java,v $ - * $Date: 2004/01/27 23:13:13 $ - * $Revision: 1.2 $ + * $Date: 2004/02/07 00:59:26 $ + * $Revision: 1.3 $ * ******************************************************************************* */ @@ -27,7 +27,9 @@ import java.text.DateFormat; import java.text.NumberFormat; import com.ibm.icu.dev.test.*; +import com.ibm.icu.dev.test.util.BagFormatter; import com.ibm.icu.util.TimeZone; +import com.ibm.icu.util.VersionInfo; /** @@ -36,7 +38,7 @@ import com.ibm.icu.util.TimeZone; * */ public class TimeZoneAliasTest extends TestFmwk { - + public static void main(String[] args) throws Exception { new TimeZoneAliasTest().run(args); } @@ -47,6 +49,7 @@ public class TimeZoneAliasTest extends TestFmwk { * 2. all aliases must have the same offsets */ public void TestAliases() { + if (skipIfBeforeICU(3,0)) return; Zone.Seconds seconds = new Zone.Seconds(); for (Iterator it = Zone.getZoneSet().iterator(); it.hasNext(); ) { Zone zone = (Zone)it.next(); @@ -71,9 +74,9 @@ public class TimeZoneAliasTest extends TestFmwk { if (!aliasesSet.equals(otherAliases)) { errln( "Aliases Unsymmetric: " - + id + " => " + join(aliasesSet, ", ") + + id + " => " + Zone.bf.join(aliasesSet) + "; " - + otherId + " => " + join(otherAliases, ", ")); + + otherId + " => " + Zone.bf.join(otherAliases)); } if (zone.findOffsetOrdering(otherZone, seconds) != 0) { errln("Aliases differ: " + id + ", " + otherId @@ -87,12 +90,13 @@ public class TimeZoneAliasTest extends TestFmwk { * We check to see that every timezone that is not an alias is actually different! */ public void TestDifferences() { + if (skipIfBeforeICU(3,0)) return; Zone last = null; Zone.Seconds diffDate = new Zone.Seconds(); for (Iterator it = Zone.getZoneSet().iterator(); it.hasNext();) { Zone testZone = (Zone)it.next(); if (last != null) { - String common = testZone + " vs " + last + ":\t"; + String common = testZone + "\tvs " + last + ":\t"; int diff = testZone.findOffsetOrdering(last, diffDate); if (diff != 0) { logln("\t" + common + "difference at: " + diffDate @@ -110,17 +114,17 @@ public class TimeZoneAliasTest extends TestFmwk { /** * Utility for printing out zones to be translated. */ - public static void printZones() { + public static void TestGenerateZones() { int count = 1; for (Iterator it = Zone.getUniqueZoneSet().iterator(); it.hasNext();) { Zone zone = (Zone)it.next(); System.out.println(zone.toString(count++)); } } - + /** Utility; ought to be someplace common */ - + /* static String join(Collection c, String separator) { StringBuffer result = new StringBuffer(); boolean isFirst = true; @@ -131,6 +135,7 @@ public class TimeZoneAliasTest extends TestFmwk { } return result.toString(); } + */ /** * The guts is in this subclass. It sucks in all the data from the zones, @@ -144,6 +149,7 @@ public class TimeZoneAliasTest extends TestFmwk { */ static class Zone implements Comparable { // class fields + static private final BagFormatter bf = new BagFormatter().setSeparator(", "); static private final DateFormat df = DateFormat.getDateInstance(DateFormat.LONG, Locale.US); static private final NumberFormat nf = NumberFormat.getInstance(Locale.US); static private final long HOUR = 1000*60*60; @@ -365,13 +371,13 @@ public class TimeZoneAliasTest extends TestFmwk { public String getPurportedAliasesAsString() { Set s = getPurportedAliases(); if (s.size() == 0) return ""; - return " {" + join(s,", ") + "}"; + return " " + bf.join(s); } public String getRealAliasesAsString() { Set s = (Set)idToRealAliases.get(id); if (s == null) return ""; - return " {" + join(s,", ") + "}"; + return " *" + bf.join(s); } public String getCity() { @@ -384,6 +390,9 @@ public class TimeZoneAliasTest extends TestFmwk { return toString(-1); } + /** + * Where count > 0, returns string that is set up for translation + */ public String toString(int count) { String city = getCity(); String hours = formatHours(minRecentOffset) @@ -393,9 +402,9 @@ public class TimeZoneAliasTest extends TestFmwk { if (count < 0) { return id + getPurportedAliasesAsString() + " (" + hours + ")"; } - + // for getting template for translation return "\t{\t\"" + id + "\"\t// [" + count + "] " + hours - + getPurportedAliasesAsString() + getRealAliasesAsString() + "\r\n" + + getRealAliasesAsString() + "\r\n" + "\t\t// translate the following!!\r\n" + (minRecentOffset != maxRecentOffset ? "\t\t\"" + city + " Standard Time\"\r\n" diff --git a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java index 47cf2e69548..e4c3c773a0d 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $ - * $Date: 2003/12/29 19:48:58 $ - * $Revision: 1.5 $ + * $Date: 2004/02/07 00:59:26 $ + * $Revision: 1.6 $ * ***************************************************************************************** */ @@ -22,6 +22,23 @@ import java.util.*; import java.text.MessageFormat; public class BagFormatter { + + public static final Transliterator toHTML = Transliterator.createFromRules( + "any-html", + "'<' > '<' ;" + + "'&' > '&' ;" + + "'>' > '>' ;" + + "'\"' > '"' ; ", + Transliterator.FORWARD); + + public static final Transliterator fromHTML = Transliterator.createFromRules( + "html-any", + "'<' < '&'[lL][Tt]';' ;" + + "'&' < '&'[aA][mM][pP]';' ;" + + "'>' < '&'[gG][tT]';' ;" + + "'\"' < '&'[qQ][uU][oO][tT]';' ; ", + Transliterator.REVERSE); + public static final PrintWriter CONSOLE = new PrintWriter(System.out,true); private static PrintWriter log = CONSOLE; @@ -132,8 +149,7 @@ public class BagFormatter { StringWriter buffer = new StringWriter(); PrintWriter output = new PrintWriter(buffer); output.println(title); - mainVisitor.output = output; - mainVisitor.doAt(c); + mainVisitor.doAt(c, output); return buffer.toString(); } @@ -147,8 +163,8 @@ public class BagFormatter { */ public void showSetNames(PrintWriter output, String title, Object c) { output.println(title); - mainVisitor.output = output; - mainVisitor.doAt(c); + mainVisitor.doAt(c, output); + output.flush(); } /** @@ -244,91 +260,116 @@ public class BagFormatter { } */ - public void setMergeRanges(boolean in) { + public BagFormatter setMergeRanges(boolean in) { mergeRanges = in; + return this; } - public void setShowSetAlso(boolean b) { + public BagFormatter setShowSetAlso(boolean b) { showSetAlso = b; + return this; } - public String getName(int codePoint) { - String hcp = "U+" + Utility.hex(codePoint, 4) + " "; - String result = nameProp.getPropertyValue(codePoint); - if (result != null) - return hcp + result; - String prop = catProp.getPropertyValue(codePoint); - if (prop.equals("Control")) { - result = nameProp.getPropertyValue(codePoint); - if (result != null) - return hcp + "<" + result + ">"; - } - return hcp + ""; + + /*public String getName(int codePoint) { + return getName(codePoint, false); + }*/ + + public String getName(String separator, int start, int end) { + if (nameSource == null || nameSource == UnicodeProperty.NULL) return ""; + String result = getName(start, false); + if (start == end) return separator + result; + String endString = getName(end, false); + if (abbreviated) endString = getAbbreviatedName(endString,result,"~"); + return separator + result + ".." + endString; } public String getName(String s) { - if (s.length() == 1) return getName(s.charAt(0)); // optimize - StringBuffer sb = new StringBuffer(); - int cp; - for (int i = 0; i < s.length(); i+=UTF16.getCharCount(cp)) { - cp = UTF16.charAt(s,i); - if (i != 0) sb.append(separator); - sb.append(getName(cp)); + return getName(s, false); + } + + UnicodeLabel nameSource; + + static class NameLabel extends UnicodeLabel { + UnicodeProperty nameProp; + UnicodeProperty name1Prop; + UnicodeProperty catProp; + //UnicodeProperty shortCatProp; + + NameLabel(UnicodeProperty.Factory source) { + nameProp = source.getProperty("Name"); + name1Prop = source.getProperty("Unicode_1_Name"); + catProp = source.getProperty("General_Category"); + //shortCatProp = source.getProperty("General_Category"); + } + + public String getValue(int codePoint, boolean isShort) { + String hcp = !isShort + ? "U+" + Utility.hex(codePoint, 4) + " " + : ""; + String result = nameProp.getValue(codePoint); + if (result != null) + return hcp + result; + String prop = catProp.getValue(codePoint, true); + if (prop.equals("Control")) { + result = name1Prop.getValue(codePoint); + if (result != null) + return hcp + "<" + result + ">"; + } + return hcp + ""; } - return sb.toString(); + + } + + // refactored + public String getName(int codePoint, boolean withCodePoint) { + return nameSource.getValue(codePoint, !withCodePoint); + } + + public String getName(String s, boolean withCodePoint) { + return nameSource.getValue(s, separator, !withCodePoint); } public String hex(String s) { - if (s.length() == 1) return Utility.hex(s.charAt(0),4); // optimize - StringBuffer sb = new StringBuffer(); - int cp; - for (int i = 0; i < s.length(); i+=UTF16.getCharCount(cp)) { - cp = UTF16.charAt(s,i); - if (i != 0) sb.append(separator); - sb.append(Utility.hex(cp,4)); - } - return sb.toString(); + return UnicodeLabel.HEX.getValue(s, separator, true); } - String separator = ","; - UnicodePropertySource source; - UnicodePropertySource labelSource; + public String hex(int start, int end) { + String s = Utility.hex(start,4); + if (start == end) return s; + return s + ".." + Utility.hex(end,4); + } + + private String separator = ","; + private String prefix = "["; + private String suffix = "]"; + UnicodeProperty.Factory source; + UnicodeLabel labelSource = UnicodeLabel.NULL; + UnicodeLabel valueSource = UnicodeLabel.NULL; + private boolean showCount = true; - UnicodePropertySource nameProp; - UnicodePropertySource name1Prop; - UnicodePropertySource catProp; - UnicodePropertySource shortCatProp; - - public void setUnicodePropertySource(UnicodePropertySource source) { + public BagFormatter setUnicodePropertySource(UnicodeProperty.Factory source) { this.source = source; - nameProp = ((UnicodePropertySource)source.clone()) - .setPropertyAlias("Name"); - - name1Prop = ((UnicodePropertySource)source.clone()) - .setPropertyAlias("Unicode_1_Name"); - - catProp = ((UnicodePropertySource)source.clone()) - .setPropertyAlias("General_Category"); - - shortCatProp = ((UnicodePropertySource)source.clone()) - .setPropertyAlias("General_Category") - .setNameChoice(UProperty.NameChoice.SHORT); + nameSource = new NameLabel(source); + return this; } { - setUnicodePropertySource(new UnicodePropertySource.ICU()); + setUnicodePropertySource(ICUPropertyFactory.make()); Map labelMap = new HashMap(); labelMap.put("Lo","L&"); labelMap.put("Lu","L&"); labelMap.put("Lt","L&"); - setLabelSource(new UnicodePropertySource.ICU() - .setPropertyAlias("General_Category") - .setNameChoice(UProperty.NameChoice.SHORT) - .setFilter( - new UnicodePropertySource.MapFilter().setMap(labelMap))); + setLabelSource(new UnicodeProperty.FilteredUnicodeProperty( + source.getProperty("General_Category"), + new UnicodeProperty.MapFilter(labelMap))); + } + + public String join(Object o) { + return labelVisitor.join(o); } // ===== PRIVATES ===== - private Visitor.Join labelVisitor = new Visitor.Join(); + private Join labelVisitor = new Join(); private boolean mergeRanges = true; private Transliterator showLiteral = null; @@ -375,36 +416,38 @@ public class BagFormatter { } } - private Tabber singleTabber = - new Tabber.MonoTabber( - new int[] { - 0, - Tabber.LEFT, - 6, - Tabber.LEFT, - 10, - Tabber.LEFT, - 14, - Tabber.LEFT }); - private Tabber rangeTabber = - new Tabber.MonoTabber( - new int[] { - 0, - Tabber.LEFT, - 14, - Tabber.LEFT, - 18, - Tabber.LEFT, - 27, - Tabber.LEFT, - 34, - Tabber.LEFT }); - private static NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH); + + private String lineSeparator = "\r\n"; private class MyVisitor extends Visitor { - PrintWriter output; + private PrintWriter output; + Tabber.MonoTabber myTabber; + String commentSeparator = "\t# "; + + public void doAt(Object c, PrintWriter output) { + this.output = output; + myTabber = new Tabber.MonoTabber(); + int valueSize = valueSource.getMaxWidth(shortValue); + if (valueSize > 0) valueSize += 2; + if (!mergeRanges) { + myTabber.add(0,Tabber.LEFT); + myTabber.add(6 + valueSize,Tabber.LEFT); + myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT); + myTabber.add(4,Tabber.LEFT); + } else { + myTabber.add(0,Tabber.LEFT); + myTabber.add(15 + valueSize,Tabber.LEFT); + myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT); + myTabber.add(11,Tabber.LEFT); + myTabber.add(7,Tabber.LEFT); + } + commentSeparator = (showCount || showLiteral != null + || labelSource != UnicodeProperty.NULL || nameSource != UnicodeProperty.NULL) + ? "\t# " : ""; + doAt(c); + } public String format(Object o) { StringWriter sw = new StringWriter(); @@ -418,7 +461,7 @@ public class BagFormatter { protected void doBefore(Object container, Object o) { if (showSetAlso && container instanceof UnicodeSet) { - output.println("# " + container); + output.print("# " + container + lineSeparator); } } @@ -426,7 +469,7 @@ public class BagFormatter { } protected void doAfter(Object container, Object o) { - output.println("# Total: " + nf.format(count(container))); + output.print("# Total: " + nf.format(count(container)) + lineSeparator); } protected void doSimpleAt(Object o) { @@ -443,35 +486,40 @@ public class BagFormatter { doAt((Visitor.CodePointRange) o); } else { String thing = o.toString(); - output.println( - singleTabber.process( + output.print( + myTabber.process( hex(thing) - + " \t# " + + commentSeparator + insertLiteral(thing) - + " \t" - + getName(thing))); + + "\t" + + getName(thing)) + + lineSeparator); } } protected void doAt(Visitor.CodePointRange usi) { if (!mergeRanges) { for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) { - String label = labelSource.getPropertyValue(cp); + String label = labelSource.getValue(cp, shortLabel); if (label.length() != 0) label += " "; - output.println( - singleTabber.process( + String value = valueSource.getValue(cp, shortValue); + if (value.length() != 0) { + value = "; " + value; + } + output.print( + myTabber.process( Utility.hex(cp, 4) - + " \t# " + + value + + commentSeparator + label - + insertLiteral(cp) - + " \t" - + getName(cp))); + + insertLiteral(cp,cp) + + getName("\t", cp, cp)) + + lineSeparator); } } else { rf.reset(usi.codepoint, usi.codepointEnd + 1); - String label; - while ((label = rf.next()) != null) { + while (rf.next()) { /* String label = (usi.codepoint != usi.codepointEnd) ? label = getLabels(usi.codepoint, usi.codepointEnd) @@ -479,31 +527,24 @@ public class BagFormatter { */ int start = rf.start; int end = rf.limit - 1; + String label = rf.label; if (label.length() != 0) label += " "; - output.println( - rangeTabber.process( - Utility.hex(start, 4) - + ((start != end) - ? (".." + Utility.hex(end, 4)) - : "") - + " \t# " + String value = rf.value; + if (value.length() != 0) { + value = "; " + value; + } + String count = showCount ? "\t["+ nf.format(end - start + 1)+ "]" : ""; + output.print( + myTabber.process( + hex(start, end) + + value + + commentSeparator + label - + " \t[" - + nf.format(end - start + 1) - + "]" + + count + insertLiteral(start, end) - + " \t" - + getName(start) - + ((start != end) - ? (".." - + (abbreviated - ? getAbbreviatedName( - getName(end), - getName(start), - "~") - : getName(end))) - : ""))); + + getName("\t", start, end)) + + lineSeparator); } } } @@ -521,11 +562,12 @@ public class BagFormatter { : "") + ") "); } - + /* private String insertLiteral(int cp) { return (showLiteral == null ? "" : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") "); } + */ } /** @@ -581,44 +623,67 @@ public class BagFormatter { private class RangeFinder { int start, limit; private int veryLimit; - void reset(int start, int end) { + String label, value; + void reset(int start, int limit) { this.limit = start; - this.veryLimit = end; + this.veryLimit = limit; } - String next() { + boolean next() { if (limit >= veryLimit) - return null; - start = limit; - String label = labelSource.getPropertyValue(limit++); - for (; limit < veryLimit; ++limit) { - String s = labelSource.getPropertyValue(limit); - if (!s.equals(label)) - break; + return false; + start = limit; // set to end of last + label = labelSource.getValue(limit, shortLabel); + value = valueSource.getValue(limit, shortLabel); + limit++; + for (; limit < veryLimit; limit++) { + String s = labelSource.getValue(limit, shortLabel); + String v = valueSource.getValue(limit, shortLabel); + if (!s.equals(label) || !v.equals(value)) break; } - return label; + // at this point, limit is the first item that has a different label than source + // OR, we got to the end, and limit == veryLimit + return true; } } + boolean shortLabel = true; + boolean shortValue = true; + + public String getPrefix() { + return prefix; + } + + public String getSuffix() { + return suffix; + } + + public BagFormatter setPrefix(String string) { + prefix = string; + return this; + } + + public BagFormatter setSuffix(String string) { + suffix = string; + return this; + } + public boolean isAbbreviated() { return abbreviated; } - public void setAbbreviated(boolean b) { + public BagFormatter setAbbreviated(boolean b) { abbreviated = b; + return this; } - public UnicodePropertySource getSource() { + public UnicodeProperty.Factory getSource() { return source; } - public UnicodePropertySource getLabelSource() { + public UnicodeLabel getLabelSource() { return labelSource; } - public void setLabelSource(UnicodePropertySource source) { - labelSource = source; - } - /** * @deprecated */ @@ -629,7 +694,7 @@ public class BagFormatter { // UTILITIES public static final Transliterator hex = Transliterator.getInstance( - "[^\\u0021-\\u007E\\u00A0-\\u00FF] hex"); + "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex"); public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException { return openReader(dir,filename,"UTF-8"); @@ -670,20 +735,117 @@ public class BagFormatter { public static PrintWriter getLog() { return log; } - public static void setLog(PrintWriter writer) { + public BagFormatter setLog(PrintWriter writer) { log = writer; + return this; } public String getSeparator() { return separator; } - public void setSeparator(String string) { + public BagFormatter setSeparator(String string) { separator = string; + return this; } public Transliterator getShowLiteral() { return showLiteral; } - public void setShowLiteral(Transliterator transliterator) { + public BagFormatter setShowLiteral(Transliterator transliterator) { showLiteral = transliterator; + return this; + } + + // ===== CONVENIENCES ===== + private class Join extends Visitor { + StringBuffer output = new StringBuffer(); + int depth = 0; + String join (Object o) { + output.setLength(0); + doAt(o); + return output.toString(); + } + protected void doBefore(Object container, Object item) { + ++depth; + output.append(prefix); + } + protected void doAfter(Object container, Object item) { + output.append(suffix); + --depth; + } + protected void doBetween(Object container, Object lastItem, Object nextItem) { + output.append(separator); + } + protected void doSimpleAt(Object o) { + if (o != null) output.append(o.toString()); + } + } + /** + * @return + */ + public String getLineSeparator() { + return lineSeparator; + } + + /** + * @param string + */ + public void setLineSeparator(String string) { + lineSeparator = string; + } + + /** + * @param label + */ + public BagFormatter setLabelSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + labelSource = label; + return this; + } + + /** + * @return + */ + public UnicodeLabel getNameSource() { + return nameSource; + } + + /** + * @param label + */ + public BagFormatter setNameSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + nameSource = label; + return this; + } + + /** + * @return + */ + public UnicodeLabel getValueSource() { + return valueSource; + } + + /** + * @param label + */ + public BagFormatter setValueSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + valueSource = label; + return this; + } + + /** + * @return + */ + public boolean isShowCount() { + return showCount; + } + + /** + * @param b + */ + public BagFormatter setShowCount(boolean b) { + showCount = b; + return this; } } \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java b/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java new file mode 100644 index 00000000000..37765768fcd --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java @@ -0,0 +1,390 @@ + +/* + ******************************************************************************* + * Copyright (C) 2002-2003, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java,v $ + * $Date: 2004/02/07 00:59:26 $ + * $Revision: 1.1 $ + * + ***************************************************************************************** + */ +package com.ibm.icu.dev.test.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.Locale; +import java.util.Map; +import java.util.List; +import java.util.Arrays; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.ibm.icu.lang.UProperty; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; + + +/** + * Provides a general interface for Unicode Properties, and + * extracting sets based on those values. + * @author Davis + */ + +public class ICUPropertyFactory extends UnicodeProperty.Factory { + + public static class RegexMatcher implements UnicodeProperty.Matcher { + private Matcher matcher; + + public UnicodeProperty.Matcher set(String pattern) { + matcher = Pattern.compile(pattern).matcher(""); + return this; + } + public boolean matches(String value) { + matcher.reset(value); + return matcher.matches(); + } + } + + static class ICUProperty extends UnicodeProperty { + protected int propEnum = Integer.MIN_VALUE; + + protected ICUProperty(String propName, int propEnum) { + this.propEnum = propEnum; + setName(propName); + setType(internalGetPropertyType(propEnum)); + } + + boolean shownException = false; + + public String getValue(int codePoint) { + if (propEnum < UProperty.INT_LIMIT) { + int enumValue = -1; + String value = null; + try { + enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum); + if (enumValue >= 0) value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG); + } catch (IllegalArgumentException e) { + if (!shownException) { + System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint)); + shownException = true; + } + } + return value != null ? value : String.valueOf(enumValue); + } else if (propEnum < UProperty.DOUBLE_LIMIT) { + double num = UCharacter.getUnicodeNumericValue(codePoint); + if (num == UCharacter.NO_NUMERIC_VALUE) return null; + return Double.toString(num); + // TODO: Fix HACK -- API deficient + } else switch(propEnum) { + case UProperty.AGE: String temp = UCharacter.getAge(codePoint).toString(); + if (temp.equals("0.0.0.0")) return "UNSPECIFIED"; + if (temp.endsWith(".0.0")) return temp.substring(0,temp.length()-4); + return temp; + case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint)); + case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true); + case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint); + case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)); + case UProperty.NAME: return UCharacter.getName(codePoint); + case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true)); + case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint)); + case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint)); + case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint)); + case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null); + case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint); + case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint)); + case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC); + case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD); + case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC); + case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD); + case isNFC: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint))); + case isNFD: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint))); + case isNFKC: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint))); + case isNFKD: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint))); + case isLowercase: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint))); + case isUppercase: return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint))); + case isTitlecase: return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null).equals(UTF16.valueOf(codePoint))); + case isCasefolded: return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint),true).equals(UTF16.valueOf(codePoint))); + case isCased: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint))); + } + return null; + } + + public Collection getAvailableValueAliases(Collection result) { + if (result == null) result = new ArrayList(); + if (propEnum < UProperty.INT_LIMIT) { + if (Binary_Extras.isInRange(propEnum)) { + propEnum = UProperty.BINARY_START; // HACK + } + int start = UCharacter.getIntPropertyMinValue(propEnum); + int end = UCharacter.getIntPropertyMaxValue(propEnum); + for (int i = start; i <= end; ++i) { + String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG); + String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT); + if (alias == null) { + alias = alias2; + } + //System.out.println(propertyAlias + "\t" + i + ":\t" + alias); + if (alias != null && !result.contains(alias)) result.add(alias); + } + } else { + String alias = getFixedValueAlias(null, -1,UProperty.NameChoice.LONG); + if (alias != null && !result.contains(alias)) result.add(alias); + } + return result; + } + + /** + * @param valueAlias null if unused. + * @param valueEnum -1 if unused + * @param nameChoice + * @return + */ + private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) { + if (propEnum >= UProperty.STRING_START) { + if (nameChoice != UProperty.NameChoice.LONG) return null; + return ""; + } else if (propEnum >= UProperty.DOUBLE_START) { + if (nameChoice != UProperty.NameChoice.LONG) return null; + return ""; + } + if (valueAlias != null && !valueAlias.equals("")) { + valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias); + } + // because these are defined badly, there may be no normal (long) name. + // if there is + String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice); + if (result != null) return result; + // HACK try other namechoice + if (nameChoice == UProperty.NameChoice.LONG) { + result = fixedGetPropertyValueName(propEnum,valueEnum, UProperty.NameChoice.SHORT); + if (result != null) return result; + return ""; + } + return null; + } + + private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) { + try { + return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice); + } catch (Exception e) { + return null; + } + } + + public Collection getAliases(Collection result) { + if (result == null) result = new ArrayList(); + String alias = String_Extras.get(propEnum); + if (alias == null) alias = Binary_Extras.get(propEnum); + if (alias != null) { + if (!result.contains(alias)) result.add(alias); + } else { + try { + for (int nameChoice = 0; ; ++nameChoice) { + alias = UCharacter.getPropertyName(propEnum, nameChoice); + if (alias == null) break; + if (nameChoice > 2) { + System.out.println("Something wrong"); + } + if (!result.contains(alias)) result.add(alias); + } + } catch (IllegalArgumentException e) { + // ok, continue + } + } + return result; + } + + public Collection getValueAliases(String valueAlias, Collection result) { + if (result == null) result = new ArrayList(); + for (int nameChoice = 0; ; ++nameChoice) { + String alias = getFixedValueAlias(valueAlias, -1, nameChoice); + if (nameChoice > 2) break; + if (alias == null) continue; + if (!result.contains(alias)) result.add(alias); + } + return result; + } + + + /* (non-Javadoc) + * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType() + */ + private int internalGetPropertyType(int propEnum) { + switch(propEnum) { + //case UProperty.AGE: + //case UProperty.NAME: + //case UProperty.UNICODE_1_NAME: + case UProperty.BIDI_MIRRORING_GLYPH: + case UProperty.CASE_FOLDING: + case UProperty.ISO_COMMENT: + case UProperty.LOWERCASE_MAPPING: + case UProperty.SIMPLE_CASE_FOLDING: + case UProperty.SIMPLE_LOWERCASE_MAPPING: + case UProperty.SIMPLE_TITLECASE_MAPPING: + case UProperty.SIMPLE_UPPERCASE_MAPPING: + case UProperty.TITLECASE_MAPPING: + case UProperty.UPPERCASE_MAPPING: + return UnicodeProperty.EXTENDED_STRING; + } + if (propEnum < UProperty.BINARY_START) return UnicodeProperty.UNKNOWN; + if (propEnum < UProperty.BINARY_LIMIT) return UnicodeProperty.BINARY; + if (propEnum < UProperty.INT_START) return UnicodeProperty.EXTENDED_BINARY; + if (propEnum < UProperty.INT_LIMIT) return UnicodeProperty.ENUMERATED; + if (propEnum < UProperty.DOUBLE_START) return UnicodeProperty.EXTENDED_ENUMERATED; + if (propEnum < UProperty.DOUBLE_LIMIT) return UnicodeProperty.NUMERIC; + if (propEnum < UProperty.STRING_START) return UnicodeProperty.EXTENDED_NUMERIC; + if (propEnum < UProperty.STRING_LIMIT) return UnicodeProperty.STRING; + return UnicodeProperty.EXTENDED_STRING; + } + } + + /*{ + matchIterator = new UnicodeSetIterator( + new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]")); + }*/ + + + + /* + * Other Missing Functions: + Expands_On_NFC + Expands_On_NFD + Expands_On_NFKC + Expands_On_NFKD + Composition_Exclusion + Decomposition_Mapping + FC_NFKC_Closure + ISO_Comment + NFC_Quick_Check + NFD_Quick_Check + NFKC_Quick_Check + NFKD_Quick_Check + Special_Case_Condition + Unicode_Radical_Stroke + */ + + static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT, + new String[] { + "isNFC", "isNFD", "isNFKC", "isNFKD", + "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased", + }); + + static final Names String_Extras = new Names(UProperty.STRING_LIMIT, + new String[] { + "toNFC", "toNFD", "toNFKC", "toNKFD", + }); + + static final int + isNFC = UProperty.BINARY_LIMIT, + isNFD = UProperty.BINARY_LIMIT+1, + isNFKC = UProperty.BINARY_LIMIT+2, + isNFKD = UProperty.BINARY_LIMIT+3, + isLowercase = UProperty.BINARY_LIMIT+4, + isUppercase = UProperty.BINARY_LIMIT+5, + isTitlecase = UProperty.BINARY_LIMIT+6, + isCasefolded = UProperty.BINARY_LIMIT+7, + isCased = UProperty.BINARY_LIMIT+8, + + NFC = UProperty.STRING_LIMIT, + NFD = UProperty.STRING_LIMIT+1, + NFKC = UProperty.STRING_LIMIT+2, + NFKD = UProperty.STRING_LIMIT+3 + ; + + private ICUPropertyFactory() { + Collection c = getInternalAvailablePropertyAliases(new TreeSet()); + Iterator it = c.iterator(); + while (it.hasNext()) { + add(getInternalProperty((String)it.next())); + } + } + + private static ICUPropertyFactory singleton = null; + + public static synchronized ICUPropertyFactory make() { + if (singleton != null) return singleton; + singleton = new ICUPropertyFactory(); + return singleton; + } + + public Collection getInternalAvailablePropertyAliases(Collection result) { + int[][] ranges = { + {UProperty.BINARY_START, UProperty.BINARY_LIMIT}, + {UProperty.INT_START, UProperty.INT_LIMIT}, + {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT}, + {UProperty.STRING_START, UProperty.STRING_LIMIT}, + }; + for (int i = 0; i < ranges.length; ++i) { + for (int j = ranges[i][0]; j < ranges[i][1]; ++j) { + String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG); + if (!result.contains(alias)) result.add(alias); + } + } + result.addAll(String_Extras.getNames()); + result.addAll(Binary_Extras.getNames()); + return result; + } + + public UnicodeProperty getInternalProperty(String propertyAlias) { + int propEnum; + main: + { + int possibleItem = Binary_Extras.get(propertyAlias); + if (possibleItem >= 0) { + propEnum = possibleItem; + break main; + } + possibleItem = String_Extras.get(propertyAlias); + if (possibleItem >= 0) { + propEnum = possibleItem; + break main; + } + propEnum = UCharacter.getPropertyEnum(propertyAlias); + } + return new ICUProperty(propertyAlias, propEnum); + } + + /* (non-Javadoc) + * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String) + */ + // TODO file bug on getPropertyValueName for Canonical_Combining_Class + + public static class Names { + private String[] names; + private int base; + public Names(int base, String[] names) { + this.base = base; + this.names = names; + } + public int get(String name) { + for (int i = 0; i < names.length; ++i) { + if (name.equalsIgnoreCase(names[i])) return base + i; + } + return -1; + } + public String get(int number) { + number -= base; + if (number < 0 || names.length <= number) return null; + return names[number]; + } + public boolean isInRange(int number) { + number -= base; + return (0 <= number && number < names.length); + } + public List getNames() { + return Arrays.asList(names); + } + } +} \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java b/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java index 2d0ba67a76c..f86b7d5fe72 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java @@ -6,13 +6,16 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $ - * $Date: 2003/12/20 03:06:53 $ - * $Revision: 1.2 $ + * $Date: 2004/02/07 00:59:26 $ + * $Revision: 1.3 $ * ***************************************************************************************** */ package com.ibm.icu.dev.test.util; +import java.util.ArrayList; +import java.util.List; + public abstract class Tabber { static final byte LEFT = 0, CENTER = 1, RIGHT = 2; @@ -36,61 +39,130 @@ public abstract class Tabber { public String process(String source) { StringBuffer result = new StringBuffer(); int lastPos = 0; - int count = 0; - while (lastPos < source.length()) { + for (int count = 0; lastPos < source.length(); ++count) { int pos = source.indexOf('\t', lastPos); if (pos < 0) pos = source.length(); process_field(count, source, lastPos, pos, result); lastPos = pos+1; - ++count; // skip type } - if (lastPos < source.length()) { - result.append(source.substring(lastPos)); - } - return result.toString(); + return prefix + result.toString() + postfix; } + private String prefix = ""; + private String postfix = ""; + public abstract void process_field(int count, String source, int start, int limit, StringBuffer output); public static class MonoTabber extends Tabber { - private int[] tabs; + private List stops = new ArrayList(); + private List types = new ArrayList(); - public MonoTabber(int[] tabs) { - this.tabs = (int[]) tabs.clone(); + public void addAbsolute(int tabPos, int type) { + stops.add(new Integer(tabPos)); + types.add(new Integer(type)); } + public void add(int fieldWidth, byte type) { + int last = getStop(stops.size()-1); + stops.add(new Integer(last + fieldWidth)); + types.add(new Integer(type)); + } + + public int getStop(int fieldNumber) { + if (fieldNumber < 0) return 0; + return ((Integer)stops.get(fieldNumber)).intValue(); + } + /* public String process(String source) { StringBuffer result = new StringBuffer(); int lastPos = 0; int count = 0; - while (lastPos < source.length() && count < tabs.length) { + for (count = 0; lastPos < source.length() && count < stops.size(); count++) { int pos = source.indexOf('\t', lastPos); if (pos < 0) pos = source.length(); String piece = source.substring(lastPos, pos); - if (result.length() < tabs[count]) { - result.append(repeat(" ", tabs[count] - result.length())); + int stopPos = getStop(count); + if (result.length() < stopPos) { + result.append(repeat(" ", stopPos - result.length())); // TODO fix type } result.append(piece); lastPos = pos+1; - count += 2; // skip type } if (lastPos < source.length()) { result.append(source.substring(lastPos)); } return result.toString(); } - + */ + public void process_field(int count, String source, int start, int limit, StringBuffer output) { String piece = source.substring(start, limit); - if (output.length() < tabs[count*2]) { - output.append(repeat(" ", tabs[count*2] - output.length())); + int pos = getStop(count); + if (output.length() < pos) { + output.append(repeat(" ", pos - output.length())); // TODO fix type } else { output.append(" "); } output.append(piece); } + } + + public static class HTMLTabber extends Tabber { + private List parameters = new ArrayList(); + { + setPrefix(""); + setPostfix(""); + } + public void setParameters(int count, String params) { + parameters.set(count,params); + } + + public void process_field(int count, String source, int start, int limit, StringBuffer output) { + output.append(""); + output.append(source.substring(start, limit)); + // TODO Quote string + output.append(""); + } + } + /** + * @return + */ + public String getPostfix() { + return postfix; + } + + /** + * @return + */ + public String getPrefix() { + return prefix; + } + + /** + * @param string + */ + public Tabber setPostfix(String string) { + postfix = string; + return this; + } + + /** + * @param string + */ + public Tabber setPrefix(String string) { + prefix = string; + return this; + } + } \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java b/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java index 32d07606b5c..41e63e45202 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $ - * $Date: 2004/01/27 23:13:13 $ - * $Revision: 1.6 $ + * $Date: 2004/02/07 00:59:25 $ + * $Revision: 1.7 $ * ***************************************************************************************** */ @@ -14,6 +14,8 @@ package com.ibm.icu.dev.test.util; // TODO integrate this into the test framework +import java.util.ArrayList; +import java.util.Collection; import java.util.TreeSet; import java.util.Iterator; import java.io.IOException; @@ -28,26 +30,30 @@ import com.ibm.icu.text.Transliterator; import com.ibm.icu.text.UnicodeSet; public class TestBagFormatter { + static final void generatePropertyAliases(boolean showValues) { - UnicodePropertySource ups = new UnicodePropertySource.ICU().setNameChoice(UProperty.NameChoice.SHORT); Collator order = Collator.getInstance(Locale.ENGLISH); + UnicodeProperty.Factory ups = ICUPropertyFactory.make(); TreeSet props = new TreeSet(order); TreeSet values = new TreeSet(order); - ups.getAvailablePropertyAliases(props); + Collection aliases = new ArrayList(); + BagFormatter bf = new BagFormatter(); + ups.getAvailableAliases(props); Iterator it = props.iterator(); while (it.hasNext()) { String propAlias = (String)it.next(); - ups.setPropertyAlias(propAlias); + UnicodeProperty up = ups.getProperty(propAlias); System.out.println(); - System.out.println(propAlias + ";\t" + ups.getPropertyAlias(UProperty.NameChoice.LONG)); + aliases.clear(); + System.out.println(bf.join(up.getAliases(aliases))); if (!showValues) continue; values.clear(); - ups.getAvailablePropertyValueAliases(values); + up.getAvailableValueAliases(values); Iterator it2 = values.iterator(); while (it2.hasNext()) { String valueAlias = (String)it2.next(); - System.out.println("\t" + valueAlias - + ";\t" + ups.getPropertyValueAlias(valueAlias, UProperty.NameChoice.LONG)); + aliases.clear(); + System.out.println("\t" + bf.join(up.getValueAliases(valueAlias, aliases))); } } } @@ -66,6 +72,10 @@ public class TestBagFormatter { us = new UnicodeSet("[:numeric_type=numeric:]"); bf.showSetNames(BagFormatter.CONSOLE,"[:numeric_type=numeric:]", us); + UnicodeProperty.Factory ups = ICUPropertyFactory.make(); + us = ups.getSet("gc=mn", null, null); + bf.showSetNames(bf.CONSOLE,"gc=mn", us); + if (true) return; //showNames("Name", ".*MARK.*"); //showNames("NFD", "a.+"); diff --git a/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java b/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java index bc7ccbc25b2..3f55dcdc9f5 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java,v $ - * $Date: 2003/12/20 03:06:53 $ - * $Revision: 1.3 $ + * $Date: 2004/02/07 00:59:25 $ + * $Revision: 1.4 $ * ***************************************************************************************** */ @@ -115,7 +115,7 @@ public class Tokenizer { case UNICODESET: return s+"n=" + getUnicodeSet() + s; default: - return s+"c=" + usf.getName(type) + s; + return s+"c=" + usf.getName(type,true) + s; } } diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java new file mode 100644 index 00000000000..bc5d095c20e --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java @@ -0,0 +1,37 @@ +package com.ibm.icu.dev.test.util; + +import com.ibm.icu.impl.Utility; +import com.ibm.icu.text.UTF16; + +public abstract class UnicodeLabel { + public abstract String getValue(int codepoint, boolean isShort); + public String getValue(String s, String separator, boolean withCodePoint) { + if (s.length() == 1) { // optimize simple case + return getValue(s.charAt(0), withCodePoint); + } + StringBuffer sb = new StringBuffer(); + int cp; + for (int i = 0; i < s.length(); i+=UTF16.getCharCount(cp)) { + cp = UTF16.charAt(s,i); + if (i != 0) sb.append(separator); + sb.append(getValue(cp, withCodePoint)); + } + return sb.toString(); + } + public int getMaxWidth(boolean isShort) { + return 0; + } + private static class Hex extends UnicodeLabel { + public String getValue(int codepoint, boolean isShort) { + if (isShort) return Utility.hex(codepoint,4); + return "U+" + Utility.hex(codepoint,4); + } + } + private static class Null extends UnicodeLabel { + public String getValue(int codepoint, boolean isShort) { + return ""; + } + } + public static final UnicodeLabel NULL = new Null(); + public static final UnicodeLabel HEX = new Hex(); +} \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java new file mode 100644 index 00000000000..7b445a7b8cf --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java @@ -0,0 +1,112 @@ +package com.ibm.icu.dev.test.util; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; + +import com.ibm.icu.text.UnicodeSet; +/** + * Class for mapping Unicode characters to values + * Much smaller storage than using HashMap. + * @author Davis + */ +// TODO Optimize using range map +public class UnicodeMap { + // TODO optimize + private HashMap objectToSet = new HashMap(); + private UnicodeSet missing = new UnicodeSet(0,0x10FFFF); + + /** + * Associates code point with value. Removes any previous association. + * @param codepoint + * @param value + * @return this, for chaining + */ + public UnicodeMap put(int codepoint, Object value) { + if (!missing.contains(codepoint)) { + // remove from wherever it is. + Iterator it = objectToSet.keySet().iterator(); + while (it.hasNext()) { + UnicodeSet set = (UnicodeSet) objectToSet.get(it.next()); + if (set.contains(codepoint)) { + set.remove(codepoint); + break; + } + } + missing.remove(codepoint); + } + UnicodeSet set = (UnicodeSet) objectToSet.get(value); + if (set == null) { + set = new UnicodeSet(); + objectToSet.put(value,set); + } + set.add(codepoint); + return this; + } + /** + * Adds bunch o' codepoints; otherwise like add. + * @param codepoints + * @param value + * @return this, for chaining + */ + public UnicodeMap putAll(UnicodeSet codepoints, Object value) { + if (!missing.containsAll(codepoints)) { + // remove from wherever it is. + Iterator it = objectToSet.keySet().iterator(); + while (it.hasNext()) { + UnicodeSet set = (UnicodeSet) objectToSet.get(it.next()); + set.removeAll(codepoints); + } + missing.removeAll(codepoints); + } + UnicodeSet set = (UnicodeSet) objectToSet.get(value); + if (set == null) { + set = new UnicodeSet(); + objectToSet.put(value,set); + } + set.addAll(codepoints); + return this; + } + /** + * Returns the set associated with a given value. Deposits into + * result if it is not null. Remember to clear if you just want + * the new values. + * @param value + * @param result + * @return result + */ + public UnicodeSet getSet(Object value, UnicodeSet result) { + if (result == null) result = new UnicodeSet(); + UnicodeSet set = (UnicodeSet) objectToSet.get(value); + if (set != null) result.addAll(set); + return result; + } + /** + * Returns the list of possible values. Deposits into + * result if it is not null. Remember to clear if you just want + * @param result + * @return + */ + public Collection getAvailableValues(Collection result) { + if (result == null) result = new HashSet(); + result.addAll(objectToSet.keySet()); + return result; + } + /** + * Gets the value associated with a given code point. + * Returns null, if there is no such value. + * @param codepoint + * @return + */ + public Object getValue(int codepoint) { + if (missing.contains(codepoint)) return null; + Iterator it = objectToSet.keySet().iterator(); + while (it.hasNext()) { + Object value = it.next(); + UnicodeSet set = (UnicodeSet) objectToSet.get(value); + if (set.contains(codepoint)) return value; + } + return null; + } +} \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java new file mode 100644 index 00000000000..5c04afdf80a --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java @@ -0,0 +1,448 @@ +package com.ibm.icu.dev.test.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.TreeSet; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; + +public abstract class UnicodeProperty extends UnicodeLabel { + + private String propertyAlias; + private int type; + private Map mapToShortName = null; + + public static final int UNKNOWN = 0, + BINARY = 2, EXTENDED_BINARY = 3, + ENUMERATED = 4, EXTENDED_ENUMERATED = 5, + NUMERIC = 6, EXTENDED_NUMERIC = 7, + STRING = 8, EXTENDED_STRING = 9, + LIMIT_TYPE = 10, + EXTENDED_BIT = 1; + + private static final String[] TYPE_NAMES = { + "Unknown", + "Unknown", + "Binary", + "Extended Binary", + "Enumerated", + "Extended Enumerated", + "Numeric", + "Extended Numeric", + "String", + "Extended String", + }; + + public static String getTypeName(int propType) { + return TYPE_NAMES[propType]; + } + + public final String getName() { + return propertyAlias; + } + + public final int getType() { + return type; + } + + protected final void setName(String string) { + propertyAlias = string; + } + + protected final void setType(int i) { + type = i; + } + + public abstract String getValue(int codepoint); + public abstract Collection getAliases(Collection result); + public abstract Collection getValueAliases(String valueAlias, Collection result); + abstract public Collection getAvailableValueAliases(Collection result); + + static public class Factory { + Map canonicalNames = new TreeMap(); + Map skeletonNames = new TreeMap(); + + public final Factory add(UnicodeProperty sp) { + canonicalNames.put(sp.getName(), sp); + Collection c = sp.getAliases(new TreeSet()); + Iterator it = c.iterator(); + while (it.hasNext()) { + skeletonNames.put(toSkeleton((String)it.next()), sp); + } + return this; + } + + public final UnicodeProperty getProperty(String propertyAlias) { + return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias)); + } + + public final Collection getAvailableAliases(Collection result) { + if (result == null) result = new ArrayList(); + Iterator it = canonicalNames.keySet().iterator(); + while (it.hasNext()) { + addUnique(it.next(), result); + } + return result; + } + public final Collection getAvailableAliases() { + return getAvailableAliases(null); + } + + public final Collection getAvailablePropertyAliases(Collection result, int propertyTypeMask) { + Iterator it = canonicalNames.keySet().iterator(); + while (it.hasNext()) { + UnicodeProperty property = (UnicodeProperty)it.next(); + if (((1< other.length()) shortest = other; + } + mapToShortName.put(value,shortest); + if (shortest.length() > maxWidth) maxWidth = shortest.length(); + } + } + + private int maxWidth = -1; + + public final int getMaxWidth(boolean getShortest) { + if (maxWidth < 0) getValueCache(); + return maxWidth; + } + + public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { + int type = getType(); + return getSet(new SimpleMatcher(propertyValue, + type >= STRING ? null : new SkeletonComparator()), + result); + } + + private UnicodeMap cacheValueToSet = null; + + public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) { + if (result == null) result = new UnicodeSet(); + if (type >= STRING) { + for (int i = 0; i <= 0x10FFFF; ++i) { + String value = getValue(i); + if (matcher.matches(value)) { + result.add(i); + } + } + return result; + } + if (cacheValueToSet == null) { + cacheValueToSet = new UnicodeMap(); + for (int i = 0; i <= 0x10FFFF; ++i) { + cacheValueToSet.put(i, getValue(i)); + } + } + Collection temp = new HashSet(); // to avoid reallocating... + Iterator it = cacheValueToSet.getAvailableValues(null).iterator(); + main: + while (it.hasNext()) { + String value = (String)it.next(); + temp.clear(); + Iterator it2 = getValueAliases(value,temp).iterator(); + while (it2.hasNext()) { + String value2 = (String)it2.next(); + if (matcher.matches(value2) + || matcher.matches(toSkeleton(value2))) { + cacheValueToSet.getSet(value, result); + continue main; + } + } + } + return result; + } + + /* + public UnicodeSet getMatchSet(UnicodeSet result) { + if (result == null) result = new UnicodeSet(); + addAll(matchIterator, result); + return result; + } + + public void setMatchSet(UnicodeSet set) { + matchIterator = new UnicodeSetIterator(set); + } + */ + + public static Collection addUnique(Object obj, Collection result) { + if (obj != null && !result.contains(obj)) result.add(obj); + return result; + } + + public static Collection addAllUnique(Collection source, Collection result) { + Iterator it = source.iterator(); + while (it.hasNext()) { + Object obj = it.next(); + if (obj != null && !result.contains(obj)) result.add(obj); + } + return result; + } + + public static class SkeletonComparator implements Comparator { + public int compare(Object o1, Object o2) { + // TODO optimize + return toSkeleton((String)o1).compareTo(toSkeleton((String)o2)); + } + } + + private static String toSkeleton(String source) { + StringBuffer skeletonBuffer = new StringBuffer(); + boolean gotOne = false; + // remove spaces, '_', '-' + // we can do this with char, since no surrogates are involved + for (int i = 0; i < source.length(); ++i) { + char ch = source.charAt(i); + if (ch == '_' || ch == ' ' || ch == '-') { + gotOne = true; + } else { + char ch2 = Character.toLowerCase(ch); + if (ch2 != ch) { + gotOne = true; + skeletonBuffer.append(ch2); + } else { + skeletonBuffer.append(ch); + } + } + } + if (!gotOne) return source; // avoid string creation + return skeletonBuffer.toString(); + } + + /** + * Utility function for comparing codepoint to string without + * generating new string. + * @param codepoint + * @param other + * @return + */ + public static final boolean equals(int codepoint, String other) { + if (other.length() == 1) { + return codepoint == other.charAt(0); + } + if (other.length() == 2) { + return other.equals(UTF16.valueOf(codepoint)); + } + return false; + } + + /** + * Utility that should be on UnicodeSet + * @param source + * @param result + */ + static public void addAll(UnicodeSetIterator source, UnicodeSet result) { + while (source.nextRange()) { + if (source.codepoint == UnicodeSetIterator.IS_STRING) { + result.add(source.string); + } else { + result.add(source.codepoint, source.codepointEnd); + } + } + } +} + diff --git a/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java b/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java index 9e0e7f63025..575284f045a 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java @@ -6,8 +6,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java,v $ - * $Date: 2003/12/20 03:06:54 $ - * $Revision: 1.3 $ + * $Date: 2004/02/07 00:59:24 $ + * $Revision: 1.4 $ * ***************************************************************************************** */ @@ -134,22 +134,4 @@ public abstract class Visitor { abstract protected void doAfter(Object container, Object item); abstract protected void doSimpleAt(Object o); - // ===== CONVENIENCES ===== - static class Join extends Visitor { - StringBuffer output = new StringBuffer(); - String join (Object o) { - output.setLength(0); - doAt(o); - return output.toString(); - } - protected void doBefore(Object container, Object item) {} - protected void doAfter(Object container, Object item) {} - protected void doBetween(Object container, Object lastItem, Object nextItem) { - output.append(","); - } - protected void doSimpleAt(Object o) { - output.append(o.toString()); - } - } - } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java index 1a489a795e7..b8594df26bf 100644 --- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java +++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ -* $Date: 2003/08/20 03:48:47 $ -* $Revision: 1.11 $ +* $Date: 2004/02/07 01:01:12 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -63,7 +63,7 @@ public class GenOverlap implements UCD_Types, UCA_Types { CEList.main(null); System.out.println("# Overlap"); - System.out.println("# Generated " + new Date()); + System.out.println("# Generated " + Default.getDate()); ucd = UCD.make(); @@ -335,7 +335,7 @@ public class GenOverlap implements UCD_Types, UCA_Types { CEList.main(null); System.out.println("# Generate"); - System.out.println("# Generated " + new Date()); + System.out.println("# Generated " + Default.getDate()); ucd = UCD.make(); @@ -533,7 +533,7 @@ public class GenOverlap implements UCD_Types, UCA_Types { collator = collatorIn; System.out.println("# Check Hash"); - System.out.println("# Generated " + new Date()); + System.out.println("# Generated " + Default.getDate()); ucd = UCD.make(); diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java index 9d6c983f4a7..86207b6795e 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ -* $Date: 2004/02/06 18:32:03 $ -* $Revision: 1.18 $ +* $Date: 2004/02/07 01:01:12 $ +* $Revision: 1.19 $ * ******************************************************************************* */ @@ -32,11 +32,11 @@ public class WriteCharts implements UCD_Types { static boolean HACK_KANA = false; static public void special() { - Default.setUCD(); + for (int i = 0xE000; i < 0x10000; ++i) { - if (!Default.ucd.isRepresented(i)) continue; - if (!Default.nfkc.isNormalized(i)) continue; - System.out.println(Default.ucd.getCodeAndName(i)); + if (!Default.ucd().isRepresented(i)) continue; + if (!Default.nfkc().isNormalized(i)) continue; + System.out.println(Default.ucd().getCodeAndName(i)); } } @@ -109,7 +109,7 @@ public class WriteCharts implements UCD_Types { int cp = UTF16.charAt(s,0); - byte script = Default.ucd.getScript(cp); + byte script = Default.ucd().getScript(cp); // get first non-zero primary int currentPrimary = getFirstPrimary(sortKey); @@ -140,7 +140,7 @@ public class WriteCharts implements UCD_Types { ++scriptCount[script+3]; if (scriptCount[script+3] > 1) { System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " + - getChunkName(script, LONG) + ", " + Default.ucd.getCodeAndName(s)); + getChunkName(script, LONG) + ", " + Default.ucd().getCodeAndName(s)); } output = openFile(scriptCount[script+3], folder, script); } @@ -190,19 +190,19 @@ public class WriteCharts implements UCD_Types { String s, byte script, String classname) { - String name = Default.ucd.getName(s); + String name = Default.ucd().getName(s); if (s.equals("\u1eaf")) { System.out.println("debug"); } - String comp = Default.nfc.normalize(s); - int cat = Default.ucd.getCategory(UTF16.charAt(comp,0)); + String comp = Default.nfc().normalize(s); + int cat = Default.ucd().getCategory(UTF16.charAt(comp,0)); if (cat == Mn || cat == Mc || cat == Me) { comp = '\u25CC' + comp; if (s.equals("\u0300")) { - System.out.println(Default.ucd.getCodeAndName(comp)); + System.out.println(Default.ucd().getCodeAndName(comp)); } } // TODO: merge with showCell @@ -226,27 +226,26 @@ public class WriteCharts implements UCD_Types { } static public void normalizationChart() throws IOException { - Default.setUCD(); HACK_KANA = false; Set set = new TreeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { - if (!Default.ucd.isRepresented(i)) { + if (!Default.ucd().isRepresented(i)) { if (i < 0xAC00) continue; if (i > 0xD7A3) continue; if (i > 0xACFF && i < 0xD700) continue; } - byte cat = Default.ucd.getCategory(i); + byte cat = Default.ucd().getCategory(i); if (cat == Cs || cat == Co) continue; - if (Default.nfkd.isNormalized(i)) continue; - String decomp = Default.nfkd.normalize(i); + if (Default.nfkd().isNormalized(i)) continue; + String decomp = Default.nfkd().normalize(i); byte script = getBestScript(decomp); set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script), - new Pair(Default.ucd.getCase(decomp, FULL, FOLD), + new Pair(Default.ucd().getCase(decomp, FULL, FOLD), new Integer(i)))); } @@ -302,10 +301,10 @@ public class WriteCharts implements UCD_Types { String prefix; String code = UTF16.valueOf(cp); - String c = Default.nfc.normalize(cp); - String d = Default.nfd.normalize(cp); - String kc = Default.nfkc.normalize(cp); - String kd = Default.nfkd.normalize(cp); + String c = Default.nfc().normalize(cp); + String d = Default.nfd().normalize(cp); + String kc = Default.nfkc().normalize(cp); + String kd = Default.nfkd().normalize(cp); showCell(output, code, "= CAT_OFFSET) return Default.ucd.getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length); + if (script >= CAT_OFFSET) return Default.ucd().getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length); else if (script == HIRAGANA_SCRIPT && HACK_KANA) return length == SHORT ? "Kata-Hira" : "Katakana-Hiragana"; - else return Default.ucd.getCase(Default.ucd.getScriptID_fromIndex((byte)script, length), FULL, TITLE); + else return Default.ucd().getCase(Default.ucd().getScriptID_fromIndex((byte)script, length), FULL, TITLE); } } @@ -816,8 +812,8 @@ public class WriteCharts implements UCD_Types { gotOne = true; } indexFile.println("


"); - indexFile.println("UCD: " + Default.ucd.getVersion() + extra); - indexFile.println("
" + df.format(new Date()) + " MED"); + indexFile.println("UCD: " + Default.ucd().getVersion() + extra); + indexFile.println("
" + Default.getDate() + " MED"); indexFile.println("

"); indexFile.close(); } @@ -827,10 +823,10 @@ public class WriteCharts implements UCD_Types { for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i); // contains Lu, Lo, Lt, or Lowercase or Uppercase - byte cat = Default.ucd.getCategory(cp); + byte cat = Default.ucd().getCategory(cp); if (cat == Lu || cat == Ll || cat == Lt) return true; - if (Default.ucd.getBinaryProperty(cp, Other_Lowercase)) return true; - if (Default.ucd.getBinaryProperty(cp, Other_Uppercase)) return true; + if (Default.ucd().getBinaryProperty(cp, Other_Lowercase)) return true; + if (Default.ucd().getBinaryProperty(cp, Other_Uppercase)) return true; } return false; } @@ -839,7 +835,6 @@ public class WriteCharts implements UCD_Types { "any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD); public static void writeCompositionChart() throws IOException { - Default.setUCD(); UCA uca = new UCA(null,""); Set letters = new TreeSet(); @@ -873,7 +868,7 @@ public class WriteCharts implements UCD_Types { String scriptName = ""; try { - scriptName = Default.ucd.getScriptID_fromIndex(script); + scriptName = Default.ucd().getScriptID_fromIndex(script); Utility.fixDot(); System.out.println(scriptName); } catch (IllegalArgumentException e) { @@ -889,15 +884,15 @@ public class WriteCharts implements UCD_Types { printed.clear(); for (int cp = 0; cp < 0x10FFFF; ++cp) { - byte type = Default.ucd.getCategory(cp); - if (type == Default.ucd.UNASSIGNED || type == Default.ucd.PRIVATE_USE) continue; // skip chaff + byte type = Default.ucd().getCategory(cp); + if (type == Default.ucd().UNASSIGNED || type == Default.ucd().PRIVATE_USE) continue; // skip chaff Utility.dot(cp); - byte newScript = Default.ucd.getScript(cp); + byte newScript = Default.ucd().getScript(cp); if (newScript != script) continue; String source = UTF16.valueOf(cp); - String decomp = Default.nfd.normalize(source); + String decomp = Default.nfd().normalize(source); if (decomp.equals(source)) continue; // pick up all decompositions @@ -931,7 +926,7 @@ public class WriteCharts implements UCD_Types { Iterator it2 = letters.iterator(); while (it2.hasNext()) { String let = (String)it2.next(); - out.println("" + showCell(Default.nfc.normalize(let), "class='h'")); + out.println("" + showCell(Default.nfc().normalize(let), "class='h'")); Iterator it3 = marks.iterator(); while (it3.hasNext()) { String mark = (String)it3.next(); @@ -942,7 +937,7 @@ public class WriteCharts implements UCD_Types { } String comp; try { - comp = Default.nfc.normalize(merge); + comp = Default.nfc().normalize(merge); } catch (Exception e) { System.out.println("Failed when trying to compose <" + Utility.hex(e) + ">"); continue; @@ -1027,14 +1022,13 @@ public class WriteCharts implements UCD_Types { } return "" + addCircle.transliterate(comp) + + "title='" + Utility.hex(comp) + " " + Default.ucd().getName(comp) + "'>" + addCircle.transliterate(comp) + "
" + Utility.hex(comp) + ""; } public static void writeAllocation() throws IOException { - Default.setUCD(); String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger int[] starts = new int[names.length]; int[] ends = new int[names.length]; @@ -1043,7 +1037,7 @@ public class WriteCharts implements UCD_Types { int counter = 0; int blockId = 0; - while (Default.ucd.getBlockData(blockId++, blockData)) { + while (Default.ucd().getBlockData(blockId++, blockData)) { names[counter] = blockData.name; starts[counter] = blockData.start; ends[counter] = blockData.end; @@ -1094,7 +1088,7 @@ public class WriteCharts implements UCD_Types { int total = ends[i] - starts[i] + 1; int alloc = 0; for (int j = starts[i]; j <= ends[i]; ++j) { - if (Default.ucd.isAllocated(j)) ++alloc; + if (Default.ucd().isAllocated(j)) ++alloc; } //System.out.println(names[i] + "\t" + alloc + "\t" + total); String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000" diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index c7269b1924d..1c25f6469c3 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ -* $Date: 2004/01/16 01:22:26 $ -* $Revision: 1.38 $ +* $Date: 2004/02/07 01:01:11 $ +* $Revision: 1.39 $ * ******************************************************************************* */ @@ -115,9 +115,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types { //if (0xA000 <= a && a <= 0xA48F) continue; // skip YI String b = Case.fold(a); - String c = Default.nfkc.normalize(b); + String c = Default.nfkc().normalize(b); String d = Case.fold(c); - String e = Default.nfkc.normalize(d); + String e = Default.nfkc().normalize(d); if (!e.equals(c)) { System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a)); /* @@ -135,7 +135,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types { */ } String f = Case.fold(e); - String g = Default.nfkc.normalize(f); + String g = Default.nfkc().normalize(f); if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!"); } } @@ -204,9 +204,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types { for (char c = 0; c < 0xFFFF; ++c) { if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); if (0xAC00 <= c && c <= 0xD7A3) continue; - if (!Default.nfkd.isNormalized(c)) { + if (!Default.nfkd().isNormalized(c)) { ++count; - String decomp = Default.nfkd.normalize(c); + String decomp = Default.nfkd().normalize(c); datasize += decomp.length(); if (max < decomp.length()) max = decomp.length(); if (decomp.length() > 7) ++over7; @@ -232,9 +232,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types { for (char c = 0; c < 0xFFFF; ++c) { if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); if (0xAC00 <= c && c <= 0xD7A3) continue; - if (!Default.nfd.isNormalized(c)) { + if (!Default.nfd().isNormalized(c)) { ++count; - String decomp = Default.nfd.normalize(c); + String decomp = Default.nfd().normalize(c); datasize += decomp.length(); if (max < decomp.length()) max = decomp.length(); csa.setElementAt(c, (short)count); @@ -256,7 +256,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types { for (char c = 0; c < 0xFFFF; ++c) { if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); - int canClass = Default.nfkd.getCanonicalClass(c); + int canClass = Default.nfkd().getCanonicalClass(c); if (canClass != 0) { ++count; @@ -295,7 +295,6 @@ public class WriteCollationData implements UCD_Types, UCA_Types { static void writeConformance(String filename, byte option, boolean shortPrint) throws IOException { - Default.setUCD(); //UCD ucd30 = UCD.make("3.0.0"); /* @@ -480,11 +479,11 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON // NOW, if the character decomposes, or is a combining mark (non-zero), try combinations - if (Default.ucd.getCombiningClass(firstChar) > 0 - || !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) { + if (Default.ucd().getCombiningClass(firstChar) > 0 + || !Default.nfd().isNormalized(s) && !Default.ucd().isHangulSyllable(firstChar)) { // if it ends with a non-starter, try the decompositions. - String decomp = Default.nfd.normalize(s); - if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) { + String decomp = Default.nfd().normalize(s); + if (Default.ucd().getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) { if (canIt == null) canIt = new CanonicalIterator("."); canIt.setSource(s + LOW_ACCENT); int limit = 4; @@ -503,7 +502,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON for (int j = 0; j < CONTRACTION_TEST.length; ++j) { String extra = s.substring(0,i) + CONTRACTION_TEST[j] + s.substring(i); addStringY(extra + 'a', option); - if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra)); + if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd().getCodeAndName(extra)); } } } @@ -550,12 +549,12 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON for (int ch = 0; ch < 0x10FFFF; ++ch) { if (!ucd_uca_base.isAllocated(ch)) continue; - if (Default.nfkd.isNormalized(ch)) continue; + if (Default.nfkd().isNormalized(ch)) continue; if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul if (alreadySeen.contains(ch)) continue; Utility.dot(ch); - String decomp = Default.nfkd.normalize(ch); + String decomp = Default.nfkd().normalize(ch); if (ch != ' ' && decomp.charAt(0) == ' ') { skipSet.add(ch); continue; // skip wierd decomps @@ -608,15 +607,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON } static String remapSortKey(int cp, boolean decomposition) { - if (Default.nfd.isNormalized(cp)) return remapCanSortKey(cp, decomposition); + if (Default.nfd().isNormalized(cp)) return remapCanSortKey(cp, decomposition); // we know that it is not NFKD. - String canDecomp = Default.nfd.normalize(cp); + String canDecomp = Default.nfd().normalize(cp); String result = ""; int ch; for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) { ch = UTF16.charAt(canDecomp, j); - System.out.println("* " + Default.ucd.getCodeAndName(ch)); + System.out.println("* " + Default.ucd().getCodeAndName(ch)); String newSortKey = remapCanSortKey(ch, decomposition); System.out.println("* " + UCA.toString(newSortKey)); result = mergeSortKeys(result, newSortKey); @@ -626,7 +625,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON } static String remapCanSortKey(int ch, boolean decomposition) { - String compatDecomp = Default.nfkd.normalize(ch); + String compatDecomp = Default.nfkd().normalize(ch); String decompSortKey = collator.getSortKey(compatDecomp, UCA.NON_IGNORABLE, decomposition); byte type = ucd.getDecompositionType(ch); @@ -799,9 +798,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON log.println("compressed: " + comp); } log.println("Ken's : " + kenStr); - String nfkd = Default.nfkd.normalize(s); + String nfkd = Default.nfkd().normalize(s); log.println("NFKD : " + ucd.getCodeAndName(nfkd)); - String nfd = Default.nfd.normalize(s); + String nfd = Default.nfd().normalize(s); if (!nfd.equals(nfkd)) { log.println("NFD : " + ucd.getCodeAndName(nfd)); } @@ -824,7 +823,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON static final byte getDecompType(int cp) { byte result = ucd.getDecompositionType(cp); if (result == ucd.CANONICAL) { - String d = Default.nfd.normalize(cp); // TODO + String d = Default.nfd().normalize(cp); // TODO int cp1; for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) { cp1 = UTF16.charAt(d, i); @@ -887,7 +886,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON byte type = getDecompType(UTF16.charAt(s, 0)); char ch = s.charAt(0); - String decomp = Default.nfkd.normalize(s); + String decomp = Default.nfkd().normalize(s); int len = 0; int markLen = collator.getCEs(decomp, true, markCes); if (compress) markLen = kenCompress(markCes, markLen); @@ -994,14 +993,14 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON log.println("

These are not necessarily errors, but should be examined for possible errors

"); log.println(""); - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); Map map = new TreeMap(); while (true) { String s = cc.next(); if (s == null) break; - if (!Default.nfd.isNormalized(s)) continue; // only unnormalized stuff + if (!Default.nfd().isNormalized(s)) continue; // only unnormalized stuff if (UTF16.countCodePoint(s) == 1) { int cat = ucd.getCategory(UTF16.charAt(s,0)); if (cat == Cn || cat == Cc || cat == Cs) continue; @@ -1033,7 +1032,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON log.println("

These are not necessarily errors, but should be examined for possible errors

"); log.println("
"); - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); Map map = new TreeMap(); Map tails = new TreeMap(); @@ -1045,7 +1044,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON String s = cc.next(); if (s == null) break; Utility.dot(counter++); - if (!Default.nfd.isNormalized(s)) continue; // only normalized stuff + if (!Default.nfd().isNormalized(s)) continue; // only normalized stuff CEList celist = collator.getCEList(s, true); map.put(celist, s); } @@ -1216,7 +1215,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON int[] ces = new int[50]; - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); int[] lenArray = new int[1]; diLog.println("# Contractions"); @@ -1287,7 +1286,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON String s = String.valueOf(ch); int len = collator.getCEs(s, true, ces); */ - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); int[] lenArray = new int[1]; Set sortedCodes = new TreeSet(); @@ -1458,7 +1457,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON String s = String.valueOf(ch); int len = collator.getCEs(s, true, ces); */ - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); int[] lenArray = new int[1]; Set sortedCodes = new TreeSet(); @@ -1671,7 +1670,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON Map ordered = new TreeMap(cm); UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, - SKIP_CANONICAL_DECOMPOSIBLES ? Default.nfd : null); + SKIP_CANONICAL_DECOMPOSIBLES ? Default.nfd() : null); int[] lenArray = new int[1]; Set alreadyDone = new HashSet(); @@ -1737,7 +1736,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON UnicodeSet composites = new UnicodeSet(); for (int i = 0; i < 0x10FFFF; ++i) { if (!ucd.isAllocated(i)) continue; - if (Default.nfd.isNormalized(i)) continue; + if (Default.nfd().isNormalized(i)) continue; composites.add(i); } UnicodeSet CJKcomposites = new UnicodeSet(CJK).retainAll(composites); @@ -1774,9 +1773,9 @@ F900..FAFF; CJK Compatibility Ideographs System.out.println("Adding Kanji"); for (int i = 0; i < 0x10FFFF; ++i) { if (!ucd.isAllocated(i)) continue; - if (Default.nfkd.isNormalized(i)) continue; + if (Default.nfkd().isNormalized(i)) continue; Utility.dot(i); - String decomp = Default.nfkd.normalize(i); + String decomp = Default.nfkd().normalize(i); int cp; for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) { cp = UTF16.charAt(decomp, j); @@ -2438,7 +2437,7 @@ F900..FAFF; CJK Compatibility Ideographs System.out.println("Fix Homeless! No back map for " + CEList.toString(ces[i]) + " from " + CEList.toString(ces, len)); System.out.println("\t" + ucd.getCodeAndName(chr) - + " => " + ucd.getCodeAndName(Default.nfkd.normalize(chr)) + + " => " + ucd.getCodeAndName(Default.nfkd().normalize(chr)) ); s = "[" + Utility.hex(ces[i]) + "]"; } while (false); // exactly one time, just for breaking @@ -2528,7 +2527,7 @@ F900..FAFF; CJK Compatibility Ideographs "[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]"); // needsQuoting.remove(); } - s = Default.nfc.normalize(s); + s = Default.nfc().normalize(s); quoteOperandBuffer.setLength(0); boolean noQuotes = true; boolean inQuote = false; @@ -2628,7 +2627,6 @@ F900..FAFF; CJK Compatibility Ideographs static int[] primaryDelta; static void writeFractionalUCA(String filename) throws IOException { - Default.setUCD(); checkImplicit(); checkFixes(); @@ -2760,7 +2758,7 @@ F900..FAFF; CJK Compatibility Ideographs for (int i = 0; i < 0x10FFFF; ++i) { if (!ucd.isNoncharacter(i)) { if (!ucd.isAllocated(i)) continue; - if (Default.nfd.isNormalized(i)) continue; + if (Default.nfd().isNormalized(i)) continue; if (ucd.isHangulSyllable(i)) continue; //if (collator.getCEType(i) >= UCA.FIXED_CE) continue; } @@ -2795,7 +2793,7 @@ F900..FAFF; CJK Compatibility Ideographs // Skip anything that is not FCD. - if (!Default.nfd.isFCD(s)) continue; + if (!Default.nfd().isFCD(s)) continue; // We ONLY add if the sort key would be different // Than what we would get if we didn't decompose!! @@ -3381,7 +3379,7 @@ F900..FAFF; CJK Compatibility Ideographs } String toString(boolean showEmpty) { - String src = source.length() == 0 ? "CONSTRUCTED" : Default.ucd.getCodeAndName(source); + String src = source.length() == 0 ? "CONSTRUCTED" : Default.ucd().getCodeAndName(source); return "[" + (max ? "last " : "first ") + title + " " + formatFCE(showEmpty) + "] # " + src; } @@ -3631,7 +3629,7 @@ F900..FAFF; CJK Compatibility Ideographs // b. toSmallKana(NFKD(x)) != x. static final boolean needsCaseBit(String x) { - String s = Default.nfkd.normalize(x); + String s = Default.nfkd().normalize(x); if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true; if (!toSmallKana(s).equals(s)) return true; return false; @@ -3952,7 +3950,7 @@ F900..FAFF; CJK Compatibility Ideographs static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd','HH:mm:ss' GMT'"); static String getNormalDate() { - return myDateFormat.format(new Date()) + " [MD]"; + return Default.getDate() + " [MD]"; } @@ -3976,7 +3974,6 @@ F900..FAFF; CJK Compatibility Ideographs static UnicodeSet compatibilityExceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]"); static void writeCollationValidityLog() throws IOException { - Default.setUCD(); //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html")); log = Utility.openPrintWriter(UCA_GEN_DIR, "CheckCollationValidity.html", Utility.UTF8_WINDOWS); @@ -4120,7 +4117,7 @@ F900..FAFF; CJK Compatibility Ideographs continue; } canIt.setSource(key); - String nfdKey = Default.nfd.normalize(key); + String nfdKey = Default.nfd().normalize(key); boolean first = true; while (true) { @@ -4132,7 +4129,7 @@ F900..FAFF; CJK Compatibility Ideographs // Skip anything that is not FCD. - if (!Default.nfd.isFCD(s)) continue; + if (!Default.nfd().isFCD(s)) continue; // We ONLY add if the sort key would be different // Than what we would get if we didn't decompose!! @@ -4184,7 +4181,7 @@ F900..FAFF; CJK Compatibility Ideographs int[] ces = new int[50]; - UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); int[] lenArray = new int[1]; int minps = Integer.MAX_VALUE; @@ -4220,7 +4217,7 @@ F900..FAFF; CJK Compatibility Ideographs } } - cc = collator.getContents(UCA.FIXED_CE, Default.nfd); + cc = collator.getContents(UCA.FIXED_CE, Default.nfd()); log.println("
"); int lastPrimary = 0; @@ -4370,7 +4367,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; static void addString(String ch, byte option) { String colDbase = collator.getSortKey(ch, option, true); String colNbase = collator.getSortKey(ch, option, false); - String colCbase = collator.getSortKey(Default.nfc.normalize(ch), option, false); + String colCbase = collator.getSortKey(Default.nfc().normalize(ch), option, false); if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) { /*System.out.println(Utility.hex(ch)); System.out.println(printableKey(colNbase)); @@ -4540,7 +4537,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; } static void showLine(int count, String ch, String keyD, String keyN) { - String decomp = Default.nfd.normalize(ch); + String decomp = Default.nfd().normalize(ch); if (decomp.equals(ch)) decomp = ""; else decomp = "
<" + Utility.hex(decomp, " ") + "> "; log.println("\n"; table += ""; } in.close(); @@ -102,7 +102,7 @@ public final class GenerateStandardizedVariants implements UCD_Types { String[] batName = {""}; String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName); - String version = Default.ucd.getVersion(); + String version = Default.ucd().getVersion(); int lastDot = version.lastIndexOf('.'); String updateDirectory = version.substring(0,lastDot) + "-Update"; int updateV = version.charAt(version.length()-1) - '0'; @@ -110,7 +110,7 @@ public final class GenerateStandardizedVariants implements UCD_Types { if (DEBUG) System.out.println("updateDirectory: " + updateDirectory); String[] replacementList = { - "@revision@", Default.ucd.getVersion(), + "@revision@", Default.ucd().getVersion(), "@updateDirectory@", updateDirectory, "@date@", Default.getDate(), "@table@", table}; diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java index 56f2e0fdc4d..30e9e6da73d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java,v $ -* $Date: 2002/08/04 21:38:45 $ -* $Revision: 1.3 $ +* $Date: 2004/02/07 01:01:14 $ +* $Revision: 1.4 $ * ******************************************************************************* */ @@ -27,7 +27,7 @@ public class GenerateThaiBreaks { PrintWriter out = null; try { - Default.setUCD(); + UnicodeSet ignorables = new UnicodeSet(); /* new UnicodeSet(0xE30, 0xE3A); ignorables.add(0x0E40, 0x0E44); // add logical order exception @@ -89,18 +89,18 @@ public class GenerateThaiBreaks { UnicodeSet missingThai = new UnicodeSet("[[\u0e00-\u0e7f]-[:Cn:]]").removeAll(all); System.out.println("Never occur: " + missingThai.toPattern(true)); - Utility.showSetNames("", missingThai, true, Default.ucd); + Utility.showSetNames("", missingThai, true, Default.ucd()); System.out.println(); UnicodeSet neverInitial = new UnicodeSet(all).removeAll(initials); UnicodeSet neverFinal = new UnicodeSet(all).removeAll(finals); System.out.println("Never initial: " + neverInitial.toPattern(true)); - Utility.showSetNames("", neverInitial, true, Default.ucd); + Utility.showSetNames("", neverInitial, true, Default.ucd()); System.out.println(); System.out.println("Never final: " + neverFinal.toPattern(true)); - Utility.showSetNames("", neverFinal, true, Default.ucd); + Utility.showSetNames("", neverFinal, true, Default.ucd()); System.out.println(); initials.removeAll(medials); @@ -110,11 +110,11 @@ public class GenerateThaiBreaks { System.out.println("finals size: " + finals.size()); System.out.println("Only Initials" + initials.toPattern(true)); - Utility.showSetNames("", initials, true, Default.ucd); + Utility.showSetNames("", initials, true, Default.ucd()); System.out.println(); System.out.println("Only Finals" + finals.toPattern(true)); - Utility.showSetNames("", finals, true, Default.ucd); + Utility.showSetNames("", finals, true, Default.ucd()); } finally { br.close(); if (out != null) out.close(); @@ -124,9 +124,9 @@ public class GenerateThaiBreaks { static class MyBreaker implements Utility.Breaker { public String get(Object current, Object old) { if (old == null || UTF16.charAt(current.toString(), 0) == UTF16.charAt(old.toString(), 0)) { - return current.toString() + "(" + Default.ucd.getCode(current.toString().substring(1)) + "))"; + return current.toString() + "(" + Default.ucd().getCode(current.toString().substring(1)) + "))"; } else { - return "\r\n" + current + "(" + Default.ucd.getCode(current.toString()) + "))"; + return "\r\n" + current + "(" + Default.ucd().getCode(current.toString()) + "))"; } } public boolean filter(Object current) { return true; } diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java index b8a985ed51d..815323dacda 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Main.java +++ b/tools/unicodetools/com/ibm/text/UCD/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $ -* $Date: 2004/02/06 18:30:21 $ -* $Revision: 1.33 $ +* $Date: 2004/02/07 01:01:14 $ +* $Revision: 1.34 $ * ******************************************************************************* */ @@ -56,7 +56,7 @@ public final class Main implements UCD_Types { }; public static void main (String[] args) throws Exception { - System.out.println("*** Start *** " + new Date()); + System.out.println("*** Start *** " + Default.getDate()); try { for (int i = 0; i < args.length; ++i) { @@ -68,7 +68,7 @@ public final class Main implements UCD_Types { Utility.fixDot(); System.out.println(); - System.out.println("** Argument: " + args[i] + " ** " + new Date()); + System.out.println("** Argument: " + args[i] + " ** " + Default.getDate()); // Expand string arguments @@ -96,7 +96,6 @@ public final class Main implements UCD_Types { Default.setUCD(args[++i]); continue; } - Default.ensureUCD(); // Now handle other options @@ -106,7 +105,7 @@ public final class Main implements UCD_Types { VerifyUCD.CheckCaseFold(); VerifyUCD.checkAgainstUInfo(); - } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.getUcdVersion()}); + } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion()}); else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics(); else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null); else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable(); @@ -342,7 +341,7 @@ public final class Main implements UCD_Types { // } } finally { - System.out.println("*** Done *** " + new Date()); + System.out.println("*** Done *** " + Default.getDate()); } } diff --git a/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java b/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java index 0e2c708d54d..b9cadbbbf0e 100644 --- a/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java +++ b/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java @@ -191,21 +191,21 @@ public final class NFSkippable extends UCDProperty { static int limit = 0x10FFFF; // full version = 10ffff, for testing may use smaller public static void main (String[] args) throws java.io.IOException { - Default.setUCD(); + PrintWriter out = Utility.openPrintWriter("NFSafeSets.txt", Utility.UTF8_WINDOWS); out.println("NFSafeSets"); - out.println("Version: " + Default.ucd.getVersion()); + out.println("Version: " + Default.ucd().getVersion()); out.println("Date: " + Default.getDate()); out.println(); for (int mode = NFD_UnsafeStart; mode <= NFKC_UnsafeStart; ++mode) { - UCDProperty up = DerivedProperty.make(mode, Default.ucd); + UCDProperty up = DerivedProperty.make(mode, Default.ucd()); generateSet(out, "UNSAFE[" + Normalizer.getName((byte)(mode-NFD_UnsafeStart)) + "]", up); } for (byte mode = NFD; mode <= NFKC; ++mode) { - NFSkippable skipper = new NFSkippable(mode, Default.ucd); + NFSkippable skipper = new NFSkippable(mode, Default.ucd()); generateSet(out, "SKIPPABLE[" + Normalizer.getName(mode) + "]", skipper); } diff --git a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java index 0cf515215eb..e0cf5a57ae3 100644 --- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $ -* $Date: 2003/02/25 23:38:22 $ -* $Revision: 1.2 $ +* $Date: 2004/02/07 01:01:14 $ +* $Revision: 1.3 $ * ******************************************************************************* */ @@ -22,7 +22,7 @@ import com.ibm.text.utility.*; public class QuickTest implements UCD_Types { static final void test() { - Default.setUCD(); + UnicodeSet format = new UnicodeSet("[:Cf:]"); /* [4] NameStartChar := ":" | [A-Z] | "_" | [a-z] | @@ -64,10 +64,10 @@ public class QuickTest implements UCD_Types { UnicodeSet noncharacter = new UnicodeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { - if (!Default.ucd.isAllocated(i)) continue; - if (!Default.nfkc.isNormalized(i)) notNFKC.add(i); - if (Default.ucd.isNoncharacter(i)) noncharacter.add(i); - if (Default.ucd.getCategory(i) == PRIVATE_USE) privateUse.add(i); + if (!Default.ucd().isAllocated(i)) continue; + if (!Default.nfkc().isNormalized(i)) notNFKC.add(i); + if (Default.ucd().isNoncharacter(i)) noncharacter.add(i); + if (Default.ucd().getCategory(i) == PRIVATE_USE) privateUse.add(i); } showSet("notNFKC in NameChar", new UnicodeSet(notNFKC).retainAll(nameChar)); @@ -110,6 +110,6 @@ public class QuickTest implements UCD_Types { System.out.println("\tCount:" + set1.size()); System.out.println("\tSet:" + set1.toPattern(true)); System.out.println("\tDetails:"); - Utility.showSetNames("", set1, false, Default.ucd); + Utility.showSetNames("", set1, false, Default.ucd()); } } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java b/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java index 9525ebf63e9..54db7b9ae1d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java +++ b/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java @@ -18,7 +18,7 @@ public final class TernaryStore { static DepthPrinter dp; static void test() throws java.io.IOException { - Default.setUCD(); + PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS); try { @@ -35,7 +35,7 @@ public final class TernaryStore { int counter = 0; int i; for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) { - if (Default.ucd.hasComputableName(i)) continue; + if (Default.ucd().hasComputableName(i)) continue; String temp = UCharacter.getName(i); if (temp != null) tests[counter++] = temp.trim(); diff --git a/tools/unicodetools/com/ibm/text/UCD/TestData.java b/tools/unicodetools/com/ibm/text/UCD/TestData.java index 52700c76d96..dae010f89b1 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestData.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $ -* $Date: 2004/02/06 18:30:20 $ -* $Revision: 1.13 $ +* $Date: 2004/02/07 01:01:14 $ +* $Revision: 1.14 $ * ******************************************************************************* */ @@ -35,10 +35,10 @@ public class TestData implements UCD_Types { static UnicodeProperty.Factory upf; public static void main (String[] args) throws IOException { - Default.setUCD(); - System.out.println(new Date()); + + System.out.println("main: " + Default.getDate()); upf = ICUPropertyFactory.make(); - System.out.println(new Date()); + System.out.println("after factory: " + Default.getDate()); showPropDiff( "gc=mn", null, @@ -56,39 +56,39 @@ public class TestData implements UCD_Types { showPropDiff( "General_Category=L", null, "Script!=Inherited|Common", - UnifiedBinaryProperty.getSet("script=inherited", Default.ucd) - .addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd)) + upf.getSet("script=inherited") + .addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd())) .complement() ); - UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd); - UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd); + UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd()); + UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd()); UnicodeSet po = new UnicodeSet("[:po:]"); UnicodeSet empty = new UnicodeSet(); Utility.showSetDifferences( "Sentence_Terminal", sterm, "Empty", empty, - true, Default.ucd); + true, Default.ucd()); Utility.showSetDifferences( "Sentence_Terminal", sterm, "Terminal_Punctuation", term, - true, Default.ucd); + true, Default.ucd()); Utility.showSetDifferences( "Terminal_Punctuation", term, "Punctuation_Other", po, - true, Default.ucd); + true, Default.ucd()); if (true) return; UnicodeSet us = getSetForName("LATIN LETTER.*P"); - Utility.showSetNames("",us,false,Default.ucd); + Utility.showSetNames("",us,false,Default.ucd()); us = getSetForName(".*VARIA(TION|NT).*"); - Utility.showSetNames("",us,false,Default.ucd); + Utility.showSetNames("",us,false,Default.ucd()); if (true) return; @@ -128,9 +128,9 @@ public class TestData implements UCD_Types { UnicodeSetIterator it = new UnicodeSetIterator(base); while (it.next()) { String s = UTF16.valueOf(it.codepoint); - String norm = Default.nfd.normalize(s); - if (s.equals(norm) && Default.nfkd.isNormalized(norm)) { - log.println("# " + s + " <> XXX # " + Default.ucd.getName(it.codepoint)); + String norm = Default.nfd().normalize(s); + if (s.equals(norm) && Default.nfkd().isNormalized(norm)) { + log.println("# " + s + " <> XXX # " + Default.ucd().getName(it.codepoint)); } } } finally { @@ -158,10 +158,10 @@ public class TestData implements UCD_Types { Matcher m = p.matcher(""); for (int i = 0; i < 0x10FFFF; ++i) { Utility.dot(i); - if (!Default.ucd.isAssigned(i)) continue; - byte cat = Default.ucd.getCategory(i); + if (!Default.ucd().isAssigned(i)) continue; + byte cat = Default.ucd().getCategory(i); if (cat == PRIVATE_USE) continue; - m.reset(Default.ucd.getName(i)); + m.reset(Default.ucd().getName(i)); if (m.matches()) { result.add(i); } @@ -174,7 +174,7 @@ public class TestData implements UCD_Types { System.out.println(x); UnicodeSet ss = new UnicodeSet(x); pw.println(x); - Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd); + Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd()); pw.println("****************************"); } diff --git a/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java b/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java index 155ba809cf5..4b1023bcd5f 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java,v $ -* $Date: 2003/02/26 00:35:09 $ -* $Revision: 1.1 $ +* $Date: 2004/02/07 01:01:13 $ +* $Revision: 1.2 $ * ******************************************************************************* */ @@ -24,7 +24,7 @@ import com.ibm.icu.text.UnicodeSet; public class TestNameUniqueness implements UCD_Types { public static void test() throws IOException { - Default.setUCD(); + new TestNameUniqueness().checkNames(); } @@ -39,18 +39,18 @@ public class TestNameUniqueness implements UCD_Types { out.println(); for (int cp = 0; cp < 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) continue; - if (Default.ucd.hasComputableName(cp)) continue; - int cat = Default.ucd.getCategory(cp); + if (!Default.ucd().isAllocated(cp)) continue; + if (Default.ucd().hasComputableName(cp)) continue; + int cat = Default.ucd().getCategory(cp); if (cat == Cc) continue; - String name = Default.ucd.getName(cp); + String name = Default.ucd().getName(cp); String processedName = processName(cp, name); Integer existing = (Integer) names.get(processedName); if (existing != null) { out.println("Collision between: " - + Default.ucd.getCodeAndName(existing.intValue()) - + ", " + Default.ucd.getCodeAndName(cp)); + + Default.ucd().getCodeAndName(existing.intValue()) + + ", " + Default.ucd().getCodeAndName(cp)); } else { names.put(processedName, new Integer(cp)); } @@ -61,19 +61,19 @@ public class TestNameUniqueness implements UCD_Types { for (int i = 0; i < charCount.length; ++i) { int count = charCount[i]; if (count == 0) continue; - String sampleName = Default.ucd.getCodeAndName(samples[i]); + String sampleName = Default.ucd().getCodeAndName(samples[i]); out.println(count + "\t'" + ((char)i) - + "'\t" + Default.ucd.getCodeAndName(samples[i]) - + "\t=>\t" + processName(samples[i], Default.ucd.getName(samples[i]))); + + "'\t" + Default.ucd().getCodeAndName(samples[i]) + + "\t=>\t" + processName(samples[i], Default.ucd().getName(samples[i]))); } out.println(); out.println("Name Samples"); out.println(); for (int i = 0; i < 256; ++i) { - int cat = Default.ucd.getCategory(i); + int cat = Default.ucd().getCategory(i); if (cat == Cc) continue; - out.println(Default.ucd.getCodeAndName(i) - + "\t=>\t" + processName(i, Default.ucd.getName(i))); + out.println(Default.ucd().getCodeAndName(i) + + "\t=>\t" + processName(i, Default.ucd().getName(i))); } } finally { out.close(); diff --git a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java index ec8de3a29cf..fa9f12cd880 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $ -* $Date: 2004/02/06 18:30:20 $ -* $Revision: 1.6 $ +* $Date: 2004/02/07 01:01:13 $ +* $Revision: 1.7 $ * ******************************************************************************* */ @@ -36,21 +36,21 @@ public final class TestNormalization { public static void main(String[] args) throws java.io.IOException { System.out.println("Creating Normalizers"); - Default.setUCD(); + String[] testSet = {"a\u0304\u0328", "a\u0328\u0304"}; for (int i = 0; i < testSet.length; ++i) { String s = testSet[i]; - boolean test = Default.nfc.isFCD(s); - System.out.println(test + ": " + Default.ucd.getCodeAndName(s)); + boolean test = Default.nfc().isFCD(s); + System.out.println(test + ": " + Default.ucd().getCodeAndName(s)); } String x = UTF32.valueOf32(0x10000); - check("NFC", Default.nfc, x); - check("NFD", Default.nfd, x); - check("NFKC", Default.nfkc, x); - check("NFKD", Default.nfkd, x); + check("NFC", Default.nfc(), x); + check("NFD", Default.nfd(), x); + check("NFKC", Default.nfkc(), x); + check("NFKD", Default.nfkd(), x); out = new PrintWriter( @@ -97,36 +97,36 @@ public final class TestNormalization { } // c2 == NFC(c1) == NFC(c2) == NFC(c3) - errorCount += check("NFCa", Default.nfc, parts[1], parts[0]); - errorCount += check("NFCb", Default.nfc, parts[1], parts[1]); - errorCount += check("NFCc", Default.nfc, parts[1], parts[2]); + errorCount += check("NFCa", Default.nfc(), parts[1], parts[0]); + errorCount += check("NFCb", Default.nfc(), parts[1], parts[1]); + errorCount += check("NFCc", Default.nfc(), parts[1], parts[2]); // c4 == NFC(c4) == NFC(c5) - errorCount += check("NFCd", Default.nfc, parts[3], parts[3]); - errorCount += check("NFCe", Default.nfc, parts[3], parts[4]); + errorCount += check("NFCd", Default.nfc(), parts[3], parts[3]); + errorCount += check("NFCe", Default.nfc(), parts[3], parts[4]); // c3 == NFD(c1) == NFD(c2) == NFD(c3) - errorCount += check("NFDa", Default.nfd, parts[2], parts[0]); - errorCount += check("NFDb", Default.nfd, parts[2], parts[1]); - errorCount += check("NFDc", Default.nfd, parts[2], parts[2]); + errorCount += check("NFDa", Default.nfd(), parts[2], parts[0]); + errorCount += check("NFDb", Default.nfd(), parts[2], parts[1]); + errorCount += check("NFDc", Default.nfd(), parts[2], parts[2]); // c5 == NFD(c4) == NFD(c5) - errorCount += check("NFDd", Default.nfd, parts[4], parts[3]); - errorCount += check("NFDe", Default.nfd, parts[4], parts[4]); + errorCount += check("NFDd", Default.nfd(), parts[4], parts[3]); + errorCount += check("NFDe", Default.nfd(), parts[4], parts[4]); // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) - errorCount += check("NFKCa", Default.nfkc, parts[3], parts[0]); - errorCount += check("NFKCb", Default.nfkc, parts[3], parts[1]); - errorCount += check("NFKCc", Default.nfkc, parts[3], parts[2]); - errorCount += check("NFKCd", Default.nfkc, parts[3], parts[3]); - errorCount += check("NFKCe", Default.nfkc, parts[3], parts[4]); + errorCount += check("NFKCa", Default.nfkc(), parts[3], parts[0]); + errorCount += check("NFKCb", Default.nfkc(), parts[3], parts[1]); + errorCount += check("NFKCc", Default.nfkc(), parts[3], parts[2]); + errorCount += check("NFKCd", Default.nfkc(), parts[3], parts[3]); + errorCount += check("NFKCe", Default.nfkc(), parts[3], parts[4]); // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) - errorCount += check("NFKDa", Default.nfkd, parts[4], parts[0]); - errorCount += check("NFKDb", Default.nfkd, parts[4], parts[1]); - errorCount += check("NFKDc", Default.nfkd, parts[4], parts[2]); - errorCount += check("NFKDd", Default.nfkd, parts[4], parts[3]); - errorCount += check("NFKDe", Default.nfkd, parts[4], parts[4]); + errorCount += check("NFKDa", Default.nfkd(), parts[4], parts[0]); + errorCount += check("NFKDb", Default.nfkd(), parts[4], parts[1]); + errorCount += check("NFKDc", Default.nfkd(), parts[4], parts[2]); + errorCount += check("NFKDd", Default.nfkd(), parts[4], parts[3]); + errorCount += check("NFKDe", Default.nfkd(), parts[4], parts[4]); } System.out.println("Total errors in file: " + errorCount + ", lines: " + lineErrorCount); @@ -160,21 +160,21 @@ public final class TestNormalization { } String otherList = ""; if (!base.equals(other)) { - otherList = "(" + Default.ucd.getCodeAndName(other) + ")"; + otherList = "(" + Default.ucd().getCodeAndName(other) + ")"; } out.println("DIFF " + type + ": " - + Default.ucd.getCodeAndName(base) + " != " + + Default.ucd().getCodeAndName(base) + " != " + type + otherList - + " == " + Default.ucd.getCodeAndName(trans) + + " == " + Default.ucd().getCodeAndName(trans) + temp ); return 1; } } catch (Exception e) { throw new ChainException("DIFF " + type + ": " - + Default.ucd.getCodeAndName(base) + " != " - + type + "(" + Default.ucd.getCodeAndName(other) + ")", new Object[]{}, e); + + Default.ucd().getCodeAndName(base) + " != " + + type + "(" + Default.ucd().getCodeAndName(other) + ")", new Object[]{}, e); } return 0; } @@ -188,10 +188,10 @@ public final class TestNormalization { if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing)); if (charsListed.get(missing)) continue; String x = UTF32.valueOf32(missing); - errorCount += check("NFC", Default.nfc, x); - errorCount += check("NFD", Default.nfd, x); - errorCount += check("NFKC", Default.nfkc, x); - errorCount += check("NFKD", Default.nfkd, x); + errorCount += check("NFC", Default.nfc(), x); + errorCount += check("NFD", Default.nfd(), x); + errorCount += check("NFKC", Default.nfkc(), x); + errorCount += check("NFKD", Default.nfkd(), x); } } @@ -200,9 +200,9 @@ public final class TestNormalization { UnicodeSet leading = new UnicodeSet(); UnicodeSet trailing = new UnicodeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { - if (Default.nfc.isLeading(i)) leading.add(i); - if (Default.ucd.getCombiningClass(i) != 0) continue; - if (Default.nfc.isTrailing(i)) trailing.add(i); + if (Default.nfc().isLeading(i)) leading.add(i); + if (Default.ucd().getCombiningClass(i) != 0) continue; + if (Default.nfc().isTrailing(i)) trailing.add(i); } System.out.println("Leading: " + leading.size()); System.out.println("Trailing Starters: " + trailing.size()); @@ -214,12 +214,12 @@ public final class TestNormalization { trail.reset(); followers.clear(); while (trail.next()) { - if (Default.nfc.getComposition(lead.codepoint, trail.codepoint) != 0xFFFF) { + if (Default.nfc().getComposition(lead.codepoint, trail.codepoint) != 0xFFFF) { followers.add(trail.codepoint); } } if (followers.size() == 0) continue; - System.out.println(Default.ucd.getCode(lead.codepoint) + System.out.println(Default.ucd().getCode(lead.codepoint) + "\t" + followers.toPattern(true)); UnicodeSet possLead = (UnicodeSet) map.get(followers); if (possLead == null) { diff --git a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java index 673c6cece4b..b2d9b10a2be 100644 --- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java +++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java @@ -35,36 +35,37 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { add(new ToolUnicodeProperty(name)); } add(new UnicodeProperty.SimpleProperty() { - {set("Name", "na", "", UnicodeProperty.STRING);} - public String getPropertyValue(int codepoint) { + {set("Name", "na", UnicodeProperty.STRING, "");} + public String getValue(int codepoint) { if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getName(codepoint); } }); add(new UnicodeProperty.SimpleProperty() { - {set("Block", "blk", "", UnicodeProperty.STRING);} - public String getPropertyValue(int codepoint) { + {set("Block", "blk", UnicodeProperty.ENUMERATED, + ucd.getBlockNames(null));} + public String getValue(int codepoint) { //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getBlock(codepoint); } }); add(new UnicodeProperty.SimpleProperty() { - {set("Bidi_Mirroring_Glyph", "bmg", "", UnicodeProperty.STRING);} - public String getPropertyValue(int codepoint) { + {set("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, "");} + public String getValue(int codepoint) { //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getBidiMirror(codepoint); } }); add(new UnicodeProperty.SimpleProperty() { - {set("Case_Folding", "cf", "", UnicodeProperty.STRING);} - public String getPropertyValue(int codepoint) { + {set("Case_Folding", "cf", UnicodeProperty.STRING, "");} + public String getValue(int codepoint) { //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD); } }); add(new UnicodeProperty.SimpleProperty() { - {set("Numeric_Value", "nv", "", UnicodeProperty.NUMERIC);} - public String getPropertyValue(int codepoint) { + {set("Numeric_Value", "nv", UnicodeProperty.NUMERIC, "");} + public String getValue(int codepoint) { double num = ucd.getNumericValue(codepoint); if (Double.isNaN(num)) return null; return Double.toString(num); @@ -108,8 +109,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { setName(propertyAlias); } - public Collection getAvailablePropertyValueAliases(Collection result) { - int type = getPropertyType() & ~EXTENDED_BIT; + public Collection getAvailableValueAliases(Collection result) { + int type = getType() & ~EXTENDED_BIT; if (type == STRING) result.add(""); else if (type == NUMERIC) result.add(""); else if (type == BINARY) { @@ -155,7 +156,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { return result; } - public Collection getPropertyAliases(Collection result) { + public Collection getAliases(Collection result) { String longName = up.getName(UCD_Types.LONG); addUnique(Utility.getUnskeleton(longName, true), result); String shortName = up.getName(UCD_Types.SHORT); @@ -163,12 +164,12 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { return result; } - public Collection getPropertyValueAliases(String valueAlias, Collection result) { + public Collection getValueAliases(String valueAlias, Collection result) { // TODO Auto-generated method stub return result; } - public String getPropertyValue(int codepoint) { + public String getValue(int codepoint) { byte style = UCD_Types.LONG; String temp = null; boolean titlecase = false; @@ -200,7 +201,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break; } if (temp != null) return Utility.getUnskeleton(temp,titlecase); - if (getPropertyType() == BINARY) { + if (getType() == BINARY) { return up.hasValue(codepoint) ? "True" : "False"; } return ""; diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java index 0a504a6d4cf..f5a3218a67c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java @@ -5,14 +5,15 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $ -* $Date: 2004/02/06 18:30:20 $ -* $Revision: 1.29 $ +* $Date: 2004/02/07 01:01:13 $ +* $Revision: 1.30 $ * ******************************************************************************* */ package com.ibm.text.UCD; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.ArrayList; @@ -27,6 +28,8 @@ import java.io.FileInputStream; import java.io.BufferedReader; import com.ibm.text.utility.*; +import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.text.UnicodeSet; public final class UCD implements UCD_Types { @@ -1503,10 +1506,6 @@ to guarantee identifier closure. UData uData = new UData(); uData.readBytes(dataIn); - if (uData.codePoint == SPOT_CHECK) { - System.out.println("SPOT-CHECK: " + uData); - } - //T = Mc + (Cf - ZWNJ - ZWJ) int cp = uData.codePoint; byte old = uData.joiningType; @@ -1571,6 +1570,18 @@ to guarantee identifier closure. } return NOBLOCK; } + + public Collection getBlockNames(Collection result) { + if (result == null) result = new ArrayList(); + if (blocks == null) loadBlocks(); + Iterator it = blocks.iterator(); + while (it.hasNext()) { + BlockData data = (BlockData) it.next(); + UnicodeProperty.addUnique(data.name, result); + } + UnicodeProperty.addUnique(NOBLOCK, result); + return result; + } public boolean getBlockData(int blockId, BlockData output) { if (blocks == null) loadBlocks(); diff --git a/tools/unicodetools/com/ibm/text/UCD/UData.java b/tools/unicodetools/com/ibm/text/UCD/UData.java index 020f1d1d374..d37abc87444 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UData.java +++ b/tools/unicodetools/com/ibm/text/UCD/UData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $ -* $Date: 2003/07/21 15:50:05 $ -* $Revision: 1.9 $ +* $Date: 2004/02/07 01:01:13 $ +* $Revision: 1.10 $ * ******************************************************************************* */ @@ -186,7 +186,7 @@ class UData implements UCD_Types { static final byte ABBREVIATED = 0, FULL = 1; public String toString() { - return toString(Default.ucd, FULL); + return toString(Default.ucd(), FULL); } public String toString(UCD ucd, byte style) { diff --git a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java index 1477340e35d..75e5ea5aa90 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java +++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $ -* $Date: 2004/02/06 18:30:19 $ -* $Revision: 1.15 $ +* $Date: 2004/02/07 01:01:13 $ +* $Revision: 1.16 $ * ******************************************************************************* */ @@ -24,7 +24,7 @@ public final class UnifiedBinaryProperty extends UCDProperty { // DerivedProperty dp; public static UCDProperty make(int propMask) { - return make(propMask, Default.ucd); + return make(propMask, Default.ucd()); } public static UCDProperty make(int propMask, UCD ucd) { @@ -323,7 +323,7 @@ public final class UnifiedBinaryProperty extends UCDProperty { if (shortOne.length() == 0) shortOne = "xx"; String longOne = getValue(LONG); if (majorProp == (SCRIPT>>8)) { - longOne = Default.ucd.getCase(longOne, FULL, TITLE); + longOne = Default.ucd().getCase(longOne, FULL, TITLE); } if (longOne.length() == 0) longOne = "none"; diff --git a/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java index 36e405c9dbc..999b30fafa6 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java +++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $ -* $Date: 2004/02/06 18:30:18 $ -* $Revision: 1.5 $ +* $Date: 2004/02/07 01:01:12 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -23,7 +23,7 @@ public final class UnifiedProperty extends UCDProperty { // DerivedProperty dp; public static UCDProperty make(int propMask) { - return make(propMask, Default.ucd); + return make(propMask, Default.ucd()); } public static UCDProperty make(int propMask, UCD ucd) { diff --git a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java index ff30a280394..f54a6f19e92 100644 --- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $ -* $Date: 2004/02/06 18:30:18 $ -* $Revision: 1.23 $ +* $Date: 2004/02/07 01:01:12 $ +* $Revision: 1.24 $ * ******************************************************************************* */ @@ -32,25 +32,25 @@ public class VerifyUCD implements UCD_Types { static final boolean DEBUG = false; static void checkDecompFolding() { - Default.setUCD(); + UnicodeSet sum = new UnicodeSet(); for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) continue; - byte cat = Default.ucd.getCategory(cp); + if (!Default.ucd().isAllocated(cp)) continue; + byte cat = Default.ucd().getCategory(cp); if (cat == UNASSIGNED || cat == PRIVATE_USE) continue; - String decomp = Default.nfd.normalize(cp); - String foldDecomp = Default.ucd.getCase(decomp, FULL, FOLD); - int d0 = Default.ucd.getCombiningClass(decomp.charAt(0)); - int dL = Default.ucd.getCombiningClass(decomp.charAt(decomp.length()-1)); - int f0 = Default.ucd.getCombiningClass(foldDecomp.charAt(0)); - int fL = Default.ucd.getCombiningClass(foldDecomp.charAt(decomp.length()-1)); + String decomp = Default.nfd().normalize(cp); + String foldDecomp = Default.ucd().getCase(decomp, FULL, FOLD); + int d0 = Default.ucd().getCombiningClass(decomp.charAt(0)); + int dL = Default.ucd().getCombiningClass(decomp.charAt(decomp.length()-1)); + int f0 = Default.ucd().getCombiningClass(foldDecomp.charAt(0)); + int fL = Default.ucd().getCombiningClass(foldDecomp.charAt(decomp.length()-1)); if (d0 != f0 || dL != fL) { Utility.fixDot(); System.out.println(); - System.out.println("Exception: " + Default.ucd.getCodeAndName(cp)); - System.out.println("Decomp: " + Default.ucd.getCodeAndName(decomp)); - System.out.println("FoldedDecomp: " + Default.ucd.getCodeAndName(foldDecomp)); + System.out.println("Exception: " + Default.ucd().getCodeAndName(cp)); + System.out.println("Decomp: " + Default.ucd().getCodeAndName(decomp)); + System.out.println("FoldedDecomp: " + Default.ucd().getCodeAndName(foldDecomp)); System.out.println("d0: " + d0 + ", " + "dL: " + dL + ", " + "f0: " + f0 + ", " @@ -63,20 +63,20 @@ public class VerifyUCD implements UCD_Types { } static void oneTime() { - Default.setUCD(); + int[] testSet = {0x10000, 'a', 0xE0000, '\u0221'}; // 10000 for (int i = 0; i < testSet.length; ++i) { int item = testSet[i]; - System.out.println(Default.ucd.getCode(item)); + System.out.println(Default.ucd().getCode(item)); - boolean ass = Default.ucd.isAssigned(item); + boolean ass = Default.ucd().isAssigned(item); System.out.println(ass ? " assigned" : " unassigned"); - ass = Default.ucd.isAllocated(item); + ass = Default.ucd().isAllocated(item); System.out.println(ass ? " allocated" : " unallocated"); - String name = Default.ucd.getName(item, SHORT); + String name = Default.ucd().getName(item, SHORT); System.out.println(" " + name); - name = Default.ucd.getName(item); + name = Default.ucd().getName(item); System.out.println(" " + name); System.out.println(); @@ -129,8 +129,8 @@ public class VerifyUCD implements UCD_Types { } byte getProp(int cp) { - byte cat = Default.ucd.getCategory(cp); - if (cat == Cn && Default.ucd.getBinaryProperty(cp, Noncharacter_Code_Point)) { + byte cat = Default.ucd().getCategory(cp); + if (cat == Cn && Default.ucd().getBinaryProperty(cp, Noncharacter_Code_Point)) { return NC; } return cat; @@ -140,14 +140,14 @@ public class VerifyUCD implements UCD_Types { if (prop == NC) { return "NC"; } - return Default.ucd.getCategoryID_fromIndex(prop); + return Default.ucd().getCategoryID_fromIndex(prop); } String getName(byte prop) { if (prop >= LIMIT_CATEGORY) return "???" + prop; if (prop == NC) { return "Noncharacter"; } - String name = Default.ucd.getCategoryID_fromIndex(prop, LONG); + String name = Default.ucd().getCategoryID_fromIndex(prop, LONG); if (prop == Cn) name += " - NC"; return name; } @@ -177,15 +177,15 @@ public class VerifyUCD implements UCD_Types { } byte getProp(int cp) { - return Default.ucd.getScript(cp); + return Default.ucd().getScript(cp); } String getCode(byte prop) { if (prop >= LIMIT_SCRIPT) return "???" + prop; - return Default.ucd.getScriptID_fromIndex(prop, SHORT); + return Default.ucd().getScriptID_fromIndex(prop, SHORT); } String getName(byte prop) { if (prop >= LIMIT_SCRIPT) return "???" + prop; - return Default.ucd.getScriptID_fromIndex(prop, LONG); + return Default.ucd().getScriptID_fromIndex(prop, LONG); } byte getPermutation(byte prop) { if (prop == LIMIT_SCRIPT-1) return COMMON_SCRIPT; @@ -215,11 +215,11 @@ public class VerifyUCD implements UCD_Types { int[] totalCount = new int[5]; - Default.setUCD(); + byte cat; for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) { + if (!Default.ucd().isAllocated(cp)) { cat = prop.getUnallocatedProp(); setSample(count[cat], sample[cat], 0, cp); continue; @@ -227,16 +227,16 @@ public class VerifyUCD implements UCD_Types { cat = prop.getProp(cp); setSample(count[cat], sample[cat], 0, cp); - if (checkNormalizer(Default.nfd, cp)) { + if (checkNormalizer(Default.nfd(), cp)) { setSample(count[cat], sample[cat], NFD+1, cp); } - if (checkNormalizer(Default.nfc, cp)) { + if (checkNormalizer(Default.nfc(), cp)) { setSample(count[cat], sample[cat], NFC+1, cp); } - if (checkNormalizer(Default.nfkd, cp)) { + if (checkNormalizer(Default.nfkd(), cp)) { setSample(count[cat], sample[cat], NFKD+1, cp); } - if (checkNormalizer(Default.nfkc, cp)) { + if (checkNormalizer(Default.nfkc(), cp)) { setSample(count[cat], sample[cat], NFKC+1, cp); } @@ -265,7 +265,7 @@ public class VerifyUCD implements UCD_Types { else { System.out.println(" "); System.out.println(" "); + Default.ucd().getCodeAndName(sample[i][j]) + "'>" + quote(sample[i][j]) + ""); } subtotalCount[j] += count[i][j]; totalCount[j] += count[i][j]; @@ -279,7 +279,7 @@ public class VerifyUCD implements UCD_Types { } static public String quote(int cp) { - byte cat2 = Default.ucd.getCategory(cp); + byte cat2 = Default.ucd().getCategory(cp); if (cat2 == Zs || cat2 == Zp || cat2 == Zl) return " "; if (cat2 == Cc || cat2 == Cs) return "??"; if (cat2 == Mn || cat2 == Me || cat2 == Mc) return "◌&#" + cp + ";"; @@ -291,7 +291,7 @@ public class VerifyUCD implements UCD_Types { int value = array[index]; if (value == 0) { array[index] = cp; - } else if (Default.ucd.isAllocated(cp)) { + } else if (Default.ucd().isAllocated(cp)) { int ncount1 = getNFCount(value, index); int ncount2 = getNFCount(cp, index); if (ncount1 != ncount2) { @@ -306,10 +306,10 @@ public class VerifyUCD implements UCD_Types { public static int getNFCount(int cp, int index) { int count = 0; - boolean nfc1 = checkNormalizer(Default.nfc, cp); - boolean nfd1 = checkNormalizer(Default.nfd, cp); - boolean nfkc1 = checkNormalizer(Default.nfkc, cp); - boolean nfkd1 = checkNormalizer(Default.nfkd, cp); + boolean nfc1 = checkNormalizer(Default.nfc(), cp); + boolean nfd1 = checkNormalizer(Default.nfd(), cp); + boolean nfkc1 = checkNormalizer(Default.nfkc(), cp); + boolean nfkd1 = checkNormalizer(Default.nfkd(), cp); if (nfc1) count += 1; if (nfd1) count += 2; if (nfkc1) count += 4; @@ -333,23 +333,23 @@ public class VerifyUCD implements UCD_Types { String s = x.normalize(cp); boolean sResult = !s.equals(UTF16.valueOf(cp)); if (result != sResult) { - System.out.println("Failure with " + x + " at " + Default.ucd.getCodeAndName(cp)); + System.out.println("Failure with " + x + " at " + Default.ucd().getCodeAndName(cp)); } } return result; } public static void checkBIDI() { - Default.setUCD(); + for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) continue; + if (!Default.ucd().isAllocated(cp)) continue; - if (Default.nfd.isNormalized(cp)) continue; + if (Default.nfd().isNormalized(cp)) continue; - String decomp = Default.nfd.normalize(cp); - String comp = Default.nfc.normalize(cp); + String decomp = Default.nfd().normalize(cp); + String comp = Default.nfc().normalize(cp); String source = UTF16.valueOf(cp); String bidiDecomp = getBidi(decomp, true); @@ -358,9 +358,9 @@ public class VerifyUCD implements UCD_Types { if (!bidiDecomp.equals(bidiSource) || !bidiComp.equals(bidiSource)) { Utility.fixDot(); - System.out.println(Default.ucd.getCodeAndName(cp) + ": " + getBidi(source, false)); - System.out.println("\tNFC: " + Default.ucd.getCodeAndName(comp) + ": " + getBidi(comp, false)); - System.out.println("\tNFD: " + Default.ucd.getCodeAndName(decomp) + ": " + getBidi(decomp, false)); + System.out.println(Default.ucd().getCodeAndName(cp) + ": " + getBidi(source, false)); + System.out.println("\tNFC: " + Default.ucd().getCodeAndName(comp) + ": " + getBidi(comp, false)); + System.out.println("\tNFD: " + Default.ucd().getCodeAndName(decomp) + ": " + getBidi(decomp, false)); } } } @@ -371,7 +371,7 @@ public class VerifyUCD implements UCD_Types { int cp; for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i); - byte bidi = Default.ucd.getBidiClass(cp); + byte bidi = Default.ucd().getBidiClass(cp); if (compact) { if (bidi == BIDI_NSM) { if (lastBidi != -1) bidi = lastBidi; @@ -380,15 +380,15 @@ public class VerifyUCD implements UCD_Types { continue; } } - result += Default.ucd.getCase( - Default.ucd.getBidiClassID_fromIndex(bidi, SHORT), FULL, TITLE); + result += Default.ucd().getCase( + Default.ucd().getBidiClassID_fromIndex(bidi, SHORT), FULL, TITLE); lastBidi = bidi; } return result; } public static void verify() throws IOException { - Default.setUCD(); + checkIdentical("ea=h", "dt=nar"); checkIdentical("ea=f", "dt=wide"); @@ -436,7 +436,7 @@ can help you narrow these down. } static final void checkCase3 () { - Default.setUCD(); + checkNF_AndCase("\u0130", true); checkNF_AndCase("\u0131", true); @@ -447,8 +447,8 @@ can help you narrow these down. for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) continue; - byte cat = Default.ucd.getCategory(cp); + if (!Default.ucd().isAllocated(cp)) continue; + byte cat = Default.ucd().getCategory(cp); // check if canonical equivalents are case-mapped to canonical equivalents if (cat != PRIVATE_USE && cat != SURROGATE) { String str = UTF16.valueOf(cp); @@ -467,31 +467,31 @@ can help you narrow these down. if (!checkNF_AndCase(s, false)) badChars.add(cp); } } catch (StringIndexOutOfBoundsException e) { - System.out.println("Problem with " + Default.ucd.getCodeAndName(str)); + System.out.println("Problem with " + Default.ucd().getCodeAndName(str)); throw e; } } if (false) { - if (softdot == null) softdot = DerivedProperty.make(Type_i, Default.ucd); - if (Default.ucd.getBinaryProperty(cp, Soft_Dotted) != + if (softdot == null) softdot = DerivedProperty.make(Type_i, Default.ucd()); + if (Default.ucd().getBinaryProperty(cp, Soft_Dotted) != softdot.hasValue(cp)) { - System.out.println("FAIL: " + Default.ucd.getCodeAndName(cp)); - System.out.println("Soft_Dotted='" + Default.ucd.getBinaryPropertiesID(cp, Soft_Dotted) + System.out.println("FAIL: " + Default.ucd().getCodeAndName(cp)); + System.out.println("Soft_Dotted='" + Default.ucd().getBinaryPropertiesID(cp, Soft_Dotted) + "', DerivedSD=" + softdot.getValue(cp) + "'"); } } } System.out.println(); - Utility.showSetNames("", badChars, false, Default.ucd); + Utility.showSetNames("", badChars, false, Default.ucd()); } static void checkIdentical(String ubpName1, String ubpName2) { - UCDProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd); + UCDProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd()); UnicodeSet set1 = prop1.getSet(); - UCDProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd); + UCDProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd()); UnicodeSet set2 = prop2.getSet(); UnicodeSet set1minus2 = new UnicodeSet(set1); set1minus2.removeAll(set2); @@ -506,31 +506,31 @@ can help you narrow these down. System.out.println("FAIL: " + prop1.getFullName(LONG) + " != " + prop2.getFullName(LONG)); if (!set1minus2.isEmpty()) { System.out.println(" In " + prop1.getFullName(LONG) + " but not " + prop2.getFullName(LONG)); - Utility.showSetNames(" " + prop1.getFullName(SHORT) + ": ", set1minus2, false, Default.ucd); + Utility.showSetNames(" " + prop1.getFullName(SHORT) + ": ", set1minus2, false, Default.ucd()); } if (!set2minus1.isEmpty()) { System.out.println(" In " + prop2.getFullName(LONG) + " but not " + prop1.getFullName(LONG)); - Utility.showSetNames(" " + prop2.getFullName(SHORT) + ": ", set2minus1, false, Default.ucd); + Utility.showSetNames(" " + prop2.getFullName(SHORT) + ": ", set2minus1, false, Default.ucd()); } System.out.println(); } static boolean checkNF_AndCase(String source, boolean both) { boolean result = true; - String decomp = Default.nfd.normalize(source); + String decomp = Default.nfd().normalize(source); if (!decomp.equals(source)) { - result &= checkNFC("Lower", source, decomp, Default.ucd.getCase(source, FULL, LOWER), Default.ucd.getCase(decomp, FULL, LOWER)); - result &= checkNFC("Upper", source, decomp, Default.ucd.getCase(source, FULL, UPPER), Default.ucd.getCase(decomp, FULL, UPPER)); - result &= checkNFC("Title", source, decomp, Default.ucd.getCase(source, FULL, TITLE), Default.ucd.getCase(decomp, FULL, TITLE)); - result &= checkNFC("Fold", source, decomp, Default.ucd.getCase(source, FULL, FOLD), Default.ucd.getCase(decomp, FULL, FOLD)); + result &= checkNFC("Lower", source, decomp, Default.ucd().getCase(source, FULL, LOWER), Default.ucd().getCase(decomp, FULL, LOWER)); + result &= checkNFC("Upper", source, decomp, Default.ucd().getCase(source, FULL, UPPER), Default.ucd().getCase(decomp, FULL, UPPER)); + result &= checkNFC("Title", source, decomp, Default.ucd().getCase(source, FULL, TITLE), Default.ucd().getCase(decomp, FULL, TITLE)); + result &= checkNFC("Fold", source, decomp, Default.ucd().getCase(source, FULL, FOLD), Default.ucd().getCase(decomp, FULL, FOLD)); if (!both) return result; - result &= checkNFC("SLower", source, decomp, Default.ucd.getCase(source, SIMPLE, LOWER), Default.ucd.getCase(decomp, SIMPLE, LOWER)); - result &= checkNFC("SUpper", source, decomp, Default.ucd.getCase(source, SIMPLE, UPPER), Default.ucd.getCase(decomp, SIMPLE, UPPER)); - result &= checkNFC("STitle", source, decomp, Default.ucd.getCase(source, SIMPLE, TITLE), Default.ucd.getCase(decomp, SIMPLE, TITLE)); - result &= checkNFC("SFold", source, decomp, Default.ucd.getCase(source, SIMPLE, TITLE), Default.ucd.getCase(decomp, SIMPLE, TITLE)); + result &= checkNFC("SLower", source, decomp, Default.ucd().getCase(source, SIMPLE, LOWER), Default.ucd().getCase(decomp, SIMPLE, LOWER)); + result &= checkNFC("SUpper", source, decomp, Default.ucd().getCase(source, SIMPLE, UPPER), Default.ucd().getCase(decomp, SIMPLE, UPPER)); + result &= checkNFC("STitle", source, decomp, Default.ucd().getCase(source, SIMPLE, TITLE), Default.ucd().getCase(decomp, SIMPLE, TITLE)); + result &= checkNFC("SFold", source, decomp, Default.ucd().getCase(source, SIMPLE, TITLE), Default.ucd().getCase(decomp, SIMPLE, TITLE)); } return result; } @@ -538,12 +538,12 @@ can help you narrow these down. static final boolean SHOW_NFC_DIFFERENCE = false; static boolean checkNFC(String label, String source, String decomp, String casedCp, String casedDecomp) { - if (!Default.nfd.normalize(casedCp).equals(Default.nfd.normalize(casedDecomp))) { + if (!Default.nfd().normalize(casedCp).equals(Default.nfd().normalize(casedDecomp))) { if (SHOW_NFC_DIFFERENCE) { Utility.fixDot(); - System.out.println("FAIL CASE CE: " + label + " (" + Default.ucd.getCodeAndName(source) + ")"); - System.out.println("\t" + Default.ucd.getCode(source) + " => " + Default.ucd.getCode(casedCp)); - System.out.println("\t" + Default.ucd.getCode(decomp) + " => " + Default.ucd.getCode(casedDecomp)); + System.out.println("FAIL CASE CE: " + label + " (" + Default.ucd().getCodeAndName(source) + ")"); + System.out.println("\t" + Default.ucd().getCode(source) + " => " + Default.ucd().getCode(casedCp)); + System.out.println("\t" + Default.ucd().getCode(decomp) + " => " + Default.ucd().getCode(casedDecomp)); } return false; } @@ -565,13 +565,13 @@ can help you narrow these down. */ static void checkAgainstOtherVersion(String otherVersion) { - Default.setUCD(); + UCD ucd2 = UCD.make(otherVersion); for (int cp = 0; cp <= 0x10FFFF; ++cp) { - UData curr = Default.ucd.get(cp, true); + UData curr = Default.ucd().get(cp, true); UData other = ucd2.get(cp, true); if (!curr.equals(other)) { - System.out.println("Difference at " + Default.ucd.getCodeAndName(cp)); + System.out.println("Difference at " + Default.ucd().getCodeAndName(cp)); System.out.println(curr); System.out.println(curr); System.out.println(); @@ -580,7 +580,7 @@ can help you narrow these down. } static void generateXML() throws IOException { - Default.setUCD(); + String filename = "UCD.xml"; PrintWriter log = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX); @@ -589,11 +589,11 @@ can help you narrow these down. for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isRepresented(cp)) continue; + if (!Default.ucd().isRepresented(cp)) continue; if (cp == 0xE0026 || cp == 0x20000) { System.out.println("debug"); } - log.println(Default.ucd.toString(cp)); + log.println(Default.ucd().toString(cp)); } log.println(""); @@ -603,33 +603,33 @@ can help you narrow these down. static final byte MIXED = (byte)(UNCASED + 1); public static void checkCase() throws IOException { - Default.setUCD(); + Utility.fixDot(); System.out.println("checkCase"); String test = "The qui'ck br\u2019own 'fox jum\u00ADped ov\u200Ber th\u200Ce lazy dog."; - String ttest = Default.ucd.getCase(test, FULL, TITLE); + String ttest = Default.ucd().getCase(test, FULL, TITLE); PrintWriter titleTest = Utility.openPrintWriter("TestTitle.txt", Utility.LATIN1_UNIX); titleTest.println(test); titleTest.println(ttest); titleTest.close(); - System.out.println(Default.ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE)); + System.out.println(Default.ucd().getCase("ABC,DE'F G\u0308H", FULL, TITLE)); String fileName = "CaseDifferences.txt"; PrintWriter log = Utility.openPrintWriter(fileName, Utility.LATIN1_UNIX); for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isRepresented(cp) || Default.ucd.isPUA(cp)) continue; + if (!Default.ucd().isRepresented(cp) || Default.ucd().isPUA(cp)) continue; if (cp == '\u3371') { System.out.println("debug"); } - String x = Default.nfkd.normalize(cp); - String xu = Default.ucd.getCase(x, FULL, UPPER); - String xl = Default.ucd.getCase(x, FULL, LOWER); - String xt = Default.ucd.getCase(x, FULL, TITLE); + String x = Default.nfkd().normalize(cp); + String xu = Default.ucd().getCase(x, FULL, UPPER); + String xl = Default.ucd().getCase(x, FULL, LOWER); + String xt = Default.ucd().getCase(x, FULL, TITLE); byte caseCat = MIXED; if (xu.equals(xl)) caseCat = UNCASED; @@ -637,9 +637,9 @@ can help you narrow these down. else if (x.equals(xu)) caseCat = UPPER; else if (x.equals(xt)) caseCat = TITLE; - byte cat = Default.ucd.getCategory(cp); - boolean otherLower = Default.ucd.getBinaryProperty(cp, Other_Lowercase); - boolean otherUpper = Default.ucd.getBinaryProperty(cp, Other_Uppercase); + byte cat = Default.ucd().getCategory(cp); + boolean otherLower = Default.ucd().getBinaryProperty(cp, Other_Lowercase); + boolean otherUpper = Default.ucd().getBinaryProperty(cp, Other_Uppercase); byte oldCaseCat = (cat == Lu || otherUpper) ? UPPER : (cat == Ll || otherLower) ? LOWER : (cat == Lt) ? TITLE @@ -649,14 +649,14 @@ can help you narrow these down. log.println(UTF32.valueOf32(cp) + "\t" + names[caseCat] + "\t" + names[oldCaseCat] - + "\t" + Default.ucd.getCategoryID_fromIndex(cat) + + "\t" + Default.ucd().getCategoryID_fromIndex(cat) + "\t" + lowerNames[otherLower ? 1 : 0] + "\t" + upperNames[otherUpper ? 1 : 0] - + "\t" + Default.ucd.getCodeAndName(cp) - + "\t" + Default.ucd.getCodeAndName(x) - + "\t" + Default.ucd.getCodeAndName(xu) - + "\t" + Default.ucd.getCodeAndName(xl) - + "\t" + Default.ucd.getCodeAndName(xt) + + "\t" + Default.ucd().getCodeAndName(cp) + + "\t" + Default.ucd().getCodeAndName(x) + + "\t" + Default.ucd().getCodeAndName(xu) + + "\t" + Default.ucd().getCodeAndName(xl) + + "\t" + Default.ucd().getCodeAndName(xt) ); } } @@ -665,7 +665,7 @@ can help you narrow these down. } public static void checkCase2(boolean longForm) throws IOException { - Default.setUCD(); + Utility.fixDot(); System.out.println("checkCase"); @@ -689,24 +689,24 @@ can help you narrow these down. for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isRepresented(cp) || Default.ucd.isPUA(cp)) continue; + if (!Default.ucd().isRepresented(cp) || Default.ucd().isPUA(cp)) continue; if (cp == '\u0130') { System.out.println("debug"); } String x = UTF32.valueOf32(cp); - String dx = Default.nfd.normalize(cp); - String cx = Default.nfc.normalize(cp); + String dx = Default.nfd().normalize(cp); + String cx = Default.nfc().normalize(cp); - String ux = Default.ucd.getCase(x, FULL, UPPER); - String lx = Default.ucd.getCase(x, FULL, LOWER); - String tx = Default.ucd.getCase(x, FULL, TITLE); + String ux = Default.ucd().getCase(x, FULL, UPPER); + String lx = Default.ucd().getCase(x, FULL, LOWER); + String tx = Default.ucd().getCase(x, FULL, TITLE); if (x.equals(dx) && dx.equals(cx) && cx.equals(ux) && ux.equals(lx) && lx.equals(tx)) continue; - String cux = Default.nfc.normalize(ux); - String clx = Default.nfc.normalize(lx); - String ctx = Default.nfc.normalize(tx); + String cux = Default.nfc().normalize(ux); + String clx = Default.nfc().normalize(lx); + String ctx = Default.nfc().normalize(tx); if (x.equals(cx)) { boolean needBreak = false; @@ -721,21 +721,21 @@ can help you narrow these down. + Utility.hex(lx) + "; " + Utility.hex(tx) + "; " + Utility.hex(ux) + "; # " - + Default.ucd.getName(x)); + + Default.ucd().getName(x)); log.println("# should be:"); log.println( Utility.hex(x) + "; " + Utility.hex(clx) + "; " + Utility.hex(ctx) + "; " + Utility.hex(cux) + "; # " - + Default.ucd.getName(x)); + + Default.ucd().getName(x)); log.println(); } } - String dux = Default.nfd.normalize(ux); - String dlx = Default.nfd.normalize(lx); - String dtx = Default.nfd.normalize(tx); + String dux = Default.nfd().normalize(ux); + String dlx = Default.nfd().normalize(lx); + String dtx = Default.nfd().normalize(tx); @@ -755,32 +755,32 @@ can help you narrow these down. if (!startdx.startsWith(startdux) || !startdx.startsWith(startdtx) || !startdx.startsWith(startdlx) || !enddx.endsWith(enddux) || !enddx.endsWith(enddtx) || !enddx.endsWith(enddlx)) { - log.println("Combining Class Difference for " + Default.ucd.getCodeAndName(x)); - log.println("x: " + Default.ucd.getCodeAndName(dx) + ", " + Utility.hex(startdx) + ", " + Utility.hex(enddx)); - log.println("ux: " + Default.ucd.getCodeAndName(dux) + ", " + Utility.hex(startdux) + ", " + Utility.hex(enddux)); - log.println("tx: " + Default.ucd.getCodeAndName(dtx) + ", " + Utility.hex(startdtx) + ", " + Utility.hex(enddtx)); - log.println("lx: " + Default.ucd.getCodeAndName(dlx) + ", " + Utility.hex(startdlx) + ", " + Utility.hex(enddlx)); + log.println("Combining Class Difference for " + Default.ucd().getCodeAndName(x)); + log.println("x: " + Default.ucd().getCodeAndName(dx) + ", " + Utility.hex(startdx) + ", " + Utility.hex(enddx)); + log.println("ux: " + Default.ucd().getCodeAndName(dux) + ", " + Utility.hex(startdux) + ", " + Utility.hex(enddux)); + log.println("tx: " + Default.ucd().getCodeAndName(dtx) + ", " + Utility.hex(startdtx) + ", " + Utility.hex(enddtx)); + log.println("lx: " + Default.ucd().getCodeAndName(dlx) + ", " + Utility.hex(startdlx) + ", " + Utility.hex(enddlx)); log.println(); } if (!longForm) continue; - String udx = Default.ucd.getCase(dx, FULL, UPPER); - String ldx = Default.ucd.getCase(dx, FULL, LOWER); - String tdx = Default.ucd.getCase(dx, FULL, TITLE); + String udx = Default.ucd().getCase(dx, FULL, UPPER); + String ldx = Default.ucd().getCase(dx, FULL, LOWER); + String tdx = Default.ucd().getCase(dx, FULL, TITLE); - String ucx = Default.ucd.getCase(cx, FULL, UPPER); - String lcx = Default.ucd.getCase(cx, FULL, LOWER); - String tcx = Default.ucd.getCase(cx, FULL, TITLE); + String ucx = Default.ucd().getCase(cx, FULL, UPPER); + String lcx = Default.ucd().getCase(cx, FULL, LOWER); + String tcx = Default.ucd().getCase(cx, FULL, TITLE); - String dudx = Default.nfd.normalize(udx); - String dldx = Default.nfd.normalize(ldx); - String dtdx = Default.nfd.normalize(tdx); + String dudx = Default.nfd().normalize(udx); + String dldx = Default.nfd().normalize(ldx); + String dtdx = Default.nfd().normalize(tdx); - String cucx = Default.nfc.normalize(ucx); - String clcx = Default.nfc.normalize(lcx); - String ctcx = Default.nfc.normalize(tcx); + String cucx = Default.nfc().normalize(ucx); + String clcx = Default.nfc().normalize(lcx); + String ctcx = Default.nfc().normalize(tcx); if (!dux.equals(udx) @@ -797,77 +797,77 @@ can help you narrow these down. || !ctx.equals(ctcx) ) { log.println(); - log.println("Difference at " + Default.ucd.getCodeAndName(cp)); - if (!x.equals(ux)) log.println("\tu(cp):\t" + Default.ucd.getCodeAndName(ux)); - if (!x.equals(lx)) log.println("\tl(cp):\t" + Default.ucd.getCodeAndName(lx)); - if (!tx.equals(ux)) log.println("\tt(cp):\t" + Default.ucd.getCodeAndName(tx)); - if (!x.equals(dx)) log.println("\td(cp):\t" + Default.ucd.getCodeAndName(dx)); - if (!x.equals(cx)) log.println("\tc(cp):\t" + Default.ucd.getCodeAndName(cx)); + log.println("Difference at " + Default.ucd().getCodeAndName(cp)); + if (!x.equals(ux)) log.println("\tu(cp):\t" + Default.ucd().getCodeAndName(ux)); + if (!x.equals(lx)) log.println("\tl(cp):\t" + Default.ucd().getCodeAndName(lx)); + if (!tx.equals(ux)) log.println("\tt(cp):\t" + Default.ucd().getCodeAndName(tx)); + if (!x.equals(dx)) log.println("\td(cp):\t" + Default.ucd().getCodeAndName(dx)); + if (!x.equals(cx)) log.println("\tc(cp):\t" + Default.ucd().getCodeAndName(cx)); if (!dux.equals(udx)) { log.println(); - log.println("\td(u(cp)):\t" + Default.ucd.getCodeAndName(dux)); - log.println("\tu(d(cp)):\t" + Default.ucd.getCodeAndName(udx)); + log.println("\td(u(cp)):\t" + Default.ucd().getCodeAndName(dux)); + log.println("\tu(d(cp)):\t" + Default.ucd().getCodeAndName(udx)); } if (!dlx.equals(ldx)) { log.println(); - log.println("\td(l(cp)):\t" + Default.ucd.getCodeAndName(dlx)); - log.println("\tl(d(cp)):\t" + Default.ucd.getCodeAndName(ldx)); + log.println("\td(l(cp)):\t" + Default.ucd().getCodeAndName(dlx)); + log.println("\tl(d(cp)):\t" + Default.ucd().getCodeAndName(ldx)); } if (!dtx.equals(tdx)) { log.println(); - log.println("\td(t(cp)):\t" + Default.ucd.getCodeAndName(dtx)); - log.println("\tt(d(cp)):\t" + Default.ucd.getCodeAndName(tdx)); + log.println("\td(t(cp)):\t" + Default.ucd().getCodeAndName(dtx)); + log.println("\tt(d(cp)):\t" + Default.ucd().getCodeAndName(tdx)); } if (!cux.equals(ucx)) { log.println(); - log.println("\tc(u(cp)):\t" + Default.ucd.getCodeAndName(cux)); - log.println("\tu(c(cp)):\t" + Default.ucd.getCodeAndName(ucx)); + log.println("\tc(u(cp)):\t" + Default.ucd().getCodeAndName(cux)); + log.println("\tu(c(cp)):\t" + Default.ucd().getCodeAndName(ucx)); } if (!clx.equals(lcx)) { log.println(); - log.println("\tc(l(cp)):\t" + Default.ucd.getCodeAndName(clx)); - log.println("\tl(c(cp)):\t" + Default.ucd.getCodeAndName(lcx)); + log.println("\tc(l(cp)):\t" + Default.ucd().getCodeAndName(clx)); + log.println("\tl(c(cp)):\t" + Default.ucd().getCodeAndName(lcx)); } if (!ctx.equals(tcx)) { log.println(); - log.println("\tc(t(cp)):\t" + Default.ucd.getCodeAndName(ctx)); - log.println("\tt(c(cp)):\t" + Default.ucd.getCodeAndName(tcx)); + log.println("\tc(t(cp)):\t" + Default.ucd().getCodeAndName(ctx)); + log.println("\tt(c(cp)):\t" + Default.ucd().getCodeAndName(tcx)); } // ........... if (!udx.equals(dudx)) { log.println(); - log.println("\tu(d(cp)):\t" + Default.ucd.getCodeAndName(udx)); - log.println("\td(u(d(cp))):\t" + Default.ucd.getCodeAndName(dudx)); + log.println("\tu(d(cp)):\t" + Default.ucd().getCodeAndName(udx)); + log.println("\td(u(d(cp))):\t" + Default.ucd().getCodeAndName(dudx)); } if (!ldx.equals(dldx)) { log.println(); - log.println("\tl(d(cp)):\t" + Default.ucd.getCodeAndName(ldx)); - log.println("\td(l(d(cp))):\t" + Default.ucd.getCodeAndName(dldx)); + log.println("\tl(d(cp)):\t" + Default.ucd().getCodeAndName(ldx)); + log.println("\td(l(d(cp))):\t" + Default.ucd().getCodeAndName(dldx)); } if (!tdx.equals(dtdx)) { log.println(); - log.println("\tt(d(cp)):\t" + Default.ucd.getCodeAndName(tdx)); - log.println("\td(t(d(cp))):\t" + Default.ucd.getCodeAndName(dtdx)); + log.println("\tt(d(cp)):\t" + Default.ucd().getCodeAndName(tdx)); + log.println("\td(t(d(cp))):\t" + Default.ucd().getCodeAndName(dtdx)); } if (!ucx.equals(cucx)) { log.println(); - log.println("\tu(c(cp)):\t" + Default.ucd.getCodeAndName(ucx)); - log.println("\tc(u(c(cp))):\t" + Default.ucd.getCodeAndName(cucx)); + log.println("\tu(c(cp)):\t" + Default.ucd().getCodeAndName(ucx)); + log.println("\tc(u(c(cp))):\t" + Default.ucd().getCodeAndName(cucx)); } if (!lcx.equals(clcx)) { log.println(); - log.println("\tl(c(cp)):\t" + Default.ucd.getCodeAndName(lcx)); - log.println("\tc(l(c(cp))):\t" + Default.ucd.getCodeAndName(clcx)); + log.println("\tl(c(cp)):\t" + Default.ucd().getCodeAndName(lcx)); + log.println("\tc(l(c(cp))):\t" + Default.ucd().getCodeAndName(clcx)); } if (!tcx.equals(ctcx)) { log.println(); - log.println("\tt(c(cp)):\t" + Default.ucd.getCodeAndName(tcx)); - log.println("\tc(t(c(cp))):\t" + Default.ucd.getCodeAndName(ctcx)); + log.println("\tt(c(cp)):\t" + Default.ucd().getCodeAndName(tcx)); + log.println("\tc(t(c(cp))):\t" + Default.ucd().getCodeAndName(ctcx)); } } } @@ -880,7 +880,7 @@ can help you narrow these down. if (!doEnd) { for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i); - int cc = Default.ucd.getCombiningClass(cp); + int cc = Default.ucd().getCombiningClass(cp); if (cc == 0) { return s.substring(0, i); } @@ -888,7 +888,7 @@ can help you narrow these down. } else { for (int i = s.length(); i > 0; i -= UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i-1); // will go 2 before if necessary - int cc = Default.ucd.getCombiningClass(cp); + int cc = Default.ucd().getCombiningClass(cp); if (cc == 0) { return s.substring(i); } @@ -903,32 +903,32 @@ can help you narrow these down. static final String upperNames[] = {"", "Other_Upper"}; public static void CheckCaseFold() { - Default.setUCD(); + System.out.println("Checking Case Fold"); for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAssigned(cp) || Default.ucd.isPUA(cp)) continue; + if (!Default.ucd().isAssigned(cp) || Default.ucd().isPUA(cp)) continue; boolean failed = false; - String fullTest = Default.ucd.getCase(Default.ucd.getCase(cp, FULL, UPPER), FULL, LOWER); - String simpleTest = Default.ucd.getCase(Default.ucd.getCase(cp, SIMPLE, UPPER), SIMPLE, LOWER); + String fullTest = Default.ucd().getCase(Default.ucd().getCase(cp, FULL, UPPER), FULL, LOWER); + String simpleTest = Default.ucd().getCase(Default.ucd().getCase(cp, SIMPLE, UPPER), SIMPLE, LOWER); - String full = Default.ucd.getCase(cp, FULL, FOLD); - String simple = Default.ucd.getCase(cp, SIMPLE, FOLD); + String full = Default.ucd().getCase(cp, FULL, FOLD); + String simple = Default.ucd().getCase(cp, SIMPLE, FOLD); String realTest = "\u0360" + UTF16.valueOf(cp) + "\u0334"; - int ccc = Default.ucd.getCombiningClass(cp); + int ccc = Default.ucd().getCombiningClass(cp); for (byte style = FOLD; style < LIMIT_CASE; ++style) { - String fold_NFD = Default.nfd.normalize(Default.ucd.getCase(realTest, FULL, style)); - String NFD_fold = Default.ucd.getCase(Default.nfd.normalize(realTest), FULL, style); + String fold_NFD = Default.nfd().normalize(Default.ucd().getCase(realTest, FULL, style)); + String NFD_fold = Default.ucd().getCase(Default.nfd().normalize(realTest), FULL, style); if (!fold_NFD.equals(NFD_fold)) { Utility.fixDot(); - System.out.println("Case check fails at " + Default.ucd.getCodeAndName(cp)); - System.out.println("\t" + names2[style] + ", then NFD: " + Default.ucd.getCodeAndName(fold_NFD)); - System.out.println("\tNFD, then " + names2[style] + ": " + Default.ucd.getCodeAndName(NFD_fold)); + System.out.println("Case check fails at " + Default.ucd().getCodeAndName(cp)); + System.out.println("\t" + names2[style] + ", then NFD: " + Default.ucd().getCodeAndName(fold_NFD)); + System.out.println("\tNFD, then " + names2[style] + ": " + Default.ucd().getCodeAndName(NFD_fold)); failed = true; } } @@ -954,16 +954,16 @@ can help you narrow these down. if (!full.equals(fullTest)) { Utility.fixDot(); - System.out.println("Case fold fails at " + Default.ucd.getCodeAndName(cp)); - System.out.println(" fullFold(ch): " + Default.ucd.getCodeAndName(full)); - System.out.println(" fullUpper(fullLower(ch)): " + Default.ucd.getCodeAndName(fullTest)); + System.out.println("Case fold fails at " + Default.ucd().getCodeAndName(cp)); + System.out.println(" fullFold(ch): " + Default.ucd().getCodeAndName(full)); + System.out.println(" fullUpper(fullLower(ch)): " + Default.ucd().getCodeAndName(fullTest)); failed = true; } if (!simple.equals(simpleTest)) { Utility.fixDot(); - if (!failed) System.out.println("Case fold fails at " + Default.ucd.getCodeAndName(cp)); - System.out.println(" simpleFold(ch): " + Default.ucd.getCodeAndName(simple)); - System.out.println(" simpleUpper(simpleLower(ch)): " + Default.ucd.getCodeAndName(simpleTest)); + if (!failed) System.out.println("Case fold fails at " + Default.ucd().getCodeAndName(cp)); + System.out.println(" simpleFold(ch): " + Default.ucd().getCodeAndName(simple)); + System.out.println(" simpleUpper(simpleLower(ch)): " + Default.ucd().getCodeAndName(simpleTest)); failed = true; } if (failed) System.out.println(); @@ -971,7 +971,7 @@ can help you narrow these down. } public static void compareBlueberry() { - Default.setUCD(); + UnicodeSet NameStartChar = new UnicodeSet("[A-Z:_a-z\\u00C0-\\u02FF" + "\\u0370-\\u037D\\u037F-\\u2027\\u202A-\\u218F\\u2800-\\uD7FF" @@ -986,8 +986,8 @@ can help you narrow these down. System.out.println("NameChar:"); System.out.println("\t" + NameChar.toPattern(true)); - UCDProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd); - UCDProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd); + UCDProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd()); + UCDProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd()); UnicodeSet IDContinueMinusNameChar = new UnicodeSet(); UnicodeSet IDStartMinusNameChar = new UnicodeSet(); @@ -997,8 +997,8 @@ can help you narrow these down. for (int cp = 0; cp < 0x10FFFF; ++cp) { Utility.dot(cp); - if (Default.ucd.isPUA(cp)) continue; - if (!Default.ucd.isAssigned(cp) && !NameChar.contains(cp)) { + if (Default.ucd().isPUA(cp)) continue; + if (!Default.ucd().isAssigned(cp) && !NameChar.contains(cp)) { UnassignedMinusNameChar.add(cp); } else if (IDcontinue.hasValue(cp) && !NameChar.contains(cp)) { IDContinueMinusNameChar.add(cp); @@ -1012,7 +1012,7 @@ can help you narrow these down. } System.out.println("IDContinueMinusNameChar: "); System.out.println("\t" + IDContinueMinusNameChar.toPattern(true)); - Utility.showSetNames("\t", IDContinueMinusNameChar, false, Default.ucd); + Utility.showSetNames("\t", IDContinueMinusNameChar, false, Default.ucd()); System.out.println("IDStartMinusNameChar: "); System.out.println("\t" + IDStartMinusNameChar.toPattern(true)); System.out.println("IDStartMinusNameStartChar: "); @@ -1022,7 +1022,7 @@ can help you narrow these down. } public static void VerifyIDN() throws IOException { - Default.setUCD(); + System.out.println("VerifyIDN"); System.out.println(); @@ -1043,7 +1043,7 @@ can help you narrow these down. Utility.dot(cp); if (mappedOut.get(cp)) continue; - boolean ucdUnassigned = !Default.ucd.isAllocated(cp); + boolean ucdUnassigned = !Default.ucd().isAllocated(cp); boolean idnUnassigned = unassigned.get(cp); boolean guess = guessSet.get(cp); boolean idnProhibited = prohibited.get(cp); @@ -1072,14 +1072,14 @@ can help you narrow these down. if (cp == 0x3131) { System.out.println("Debug: " + idnProhibited + ", " + idnUnassigned - + ", " + !Default.nfkd.isNormalized(cp) - + ", " + Default.ucd.getCodeAndName(Default.nfkc.normalize(cp)) - + ", " + Default.ucd.getCodeAndName(Default.nfc.normalize(cp))); + + ", " + !Default.nfkd().isNormalized(cp) + + ", " + Default.ucd().getCodeAndName(Default.nfkc().normalize(cp)) + + ", " + Default.ucd().getCodeAndName(Default.nfc().normalize(cp))); } - if (!idnProhibited && ! idnUnassigned && !Default.nfkd.isNormalized(cp)) { - String kc = Default.nfkc.normalize(cp); - String c = Default.nfc.normalize(cp); + if (!idnProhibited && ! idnUnassigned && !Default.nfkd().isNormalized(cp)) { + String kc = Default.nfkc().normalize(cp); + String c = Default.nfc().normalize(cp); if (kc.equals(c)) continue; int cp2; boolean excluded = false; @@ -1131,21 +1131,21 @@ can help you narrow these down. probe = new TreeMap(); idnMap.put(description, probe); } - probe.put(new Integer(cp), Default.ucd.getCodeAndName(cp) + " (" + Default.ucd.getCategoryID(cp) + ")" + option); + probe.put(new Integer(cp), Default.ucd().getCodeAndName(cp) + " (" + Default.ucd().getCategoryID(cp) + ")" + option); } static void showDifferences(PrintWriter log, UnicodeSet s1, String name1, UnicodeSet s2, String name2, boolean both) { if (!s1.equals(s2)) { log.println(); log.println("In " + name1 + ", but NOT " + name2); - Utility.showSetNames(log," ", new UnicodeSet(s1).removeAll(s2), false, false, Default.ucd); + Utility.showSetNames(log," ", new UnicodeSet(s1).removeAll(s2), false, false, Default.ucd()); log.println(); log.println("NOT in " + name1 + ", but in " + name2); - Utility.showSetNames(log," ", new UnicodeSet(s2).removeAll(s1), false, false, Default.ucd); + Utility.showSetNames(log," ", new UnicodeSet(s2).removeAll(s1), false, false, Default.ucd()); log.println(); if (both) { log.println("In both " + name1 + " AND " + name2); - Utility.showSetNames(log," ", new UnicodeSet(s2).retainAll(s1), false, false, Default.ucd); + Utility.showSetNames(log," ", new UnicodeSet(s2).retainAll(s1), false, false, Default.ucd()); log.println(); } } @@ -1154,7 +1154,7 @@ can help you narrow these down. public static void genIDN() throws IOException { PrintWriter out = new PrintWriter(System.out); - Default.setUCD(); + PrintWriter log = Utility.openPrintWriter("IDN-tables.txt", Utility.LATIN1_UNIX); /*UnicodeSet y = UnifiedBinaryProperty.make(CATEGORY + FORMAT).getSet(); @@ -1241,9 +1241,9 @@ FEFF; ZERO WIDTH NO-BREAK SPACE .removeAll(surrogate).removeAll(control); // remove some just to avoid clutter when debugging. UnicodeSetIterator it = new UnicodeSetIterator(temp); while(it.next()) { - if (!Default.ucd.isAssigned(it.codepoint)) continue; - String name = Default.ucd.getName(it.codepoint); - System.out.print(Default.ucd.getCodeAndName(it.codepoint)); + if (!Default.ucd().isAssigned(it.codepoint)) continue; + String name = Default.ucd().getName(it.codepoint); + System.out.print(Default.ucd().getCodeAndName(it.codepoint)); if (name.indexOf("VARIATION") >= 0 || name.indexOf("ZERO") >= 0 || name.indexOf("WORD JOINER") >= 0) { @@ -1300,7 +1300,7 @@ FEFF; ZERO WIDTH NO-BREAK SPACE if (test[i].containsNone(test[j])) continue; log.println(testNames[i] + " and " + testNames[j] + " intersect!"); UnicodeSet intersection = new UnicodeSet(test[i]).retainAll(test[j]); - Utility.showSetNames(log," ", intersection, false, true, Default.ucd); + Utility.showSetNames(log," ", intersection, false, true, Default.ucd()); log.println(); } } @@ -1327,36 +1327,36 @@ FEFF; ZERO WIDTH NO-BREAK SPACE Integer tempInteger = null; for (int i = 0; i < 0x10FFFF; ++i) { - int cat = Default.ucd.getCategory(i); - if (!Default.ucd.isAssigned(i)) continue; + int cat = Default.ucd().getCategory(i); + if (!Default.ucd().isAssigned(i)) continue; //if (cat == Cc || cat == Cf || cat == Co || cat == Cn) continue; // we can skip these //if (Default.ucd.hasComputableName(i)) continue; tempInteger = null; String original = UTF16.valueOf(i); - String caseFold = Default.ucd.getCase(i, FULL, FOLD); + String caseFold = Default.ucd().getCase(i, FULL, FOLD); if (!original.equals(caseFold)) { tempInteger = new Integer(i); B2.put(tempInteger, caseFold); B3.put(tempInteger, caseFold); } - String b = Default.nfkc.normalize(caseFold); - String c = Default.nfkc.normalize(Default.ucd.getCase(b, FULL, FOLD)); + String b = Default.nfkc().normalize(caseFold); + String c = Default.nfkc().normalize(Default.ucd().getCase(b, FULL, FOLD)); if (!c.equals(b)) { if (tempInteger != null) { if (DEBUG) { log.println("Possible Conflict"); - log.println(" " + Default.ucd.getCodeAndName(i)); - log.println(" => " + Default.ucd.getCodeAndName(caseFold)); - log.println(" => " + Default.ucd.getCodeAndName(c)); + log.println(" " + Default.ucd().getCodeAndName(i)); + log.println(" => " + Default.ucd().getCodeAndName(caseFold)); + log.println(" => " + Default.ucd().getCodeAndName(c)); } } else { tempInteger = new Integer(i); if (DEBUG) { - log.println(" " + Default.ucd.getCodeAndName(i)); - log.println(" => " + Default.ucd.getCodeAndName(c)); + log.println(" " + Default.ucd().getCodeAndName(i)); + log.println(" => " + Default.ucd().getCodeAndName(c)); } } if (DEBUG) log.println(); @@ -1367,7 +1367,7 @@ FEFF; ZERO WIDTH NO-BREAK SPACE // PRINTOUT - printIDN_Table(log, "A.1", "Unassigned code points in Unicode " + Default.ucd.getVersion(), A1); + printIDN_Table(log, "A.1", "Unassigned code points in Unicode " + Default.ucd().getVersion(), A1); printIDN_Table(log, "B.1", "Commonly mapped to nothing", B1); printIDN_Map(log, "B.2", "Mapping for lowercase used with NFKC", B2, B3); @@ -1401,7 +1401,7 @@ FEFF; ZERO WIDTH NO-BREAK SPACE int cp = key.intValue(); log.println(Utility.hex(cp, 4) + "; " + Utility.hex(value, 4) + "; " + (!value.equals(other.get(key))? "***" : "") - + Default.ucd.getName(cp)); + + Default.ucd().getName(cp)); } log.println("----- End Table " + tableNumber + " -----"); } @@ -1412,14 +1412,14 @@ FEFF; ZERO WIDTH NO-BREAK SPACE log.println(tableNumber+ " " + description); log.println(""); log.println("----- Start Table " + tableNumber + " -----"); - Utility.showSetNames(log, "", set, false, true, Default.ucd); + Utility.showSetNames(log, "", set, false, true, Default.ucd()); log.println("----- End Table " + tableNumber + " -----"); } public static BitSet guessIDN() { BitSet result = new BitSet(); for (int cp = 0; cp < 0x10FFFF; ++cp) { - int cat = Default.ucd.getCategory(cp); + int cat = Default.ucd().getCategory(cp); // 5.1 Currently-prohibited ASCII characters if (cp < 0x80 && cp != '-' && !(cat == Lu || cat == Ll || cat == Nd)) result.set(cp); @@ -1443,7 +1443,7 @@ FEFF; ZERO WIDTH NO-BREAK SPACE if (cp == 0xFFFD) result.set(cp); // 5.5 Non-character code points - if (Default.ucd.getBinaryProperty(cp, Noncharacter_Code_Point)) result.set(cp); + if (Default.ucd().getBinaryProperty(cp, Noncharacter_Code_Point)) result.set(cp); // 5.6 Surrogate codes if (cat == Cs) result.set(cp); @@ -1654,7 +1654,7 @@ E0020-E007F; [TAGGING CHARACTERS] String key = Utility.fromHex(parts[0]); if (UTF32.length32(key) != 1) throw new ChainException("First IDN field not single character: " + line, null); int cp = UTF32.char32At(key, 0); - if (!Default.ucd.isAssigned(cp) || Default.ucd.isPUA(cp)) throw new ChainException("IDN character unassigned or PUA: " + line, null); + if (!Default.ucd().isAssigned(cp) || Default.ucd().isPUA(cp)) throw new ChainException("IDN character unassigned or PUA: " + line, null); String value = Utility.fromHex(parts[1]); String reason = parts[2].trim(); @@ -1670,24 +1670,24 @@ E0020-E007F; [TAGGING CHARACTERS] for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAssigned(cp) || Default.ucd.isPUA(cp)) continue; + if (!Default.ucd().isAssigned(cp) || Default.ucd().isPUA(cp)) continue; if (mappedOut.get(cp)) continue; String key = UTF32.valueOf32(cp); String value = (String)idnFold.get(key); if (value == null) value = key; String reason = (String)idnWhy.get(key); - String ucdFold = Default.ucd.getCase(cp, FULL, FOLD, "I"); + String ucdFold = Default.ucd().getCase(cp, FULL, FOLD, "I"); if (!ucdFold.equals(value)) { - String b = Default.nfkc.normalize(Default.ucd.getCase(cp, FULL, FOLD, "I")); - String c = Default.nfkc.normalize(Default.ucd.getCase(b, FULL, FOLD, "I")); + String b = Default.nfkc().normalize(Default.ucd().getCase(cp, FULL, FOLD, "I")); + String c = Default.nfkc().normalize(Default.ucd().getCase(b, FULL, FOLD, "I")); if (c.equals(value)) continue; Utility.fixDot(); - System.out.println("Mismatch: " + Default.ucd.getCodeAndName(cp)); - System.out.println(" UCD Case Fold: <" + Default.ucd.getCodeAndName(ucdFold) + ">"); - System.out.println(" IDN Map [" + reason + "]: <" + Default.ucd.getCodeAndName(value) + ">"); + System.out.println("Mismatch: " + Default.ucd().getCodeAndName(cp)); + System.out.println(" UCD Case Fold: <" + Default.ucd().getCodeAndName(ucdFold) + ">"); + System.out.println(" IDN Map [" + reason + "]: <" + Default.ucd().getCodeAndName(value) + ">"); errorCount++; } } @@ -1738,39 +1738,39 @@ E0020-E007F; [TAGGING CHARACTERS] */ public static void diffIgnorable () { - Default.setUCD(); + - UnicodeSet control = UnifiedBinaryProperty.make(CATEGORY + Cf, Default.ucd).getSet(); + UnicodeSet control = UnifiedBinaryProperty.make(CATEGORY + Cf, Default.ucd()).getSet(); System.out.println("Cf"); - Utility.showSetNames("", control, false, Default.ucd); + Utility.showSetNames("", control, false, Default.ucd()); - control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cc, Default.ucd).getSet()); + control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cc, Default.ucd()).getSet()); System.out.println("Cf + Cc"); - Utility.showSetNames("", control, false, Default.ucd); + Utility.showSetNames("", control, false, Default.ucd()); - control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cs, Default.ucd).getSet()); + control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cs, Default.ucd()).getSet()); System.out.println("Cf + Cc + Cs"); - Utility.showSetNames("", control, false, Default.ucd); + Utility.showSetNames("", control, false, Default.ucd()); - control.removeAll(UnifiedBinaryProperty.make(BINARY_PROPERTIES + White_space, Default.ucd).getSet()); + control.removeAll(UnifiedBinaryProperty.make(BINARY_PROPERTIES + White_space, Default.ucd()).getSet()); System.out.println("Cf + Cc + Cs - WhiteSpace"); - Utility.showSetNames("", control, false, Default.ucd); + Utility.showSetNames("", control, false, Default.ucd()); control.add(0x2060,0x206f).add(0xFFF0,0xFFFB).add(0xE0000,0xE0FFF); System.out.println("(Cf + Cc + Cs - WhiteSpace) + ranges"); - Utility.showSetNames("", control, false, Default.ucd); + Utility.showSetNames("", control, false, Default.ucd()); - UnicodeSet odicp = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Other_Default_Ignorable_Code_Point, Default.ucd).getSet(); + UnicodeSet odicp = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Other_Default_Ignorable_Code_Point, Default.ucd()).getSet(); odicp.removeAll(control); System.out.println("Minimal Default Ignorable Code Points"); - Utility.showSetNames("", odicp, true, Default.ucd); + Utility.showSetNames("", odicp, true, Default.ucd()); } @@ -1793,8 +1793,8 @@ E0020-E007F; [TAGGING CHARACTERS] System.out.println(); for (int cp = 0; cp < 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAssigned(cp)) continue; - if (Default.ucd.isPUA(cp)) continue; + if (!Default.ucd().isAssigned(cp)) continue; + if (Default.ucd().isPUA(cp)) continue; if (isNormalized(cp, j)) continue; if (cp == 0xFDFB || cp == 0x0140) { @@ -1813,8 +1813,8 @@ E0020-E007F; [TAGGING CHARACTERS] //norm = Default.ucd.isIdentifier(nfx_x_cp, true); if (plain & !norm) { Utility.fixDot(); - System.out.println("*Not Identifier: " + Default.ucd.getCodeAndName(cp)); - System.out.println(" nfx_x_cp: " + Default.ucd.getCodeAndName(nfx_x_cp)); + System.out.println("*Not Identifier: " + Default.ucd().getCodeAndName(cp)); + System.out.println(" nfx_x_cp: " + Default.ucd().getCodeAndName(nfx_x_cp)); System.out.println(" isIdentifier(nfx_x_cp, true): " + norm); System.out.println(" cat(nfx_x_cp): " + getCategoryID(nfx_x_cp)); @@ -1829,14 +1829,14 @@ E0020-E007F; [TAGGING CHARACTERS] // norm = Default.ucd.isIdentifier(nfx_cp, true); if (plain & !norm) { Utility.fixDot(); - System.out.println(" Changes Category: " + Default.ucd.getCodeAndName(cp)); - System.out.println(" nfx_cp: " + Default.ucd.getCodeAndName(nfx_cp)); + System.out.println(" Changes Category: " + Default.ucd().getCodeAndName(cp)); + System.out.println(" nfx_cp: " + Default.ucd().getCodeAndName(nfx_cp)); System.out.println(" isIdentifier(nfx_cp, true): " + norm); System.out.println(" cat(nfx_cp): " + getCategoryID(nfx_cp)); System.out.println(" isIdentifierStart(cp, true): " + plain); - System.out.println(" cat(cp): " + Default.ucd.getCategoryID(cp)); + System.out.println(" cat(cp): " + Default.ucd().getCategoryID(cp)); System.out.println(); continue; } @@ -1845,24 +1845,24 @@ E0020-E007F; [TAGGING CHARACTERS] } static String getCategoryID(String s) { - if (UTF32.length32(s) == 1) return Default.ucd.getCategoryID(UTF32.char32At(s, 0)); + if (UTF32.length32(s) == 1) return Default.ucd().getCategoryID(UTF32.char32At(s, 0)); StringBuffer result = new StringBuffer(); int cp; for (int i = 0; i < s.length(); i += UTF32.count16(cp)) { cp = UTF32.char32At(s, i); if (i != 0) result.append(' '); - result.append(Default.ucd.getCategoryID(cp)); + result.append(Default.ucd().getCategoryID(cp)); } return result.toString(); } static String normalize(String s, int j) { - if (j < 4) return Default.nf[j].normalize(s); - return Default.ucd.getCase(s, FULL, FOLD); + if (j < 4) return Default.nf(j).normalize(s); + return Default.ucd().getCase(s, FULL, FOLD); } static boolean isNormalized(int cp, int j) { - if (j < 4) return !Default.nf[j].isNormalized(cp); + if (j < 4) return !Default.nf(j).isNormalized(cp); return false; } @@ -1870,21 +1870,21 @@ E0020-E007F; [TAGGING CHARACTERS] public static void NFTest() { for (int j = 0; j < 4; ++j) { - Normalizer nfx = Default.nf[j]; + Normalizer nfx = Default.nf(j); System.out.println(); System.out.println("Testing isNormalized for " + NAMES[j]); System.out.println(); for (int i = 0; i < 0x10FFFF; ++i) { Utility.dot(i); - if (!Default.ucd.isAssigned(i)) continue; - if (Default.ucd.isPUA(i)) continue; + if (!Default.ucd().isAssigned(i)) continue; + if (Default.ucd().isPUA(i)) continue; String s = nfx.normalize(i); boolean differs = !s.equals(UTF32.valueOf32(i)); boolean call = !nfx.isNormalized(i); if (differs != call) { Utility.fixDot(); System.out.println("Problem: differs: " + differs - + ", call: " + call + " " + Default.ucd.getCodeAndName(i)); + + ", call: " + call + " " + Default.ucd().getCodeAndName(i)); } } @@ -1894,7 +1894,7 @@ E0020-E007F; [TAGGING CHARACTERS] static final int EXCEPTION_FLAG = 0x8000000; public static void checkScripts() throws IOException { - Default.setUCD(); + boolean ok; Map m = new TreeMap(); UnicodeSet exceptions = ScriptExceptions.getExceptions(); @@ -1904,9 +1904,9 @@ E0020-E007F; [TAGGING CHARACTERS] show.add(0x207F); for (int i = 0; i < 0x10FFFF; ++i) { - if (!Default.ucd.isAssigned(i)) continue; - byte cat = Default.ucd.getCategory(i); - byte script = Default.ucd.getScript(i); + if (!Default.ucd().isAssigned(i)) continue; + byte cat = Default.ucd().getCategory(i); + byte script = Default.ucd().getScript(i); switch (cat) { case Lo: case Lt: case Ll: case Lu: case Lm: case Mc: case Sk: ok = script != INHERITED_SCRIPT && script != COMMON_SCRIPT; @@ -1919,9 +1919,9 @@ E0020-E007F; [TAGGING CHARACTERS] break; } if (show.contains(i)) { - System.out.println(Default.ucd.getCodeAndName(i) - + "; " + Default.ucd.getScriptID(i) - + "; " + Default.ucd.getCategoryID(i) + System.out.println(Default.ucd().getCodeAndName(i) + + "; " + Default.ucd().getScriptID(i) + + "; " + Default.ucd().getCategoryID(i) ); } if (!ok) { @@ -1935,7 +1935,7 @@ E0020-E007F; [TAGGING CHARACTERS] m.put(key, us); } us.add(i); - int len = Default.ucd.getScriptID(i).length(); + int len = Default.ucd().getScriptID(i).length(); if (maxScriptLen < len) maxScriptLen = len; } } @@ -1952,13 +1952,13 @@ E0020-E007F; [TAGGING CHARACTERS] int start = badChars.getRangeStart(kk); int end = badChars.getRangeEnd(kk); String code = Utility.hex(start) + (start != end ? ".." + Utility.hex(end) : ""); - String scriptName = Default.ucd.getScriptID(start); + String scriptName = Default.ucd().getScriptID(start); String title = "FAIL"; if ((intKey & EXCEPTION_FLAG) != 0) title = "EXCEPTION"; log.println(title + ": " + code + "; " + Utility.repeat(" ", 14 - code.length()) + scriptName + Utility.repeat(" ", maxScriptLen-scriptName.length()) - + " # (" + LCgetCategoryID(start) + ") " + Default.ucd.getName(start) - + (start != end ? ".." + Default.ucd.getName(end) : "") + + " # (" + LCgetCategoryID(start) + ") " + Default.ucd().getName(start) + + (start != end ? ".." + Default.ucd().getName(end) : "") ); } log.println(); @@ -1967,13 +1967,13 @@ E0020-E007F; [TAGGING CHARACTERS] } static public String LCgetCategoryID(int cp) { - byte cat = Default.ucd.getCategory(cp); + byte cat = Default.ucd().getCategory(cp); if (cat == Lu || cat == Lt || cat == Ll) return "LC"; - return Default.ucd.getCategoryID(cp); + return Default.ucd().getCategoryID(cp); } static public void verifyNormalizationStability() { - Default.setUCD(); + verifyNormalizationStability2("3.1.0"); verifyNormalizationStability2("3.0.0"); } @@ -1989,49 +1989,49 @@ E0020-E007F; [TAGGING CHARACTERS] Normalizer oldNFKC = new Normalizer(Normalizer.NFKC, older.getVersion()); Normalizer oldNFKD = new Normalizer(Normalizer.NFKD, older.getVersion()); - System.out.println("Testing " + Default.nfd.getUCDVersion() + " against " + oldNFD.getUCDVersion()); + System.out.println("Testing " + Default.nfd().getUCDVersion() + " against " + oldNFD.getUCDVersion()); for (int i = 0; i <= 0x10FFFF; ++i) { Utility.dot(i); - if (!Default.ucd.isAssigned(i)) continue; - byte cat = Default.ucd.getCategory(i); + if (!Default.ucd().isAssigned(i)) continue; + byte cat = Default.ucd().getCategory(i); if (cat == Cs || cat == PRIVATE_USE) continue; if (i == 0x5e) { System.out.println("debug"); - String test1 = Default.nfkd.normalize(i); + String test1 = Default.nfkd().normalize(i); String test2 = oldNFKD.normalize(i); - System.out.println("Testing (new/old)" + Default.ucd.getCodeAndName(i)); - System.out.println("\t" + Default.ucd.getCodeAndName(test1)); - System.out.println("\t" + Default.ucd.getCodeAndName(test2)); + System.out.println("Testing (new/old)" + Default.ucd().getCodeAndName(i)); + System.out.println("\t" + Default.ucd().getCodeAndName(test1)); + System.out.println("\t" + Default.ucd().getCodeAndName(test2)); } if (older.isAssigned(i)) { - int newCan = Default.ucd.getCombiningClass(i); + int newCan = Default.ucd().getCombiningClass(i); int oldCan = older.getCombiningClass(i); if (newCan != oldCan) { System.out.println("FAILS CCC STABILITY: " + newCan + " != " + oldCan - + "; " + Default.ucd.getCodeAndName(i)); + + "; " + Default.ucd().getCodeAndName(i)); } - verifyEquals(i, "NFD STABILITY (new/old)", Default.nfd.normalize(i), oldNFD.normalize(i)); - verifyEquals(i, "NFC STABILITY (new/old)", Default.nfc.normalize(i), oldNFC.normalize(i)); - verifyEquals(i, "NFKD STABILITY (new/old)", Default.nfkd.normalize(i), oldNFKD.normalize(i)); - verifyEquals(i, "NFKC STABILITY (new/old)", Default.nfkc.normalize(i), oldNFKC.normalize(i)); + verifyEquals(i, "NFD STABILITY (new/old)", Default.nfd().normalize(i), oldNFD.normalize(i)); + verifyEquals(i, "NFC STABILITY (new/old)", Default.nfc().normalize(i), oldNFC.normalize(i)); + verifyEquals(i, "NFKD STABILITY (new/old)", Default.nfkd().normalize(i), oldNFKD.normalize(i)); + verifyEquals(i, "NFKC STABILITY (new/old)", Default.nfkc().normalize(i), oldNFKC.normalize(i)); } else { // not in older version. // (1) If there is a decomp, and it is composed of all OLD characters, then it must NOT compose - if (!Default.nfd.isNormalized(i)) { - String decomp = Default.nfd.normalize(i); + if (!Default.nfd().isNormalized(i)) { + String decomp = Default.nfd().normalize(i); if (noneHaveCategory(decomp, Cn, older)) { - String recomp = Default.nfc.normalize(decomp); + String recomp = Default.nfc().normalize(decomp); if (recomp.equals(UTF16.valueOf(i))) { Utility.fixDot(); - System.out.println("FAILS COMP STABILITY: " + Default.ucd.getCodeAndName(i)); - System.out.println("\t" + Default.ucd.getCodeAndName(decomp)); - System.out.println("\t" + Default.ucd.getCodeAndName(recomp)); + System.out.println("FAILS COMP STABILITY: " + Default.ucd().getCodeAndName(i)); + System.out.println("\t" + Default.ucd().getCodeAndName(decomp)); + System.out.println("\t" + Default.ucd().getCodeAndName(recomp)); System.out.println(); throw new IllegalArgumentException("Comp stability"); } @@ -2054,9 +2054,9 @@ E0020-E007F; [TAGGING CHARACTERS] public static void verifyEquals(int cp, String message, String a, String b) { if (!a.equals(b)) { Utility.fixDot(); - System.out.println("FAILS " + message + ": " + Default.ucd.getCodeAndName(cp)); - System.out.println("\t" + Default.ucd.getCodeAndName(a)); - System.out.println("\t" + Default.ucd.getCodeAndName(b)); + System.out.println("FAILS " + message + ": " + Default.ucd().getCodeAndName(cp)); + System.out.println("\t" + Default.ucd().getCodeAndName(a)); + System.out.println("\t" + Default.ucd().getCodeAndName(b)); System.out.println(); } } @@ -2171,10 +2171,10 @@ E0020-E007F; [TAGGING CHARACTERS] if (lastShowed != cp) { Utility.fixDot(); System.out.println(); - String s = Default.ucd.getDecompositionMapping(cp); - System.out.print(Default.ucd.getCodeAndName(cp)); + String s = Default.ucd().getDecompositionMapping(cp); + System.out.print(Default.ucd().getCodeAndName(cp)); if (showCanonicalDecomposition && !s.equals(UTF32.valueOf32(cp))) { - System.out.print(" => " + Default.ucd.getCodeAndName(s)); + System.out.print(" => " + Default.ucd().getCodeAndName(s)); } System.out.println(); lastShowed = cp; @@ -2182,36 +2182,36 @@ E0020-E007F; [TAGGING CHARACTERS] } public static void test1() { - Default.setUCD(); + for (int i = 0x19; i < 0x10FFFF; ++i) { - System.out.println(Utility.hex(i) + " " + Utility.quoteJavaString(Default.ucd.getName(i))); + System.out.println(Utility.hex(i) + " " + Utility.quoteJavaString(Default.ucd().getName(i))); System.out.print(" " - + ", gc=" + Default.ucd.getCategoryID(i) - + ", bc=" + Default.ucd.getBidiClassID(i) - + ", cc=" + Default.ucd.getCombiningClassID(i) - + ", ea=" + Default.ucd.getEastAsianWidthID(i) - + ", lb=" + Default.ucd.getLineBreakID(i) - + ", dt=" + Default.ucd.getDecompositionTypeID(i) - + ", nt=" + Default.ucd.getNumericTypeID(i) - + ", nv=" + Default.ucd.getNumericValue(i) + + ", gc=" + Default.ucd().getCategoryID(i) + + ", bc=" + Default.ucd().getBidiClassID(i) + + ", cc=" + Default.ucd().getCombiningClassID(i) + + ", ea=" + Default.ucd().getEastAsianWidthID(i) + + ", lb=" + Default.ucd().getLineBreakID(i) + + ", dt=" + Default.ucd().getDecompositionTypeID(i) + + ", nt=" + Default.ucd().getNumericTypeID(i) + + ", nv=" + Default.ucd().getNumericValue(i) ); for (int j = 0; j < UCD_Types.LIMIT_BINARY_PROPERTIES; ++j) { - if (Default.ucd.getBinaryProperty(i,j)) System.out.print(", " + UCD_Names.BP[j]); + if (Default.ucd().getBinaryProperty(i,j)) System.out.print(", " + UCD_Names.BP[j]); } System.out.println(); System.out.println(" " - + ", dm=" + Utility.quoteJavaString(Default.ucd.getDecompositionMapping(i)) - + ", slc=" + Utility.quoteJavaString(Default.ucd.getCase(i, SIMPLE, LOWER)) - + ", stc=" + Utility.quoteJavaString(Default.ucd.getCase(i, SIMPLE, TITLE)) - + ", suc=" + Utility.quoteJavaString(Default.ucd.getCase(i, SIMPLE, UPPER)) - + ", flc=" + Utility.quoteJavaString(Default.ucd.getCase(i, FULL, LOWER)) - + ", ftc=" + Utility.quoteJavaString(Default.ucd.getCase(i, FULL, TITLE)) - + ", fuc=" + Utility.quoteJavaString(Default.ucd.getCase(i, FULL, UPPER)) - + ", sc=" + Utility.quoteJavaString(Default.ucd.getSpecialCase(i)) + + ", dm=" + Utility.quoteJavaString(Default.ucd().getDecompositionMapping(i)) + + ", slc=" + Utility.quoteJavaString(Default.ucd().getCase(i, SIMPLE, LOWER)) + + ", stc=" + Utility.quoteJavaString(Default.ucd().getCase(i, SIMPLE, TITLE)) + + ", suc=" + Utility.quoteJavaString(Default.ucd().getCase(i, SIMPLE, UPPER)) + + ", flc=" + Utility.quoteJavaString(Default.ucd().getCase(i, FULL, LOWER)) + + ", ftc=" + Utility.quoteJavaString(Default.ucd().getCase(i, FULL, TITLE)) + + ", fuc=" + Utility.quoteJavaString(Default.ucd().getCase(i, FULL, UPPER)) + + ", sc=" + Utility.quoteJavaString(Default.ucd().getSpecialCase(i)) ); if (i > 0x180) i = 3 * i / 2; @@ -2219,11 +2219,11 @@ E0020-E007F; [TAGGING CHARACTERS] } static void checkCanonicalProperties() { - Default.setUCD(); - System.out.println(Default.ucd.toString(0x1E0A)); + + System.out.println(Default.ucd().toString(0x1E0A)); System.out.println("Cross-checking canonical equivalence"); - System.out.println("Version: " + Default.ucd.getVersion() + ", " + new Date(Default.ucd.getDate())); + System.out.println("Version: " + Default.ucd().getVersion() + ", " + new Date(Default.ucd().getDate())); showCanonicalDecomposition = true; for (int q = 1; q < 2; ++q) for (int i = 0; i <= 0x10FFFF; ++i) { @@ -2231,35 +2231,35 @@ E0020-E007F; [TAGGING CHARACTERS] if (i == 0x0387) { System.out.println("debug?"); } - byte type = Default.ucd.getDecompositionType(i); + byte type = Default.ucd().getDecompositionType(i); if (type != CANONICAL) continue; - String s = Default.ucd.getDecompositionMapping(i); + String s = Default.ucd().getDecompositionMapping(i); int slen = UTF32.length32(s); int j = UTF32.char32At(s, 0); try { if (q == 0) { - check(i, Default.ucd.getCategory(i), Default.ucd.getCategory(j), UCD_Names.GC, "GeneralCategory"); - check(i, Default.ucd.getCombiningClass(i), Default.ucd.getCombiningClass(j), "CanonicalClass"); - check(i, Default.ucd.getBidiClass(i), Default.ucd.getBidiClass(j), UCD_Names.BC, "BidiClass"); - check(i, Default.ucd.getNumericValue(i), Default.ucd.getNumericValue(j), "NumericValue"); - check(i, Default.ucd.getNumericType(i), Default.ucd.getNumericType(j), UCD_Names.NT, "NumericType"); + check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GC, "GeneralCategory"); + check(i, Default.ucd().getCombiningClass(i), Default.ucd().getCombiningClass(j), "CanonicalClass"); + check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BC, "BidiClass"); + check(i, Default.ucd().getNumericValue(i), Default.ucd().getNumericValue(j), "NumericValue"); + check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.NT, "NumericType"); if (false) { for (byte k = LOWER; k < LIMIT_CASE; ++k) { - check(i, Default.ucd.getCase(i, SIMPLE, k), Default.ucd.getCase(j, SIMPLE, k), "Simple("+k+")"); - check(i, Default.ucd.getCase(i, FULL, k), Default.ucd.getCase(j, FULL, k), "Full("+k+")"); + check(i, Default.ucd().getCase(i, SIMPLE, k), Default.ucd().getCase(j, SIMPLE, k), "Simple("+k+")"); + check(i, Default.ucd().getCase(i, FULL, k), Default.ucd().getCase(j, FULL, k), "Full("+k+")"); } } - if (slen == 1) check(i, Default.ucd.getSpecialCase(i), Default.ucd.getSpecialCase(j), "SpecialCase"); + if (slen == 1) check(i, Default.ucd().getSpecialCase(i), Default.ucd().getSpecialCase(j), "SpecialCase"); for (byte k = 0; k < LIMIT_BINARY_PROPERTIES; ++k) { if (k == Hex_Digit) continue; if (k == Radical) continue; if (k == UnifiedIdeograph) continue; if (k == CompositionExclusion) continue; - check(i, Default.ucd.getBinaryProperty(i, k), Default.ucd.getBinaryProperty(j, k), UCD_Names.YN_TABLE, Default.ucd.getBinaryPropertiesID_fromIndex(k)); + check(i, Default.ucd().getBinaryProperty(i, k), Default.ucd().getBinaryProperty(j, k), UCD_Names.YN_TABLE, Default.ucd().getBinaryPropertiesID_fromIndex(k)); } } else { //check(i, Default.ucd.getLineBreak(i), Default.ucd.getLineBreak(j), UCD_Names.LB, "LineBreak"); diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 6fe04a3ff55..25f717338bb 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2004/02/06 18:29:39 $ -* $Revision: 1.37 $ +* $Date: 2004/02/07 01:01:17 $ +* $Revision: 1.38 $ * ******************************************************************************* */ @@ -590,7 +590,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES public static String getDisplay(int cp) { String result = UTF16.valueOf(cp); - byte cat = Default.ucd.getCategory(cp); + byte cat = Default.ucd().getCategory(cp); if (cat == Mn || cat == Me) { result = String.valueOf(DOTTED_CIRCLE) + result; } else if (cat == Cf || cat == Cc || cp == 0x034F || cp == 0x00AD || cp == 0x1806) {
" + count + "" + Utility.hex(ch, " ") @@ -4576,8 +4573,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String MN = (String)MismatchedN.get(ch); String MC = (String)MismatchedC.get(ch); String MD = (String)MismatchedD.get(ch); - String chInC = Default.nfc.normalize(ch); - String chInD = Default.nfd.normalize(ch); + String chInC = Default.nfc().normalize(ch); + String chInD = Default.nfd().normalize(ch); log.println("
" + Utility.replace(ucd.getName(ch), ", ", ",
") + "
NFD" + Utility.hex(chInD) @@ -4610,7 +4607,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) { String ch = chobj.toString(); - String decomp = Default.nfd.normalize(ch); + String decomp = Default.nfd().normalize(ch); if (showName) { if (ch.equals(decomp)) { log.println(//title + counter + " " diff --git a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java index ff504f551b9..564a51e50c8 100644 --- a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java +++ b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $ -* $Date: 2002/07/30 09:56:41 $ -* $Revision: 1.7 $ +* $Date: 2004/02/07 01:01:17 $ +* $Revision: 1.8 $ * ******************************************************************************* */ @@ -29,8 +29,6 @@ public class BuildNames implements UCD_Types { static final boolean DEBUG = true; public static void main(String[] args) throws IOException { - - Default.setUCD(); collectWords(); } @@ -153,13 +151,13 @@ public class BuildNames implements UCD_Types { int longSum = 0; for (int cp = 0; cp < 0x10FFFF; ++cp) { - if (!Default.ucd.isAllocated(cp)) continue; - if (Default.ucd.hasComputableName(cp)) continue; + if (!Default.ucd().isAllocated(cp)) continue; + if (Default.ucd().hasComputableName(cp)) continue; Utility.dot(cp); String name; - if (Default.ucd.isRepresented(cp)) { - name = Default.ucd.getName(cp, SHORT); + if (Default.ucd().isRepresented(cp)) { + name = Default.ucd().getName(cp, SHORT); log.println(Utility.hex(cp) + " " + name); String backName = Utility.replace(name, UCD_Names.NAME_ABBREVIATIONS, false); if (!name.equals(backName)) { @@ -170,19 +168,19 @@ public class BuildNames implements UCD_Types { // check the string, and its decomposition. This is just to get a good count. String str = UTF16.valueOf(cp); - if (false && !Default.nfkd.isNormalized(cp)) { - str += Default.nfkd.normalize(cp); + if (false && !Default.nfkd().isNormalized(cp)) { + str += Default.nfkd().normalize(cp); } int cp2; for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp2)) { cp2 = UTF16.charAt(str, i); - name = Default.ucd.getName(cp2, SHORT); + name = Default.ucd().getName(cp2, SHORT); if (name == null) continue; //name = transform(name); sum += name.length(); - longSum += Default.ucd.getName(cp2).length(); + longSum += Default.ucd().getName(cp2).length(); used++; // replace numbers & letters diff --git a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java index c28d0a8c41c..bad7874be49 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java +++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java @@ -11,6 +11,7 @@ import java.util.TreeMap; import java.util.TreeSet; import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.UnicodeLabel; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.dev.test.util.ICUPropertyFactory; import com.ibm.icu.lang.UProperty; @@ -19,6 +20,7 @@ import com.ibm.text.utility.Utility; public class CheckICU { static final BagFormatter bf = new BagFormatter(); + static final BagFormatter bf2 = new BagFormatter(); public static void main(String[] args) throws IOException { System.out.println("Start"); @@ -29,6 +31,20 @@ public class CheckICU { static UnicodeSet itemFailures; static ICUPropertyFactory icuFactory; static ToolUnicodePropertySource toolFactory; + + static class ReplaceLabel extends UnicodeLabel { + UnicodeProperty p; + ReplaceLabel(UnicodeProperty p) { + this.p = p; + } + public String getValue(int codepoint, boolean isShort) { + // TODO Auto-generated method stub + return p.getValue(codepoint, isShort).replace('_',' '); + } + public int getMaxWidth(boolean v) { + return p.getMaxWidth(v); + } + } public static void test() throws IOException { checkUCD(); @@ -37,18 +53,23 @@ public class CheckICU { toolFactory = ToolUnicodePropertySource.make("4.0.0"); String[] quickList = { - "Name", + "Block", // "Script", "Bidi_Mirroring_Glyph", "Case_Folding", //"Numeric_Value" }; for (int i = 0; i < quickList.length; ++i) { - testProperty(quickList[i], -1); + //testProperty(quickList[i], -1); + bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i]))) + .setLabelSource(null) + .setNameSource(null) + .setShowCount(false); + bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF)); } if (quickList.length > 0) return; - Collection availableTool = toolFactory.getAvailablePropertyAliases(new TreeSet()); + Collection availableTool = toolFactory.getAvailableAliases(new TreeSet()); - Collection availableICU = icuFactory.getAvailablePropertyAliases(new TreeSet()); + Collection availableICU = icuFactory.getAvailableAliases(new TreeSet()); System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool)); Collection common = new TreeSet(availableICU); common.retainAll(availableTool); @@ -98,7 +119,7 @@ public class CheckICU { private static void testProperty(String prop, int typeFilter) { UnicodeProperty icuProp = icuFactory.getProperty(prop); - int icuType = icuProp.getPropertyType(); + int icuType = icuProp.getType(); if (typeFilter >= 0 && icuType != typeFilter) return; @@ -106,18 +127,18 @@ public class CheckICU { System.out.println("Testing: " + prop); UnicodeProperty toolProp = toolFactory.getProperty(prop); - int toolType = toolProp.getPropertyType(); + int toolType = toolProp.getType(); if (icuType != toolType) { System.out.println("FAILURE Type: ICU: " + UnicodeProperty.getTypeName(icuType) + "\tTool: " + UnicodeProperty.getTypeName(toolType)); } - Collection icuAliases = icuProp.getPropertyAliases(new ArrayList()); - Collection toolAliases = toolProp.getPropertyAliases(new ArrayList()); + Collection icuAliases = icuProp.getAliases(new ArrayList()); + Collection toolAliases = toolProp.getAliases(new ArrayList()); System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases)); - icuAliases = icuProp.getAvailablePropertyValueAliases(new ArrayList()); - toolAliases = toolProp.getAvailablePropertyValueAliases(new ArrayList()); + icuAliases = icuProp.getAvailableValueAliases(new ArrayList()); + toolAliases = toolProp.getAvailableValueAliases(new ArrayList()); System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases)); // TODO do property value aliases @@ -128,8 +149,8 @@ public class CheckICU { System.out.println(); } */ - String icuValue = icuProp.getPropertyValue(i); - String toolValue = toolProp.getPropertyValue(i); + String icuValue = icuProp.getValue(i); + String toolValue = toolProp.getValue(i); if (!equals(icuValue, toolValue)) { itemFailures.add(i); if (firstDiffCP == null) { diff --git a/tools/unicodetools/com/ibm/text/UCD/Compare14652.java b/tools/unicodetools/com/ibm/text/UCD/Compare14652.java index bec9dffd0d7..368c76680d4 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Compare14652.java +++ b/tools/unicodetools/com/ibm/text/UCD/Compare14652.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $ -* $Date: 2003/04/25 01:39:15 $ -* $Revision: 1.2 $ +* $Date: 2004/02/07 01:01:16 $ +* $Revision: 1.3 $ * ******************************************************************************* */ @@ -150,7 +150,7 @@ tolower pw.println("**************************************************"); pw.println(name); pw.println("**************************************************"); - Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd); + Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd()); //pw.println(props[i].contents); } } @@ -160,7 +160,7 @@ tolower public static void main(String[] args) throws IOException { - String version = Default.ucd.getVersion(); + String version = Default.ucd().getVersion(); PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS); try { log.write('\uFEFF'); @@ -171,8 +171,8 @@ tolower UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf)); UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc)); - Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd); - Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd); + Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd()); + Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd()); } BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1); @@ -259,7 +259,7 @@ xdigit includes digit .removeAll(alpha.contents) .removeAll(cntrl.contents) .removeAll(space.contents); - Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd); + Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd()); UnicodeSet propRemainder = new UnicodeSet(cnSet) .complement() @@ -271,7 +271,7 @@ xdigit includes digit .removeAll(alpha.guessContents) .removeAll(cntrl.guessContents) .removeAll(space.guessContents); - Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd); + Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd()); /* checkDisjoint(new Prop[] {alpha, digit, punct, cntrl}); @@ -318,7 +318,7 @@ xdigit includes digit log.println(); log.println("Fails test: " + name + " disjoint-with " + name2); UnicodeSet diff = new UnicodeSet(set).retainAll(set2); - Utility.showSetNames(log, "", diff, false, false, Default.ucd); + Utility.showSetNames(log, "", diff, false, false, Default.ucd()); } } @@ -332,7 +332,7 @@ xdigit includes digit log.println(); log.println("Fails test:" + name + " includes " + name2); UnicodeSet diff = new UnicodeSet(set2).removeAll(set); - Utility.showSetNames(log, "", diff, false, false, Default.ucd); + Utility.showSetNames(log, "", diff, false, false, Default.ucd()); } } diff --git a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java index d480cf32ae4..adfe2f46ebc 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java +++ b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $ -* $Date: 2004/02/06 18:30:23 $ -* $Revision: 1.3 $ +* $Date: 2004/02/07 01:01:16 $ +* $Revision: 1.4 $ * ******************************************************************************* */ @@ -142,9 +142,9 @@ public class CompareProperties implements UCD_Types { int total = 0; for (int cp = 0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); - int cat = Default.ucd.getCategory(cp); + int cat = Default.ucd().getCategory(cp); // if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue; - if (!Default.ucd.isAllocated(cp)) continue; + if (!Default.ucd().isAllocated(cp)) continue; for (int i = 0; i < count; ++i) { UCDProperty up = props[i]; @@ -170,14 +170,13 @@ public class CompareProperties implements UCD_Types { } private void getProperties() { - Default.setUCD(); for (int i = 0; i < LIMIT_ENUM; ++i) { // || iType == SCRIPT int iType = i & 0xFF00; if (iType == AGE || iType == JOINING_GROUP || iType == COMBINING_CLASS) continue; if (i == 0x0900) { System.out.println("debug"); } - UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd); + UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd()); if (up == null) continue; if (up.getValueType() < BINARY_PROP) { System.out.println("\tSkipping " + up.getName() + "; value varies"); @@ -384,10 +383,9 @@ public class CompareProperties implements UCD_Types { public static void listDifferences() throws IOException { - Default.setUCD(); PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet"); - output.println("# Generated for " + Default.ucd.getVersion()); + output.println("# Generated for " + Default.ucd().getVersion()); output.println(GenerateData.generateDateLine()); output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)"); @@ -395,7 +393,7 @@ public class CompareProperties implements UCD_Types { for (int i = 1; i < UCD_Types.LIMIT_ENUM; ++i) { int iType = i & 0xFF00; if (iType == UCD_Types.JOINING_GROUP || iType == UCD_Types.AGE || iType == UCD_Types.COMBINING_CLASS || iType == UCD_Types.SCRIPT) continue; - UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd); + UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd()); if (upi == null) continue; if (!upi.isStandard()) { System.out.println("Skipping " + upi.getName() + "; not standard"); @@ -419,7 +417,7 @@ public class CompareProperties implements UCD_Types { int jType = j & 0xFF00; if (jType == UCD_Types.JOINING_GROUP || jType == UCD_Types.AGE || jType == UCD_Types.COMBINING_CLASS || jType == UCD_Types.SCRIPT || (jType == iType && jType != UCD_Types.BINARY_PROPERTIES)) continue; - UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd); + UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd()); if (upj == null) continue; if (!upj.isStandard()) continue; if (upj.getValueType() < UCD_Types.BINARY_PROP) continue; @@ -439,9 +437,9 @@ public class CompareProperties implements UCD_Types { int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0; for (int cp = 0; cp <= 0x10FFFF; ++cp) { - int cat = Default.ucd.getCategory(cp); + int cat = Default.ucd().getCategory(cp); if (cat == UCD_Types.UNASSIGNED || cat == UCD_Types.PRIVATE_USE || cat == UCD_Types.SURROGATE) continue; - if (!Default.ucd.isAllocated(cp)) continue; + if (!Default.ucd().isAllocated(cp)) continue; boolean iProp = upi.hasValue(cp); boolean jProp = upj.hasValue(cp); diff --git a/tools/unicodetools/com/ibm/text/UCD/Default.java b/tools/unicodetools/com/ibm/text/UCD/Default.java index b0755ec6aca..178b1d3fb5d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Default.java +++ b/tools/unicodetools/com/ibm/text/UCD/Default.java @@ -9,29 +9,25 @@ import java.util.TimeZone; public final class Default implements UCD_Types { private static String ucdVersion = UCD.latestVersion; - public static UCD ucd; - public static Normalizer nfc; - public static Normalizer nfd; - public static Normalizer nfkc; - public static Normalizer nfkd; - public static Normalizer[] nf = new Normalizer[4]; - - public static void ensureUCD() { - if (ucd == null) setUCD(); - } + private static UCD ucd; + private static Normalizer nfc; + private static Normalizer nfd; + private static Normalizer nfkc; + private static Normalizer nfkd; + private static Normalizer[] nf = new Normalizer[4]; public static void setUCD(String version) { - setUcdVersion(version); + ucdVersion = version; setUCD(); } - public static void setUCD() { - ucd = UCD.make(getUcdVersion()); - nfd = nf[NFD] = new Normalizer(Normalizer.NFD, getUcdVersion()); - nfc = nf[NFC] = new Normalizer(Normalizer.NFC, getUcdVersion()); - nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, getUcdVersion()); - nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, getUcdVersion()); - System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(ucd.getDate()))); + private static void setUCD() { + ucd = UCD.make(ucdVersion()); + nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion()); + nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion()); + nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion()); + nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion()); + System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate()))); } static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'"); @@ -43,12 +39,34 @@ public final class Default implements UCD_Types { return myDateFormat.format(new Date()); } - public static void setUcdVersion(String ucdVersion) { - Default.ucdVersion = ucdVersion; - } - - public static String getUcdVersion() { + public static String ucdVersion() { + if (ucd() == null) setUCD(); return ucdVersion; } + public static UCD ucd() { + if (ucd() == null) setUCD(); + return ucd; + } + public static Normalizer nfc() { + if (ucd() == null) setUCD(); + return nfc; + } + public static Normalizer nfd() { + if (ucd() == null) setUCD(); + return nfd; + } + public static Normalizer nfkc() { + if (ucd() == null) setUCD(); + return nfkc; + } + public static Normalizer nfkd() { + if (ucd() == null) setUCD(); + return nfkd; + } + public static Normalizer nf(int index) { + if (ucd() == null) setUCD(); + return nf[index]; + } + } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java index 0408e544ca0..db1f11a9ef0 100644 --- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java +++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $ -* $Date: 2004/02/06 18:30:22 $ -* $Revision: 1.23 $ +* $Date: 2004/02/07 01:01:16 $ +* $Revision: 1.24 $ * ******************************************************************************* */ @@ -32,7 +32,7 @@ public final class DerivedProperty implements UCD_Types { // ADD CONSTANT to UCD_TYPES static public UCDProperty make(int derivedPropertyID) { - return make(derivedPropertyID, Default.ucd); + return make(derivedPropertyID, Default.ucd()); } static public UCDProperty make(int derivedPropertyID, UCD ucd) { @@ -961,7 +961,6 @@ of characters, the first of which has a non-zero combining class. } public static void test() { - Default.setUCD(); /* DerivedProperty dprop = new DerivedProperty(Default.ucd); for (int j = 0; j < LIMIT; ++j) { @@ -973,9 +972,9 @@ of characters, the first of which has a non-zero combining class. for (int cp = 0xA0; cp < 0xFF; ++cp) { System.out.println(); - System.out.println(Default.ucd.getCodeAndName(cp)); + System.out.println(Default.ucd().getCodeAndName(cp)); for (int j = 0; j < DERIVED_PROPERTY_LIMIT; ++j) { - String prop = make(j, Default.ucd).getValue(cp); + String prop = make(j, Default.ucd()).getValue(cp); if (prop.length() != 0) System.out.println("\t" + prop); } } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java index cd61eafb2c1..3cf31ed2fd8 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $ -* $Date: 2004/02/06 18:30:22 $ -* $Revision: 1.8 $ +* $Date: 2004/02/07 01:01:16 $ +* $Revision: 1.9 $ * ******************************************************************************* */ @@ -36,10 +36,10 @@ abstract public class GenerateBreakTest implements UCD_Types { public static void main(String[] args) throws IOException { System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61"); //Default.setUCD(); - new GenerateGraphemeBreakTest(Default.ucd).run(); - new GenerateWordBreakTest(Default.ucd).run(); - new GenerateLineBreakTest(Default.ucd).run(); - new GenerateSentenceBreakTest(Default.ucd).run(); + new GenerateGraphemeBreakTest(Default.ucd()).run(); + new GenerateWordBreakTest(Default.ucd()).run(); + new GenerateLineBreakTest(Default.ucd()).run(); + new GenerateSentenceBreakTest(Default.ucd()).run(); } GenerateBreakTest(UCD ucd) { diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java index 1f4a3440828..f3f4a36239c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $ -* $Date: 2004/02/06 18:30:22 $ -* $Revision: 1.14 $ +* $Date: 2004/02/07 01:01:15 $ +* $Revision: 1.15 $ * ******************************************************************************* */ @@ -40,7 +40,6 @@ public class GenerateCaseFolding implements UCD_Types { public static void makeCaseFold(boolean normalized) throws java.io.IOException { PICK_SHORT = NF_CLOSURE = normalized; - Default.setUCD(); log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true)); @@ -142,15 +141,15 @@ public class GenerateCaseFolding implements UCD_Types { static void drawLine(PrintWriter out, int ch, String type, String result) { String comment = ""; if (COMMENT_DIFFS) { - String lower = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER); + String lower = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER); if (!lower.equals(result)) { - String upper = Default.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER); - String lower2 = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER); + String upper = Default.ucd().getCase(UTF16.valueOf(ch), FULL, UPPER); + String lower2 = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER); if (lower.equals(lower2)) { comment = "[Diff " + Utility.hex(lower, " ") + "] "; } else { Utility.fixDot(); - System.out.println("PROBLEM WITH: " + Default.ucd.getCodeAndName(ch)); + System.out.println("PROBLEM WITH: " + Default.ucd().getCodeAndName(ch)); comment = "[DIFF " + Utility.hex(lower, " ") + ", " + Utility.hex(lower2, " ") + "] "; } } @@ -159,7 +158,7 @@ public class GenerateCaseFolding implements UCD_Types { out.println(Utility.hex(ch) + "; " + type + "; " + Utility.hex(result, " ") - + "; # " + comment + Default.ucd.getName(ch)); + + "; # " + comment + Default.ucd().getName(ch)); } static int probeCh = 0x01f0; @@ -175,7 +174,7 @@ public class GenerateCaseFolding implements UCD_Types { for (int ch = 0; ch <= 0x10FFFF; ++ch) { Utility.dot(ch); //if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch)); - if (!Default.ucd.isRepresented(ch)) continue; + if (!Default.ucd().isRepresented(ch)) continue; getClosure(ch, data, full, nfClose, condition); } @@ -221,13 +220,13 @@ public class GenerateCaseFolding implements UCD_Types { } Utility.fixDot(); log.println("Non-Optimal Representative " + message); - log.println(" Rep:\t" + Default.ucd.getCodeAndName(rep)); + log.println(" Rep:\t" + Default.ucd().getCodeAndName(rep)); log.println(" Set:\t" + toString(set,true, true)); } log.println(); log.println(); - log.println(rep + "\t#" + Default.ucd.getName(rep)); + log.println(rep + "\t#" + Default.ucd().getName(rep)); // Add it for all the elements of the set @@ -236,7 +235,7 @@ public class GenerateCaseFolding implements UCD_Types { String s2 = (String)it2.next(); if (s2.equals(rep)) continue; - log.println(s2 + "\t#" + Default.ucd.getName(s2)); + log.println(s2 + "\t#" + Default.ucd().getName(s2)); if (UTF16.countCodePoint(s2) == 1) { repChar.put(UTF32.getCodePointSubstring(s2,0), rep); @@ -261,13 +260,13 @@ public class GenerateCaseFolding implements UCD_Types { if (!full) result <<= 8; String low = lower(upper(s, full, condition), full, condition); if (s.equals(low)) result |= ISLOWER; - else if (PICK_SHORT && Default.nfd.normalize(s).equals(Default.nfd.normalize(low))) result |= ISLOWER; + else if (PICK_SHORT && Default.nfd().normalize(s).equals(Default.nfd().normalize(low))) result |= ISLOWER; - if (s.equals(Default.nfc.normalize(s))) result |= NFC_FORMAT; + if (s.equals(Default.nfc().normalize(s))) result |= NFC_FORMAT; if (show) { Utility.fixDot(); - System.out.println(Utility.hex(result) + ", " + Default.ucd.getCodeAndName(s)); + System.out.println(Utility.hex(result) + ", " + Default.ucd().getCodeAndName(s)); } return result; } @@ -349,10 +348,10 @@ public class GenerateCaseFolding implements UCD_Types { // do funny stuff since we can't modify set while iterating // We don't do this because if the source is not normalized, we don't want to normalize if (nfClose) { - if (add(set, Default.nfd.normalize(s), data)) continue main; - if (add(set, Default.nfc.normalize(s), data)) continue main; - if (add(set, Default.nfkd.normalize(s), data)) continue main; - if (add(set, Default.nfkc.normalize(s), data)) continue main; + if (add(set, Default.nfd().normalize(s), data)) continue main; + if (add(set, Default.nfc().normalize(s), data)) continue main; + if (add(set, Default.nfkd().normalize(s), data)) continue main; + if (add(set, Default.nfkc().normalize(s), data)) continue main; } if (add(set, lower(s, full, condition), data)) continue main; if (add(set, title(s, full, condition), data)) continue main; @@ -376,7 +375,7 @@ public class GenerateCaseFolding implements UCD_Types { return Default.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER); } */ - return Default.ucd.getCase(s, full ? FULL : SIMPLE, LOWER, condition); + return Default.ucd().getCase(s, full ? FULL : SIMPLE, LOWER, condition); } static String upper(String s, boolean full, String condition) { @@ -385,7 +384,7 @@ public class GenerateCaseFolding implements UCD_Types { return Default.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER); } */ - return Default.ucd.getCase(s, full ? FULL : SIMPLE, UPPER, condition); + return Default.ucd().getCase(s, full ? FULL : SIMPLE, UPPER, condition); } static String title(String s, boolean full, String condition) { @@ -394,7 +393,7 @@ public class GenerateCaseFolding implements UCD_Types { return Default.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE); } */ - return Default.ucd.getCase(s, full ? FULL : SIMPLE, TITLE, condition); + return Default.ucd().getCase(s, full ? FULL : SIMPLE, TITLE, condition); } static boolean add(Set set, String s, Map data) { @@ -433,7 +432,7 @@ public class GenerateCaseFolding implements UCD_Types { } first = false; if (name) { - result += Default.ucd.getCodeAndName(s2); + result += Default.ucd().getCodeAndName(s2); } else { result += Utility.hex(s2, " "); } @@ -443,12 +442,12 @@ public class GenerateCaseFolding implements UCD_Types { static boolean specialNormalizationDiffers(int ch) { if (ch == 0x00DF) return true; // es-zed - return !Default.nfkd.isNormalized(ch); + return !Default.nfkd().isNormalized(ch); } static String specialNormalization(String s) { if (s.equals("\u00DF")) return "ss"; - return Default.nfkd.normalize(s); + return Default.nfkd().normalize(s); } static boolean isExcluded(int ch) { @@ -458,14 +457,13 @@ public class GenerateCaseFolding implements UCD_Types { if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A.. if (0x20A8 <= ch && ch <= 0x217B) return true; // skip Rupee.. - byte type = Default.ucd.getDecompositionType(ch); + byte type = Default.ucd().getDecompositionType(ch); if (type == COMPAT_SQUARE) return true; //if (type == COMPAT_UNSPECIFIED) return true; return false; } static void generateSpecialCasing(boolean normalize) throws IOException { - Default.setUCD(); Map sorted = new TreeMap(); String suffix2 = ""; @@ -476,19 +474,19 @@ public class GenerateCaseFolding implements UCD_Types { for (int ch = 0; ch <= 0x10FFFF; ++ch) { Utility.dot(ch); - if (!Default.ucd.isRepresented(ch)) continue; + if (!Default.ucd().isRepresented(ch)) continue; if (!specialNormalizationDiffers(ch)) continue; - String lower = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, LOWER)); - String upper = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, UPPER)); - String title = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, TITLE)); + String lower = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, LOWER)); + String upper = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, UPPER)); + String title = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, TITLE)); String chstr = UTF16.valueOf(ch); String decomp = specialNormalization(chstr); - String flower = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, LOWER)); - String fupper = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, UPPER)); - String ftitle = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, TITLE)); + String flower = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, LOWER)); + String fupper = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, UPPER)); + String ftitle = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, TITLE)); String base = decomp; String blower = specialNormalization(lower); @@ -496,42 +494,42 @@ public class GenerateCaseFolding implements UCD_Types { String btitle = specialNormalization(title); if (true) { - flower = Default.nfc.normalize(flower); - fupper = Default.nfc.normalize(fupper); - ftitle = Default.nfc.normalize(ftitle); - base = Default.nfc.normalize(base); - blower = Default.nfc.normalize(blower); - bupper = Default.nfc.normalize(bupper); - btitle = Default.nfc.normalize(btitle); + flower = Default.nfc().normalize(flower); + fupper = Default.nfc().normalize(fupper); + ftitle = Default.nfc().normalize(ftitle); + base = Default.nfc().normalize(base); + blower = Default.nfc().normalize(blower); + bupper = Default.nfc().normalize(bupper); + btitle = Default.nfc().normalize(btitle); } if (ch == CHECK_CHAR) { - System.out.println("Code: " + Default.ucd.getCodeAndName(ch)); - System.out.println("Decomp: " + Default.ucd.getCodeAndName(decomp)); - System.out.println("Base: " + Default.ucd.getCodeAndName(base)); - System.out.println("SLower: " + Default.ucd.getCodeAndName(lower)); - System.out.println("FLower: " + Default.ucd.getCodeAndName(flower)); - System.out.println("BLower: " + Default.ucd.getCodeAndName(blower)); - System.out.println("STitle: " + Default.ucd.getCodeAndName(title)); - System.out.println("FTitle: " + Default.ucd.getCodeAndName(ftitle)); - System.out.println("BTitle: " + Default.ucd.getCodeAndName(btitle)); - System.out.println("SUpper: " + Default.ucd.getCodeAndName(upper)); - System.out.println("FUpper: " + Default.ucd.getCodeAndName(fupper)); - System.out.println("BUpper: " + Default.ucd.getCodeAndName(bupper)); + System.out.println("Code: " + Default.ucd().getCodeAndName(ch)); + System.out.println("Decomp: " + Default.ucd().getCodeAndName(decomp)); + System.out.println("Base: " + Default.ucd().getCodeAndName(base)); + System.out.println("SLower: " + Default.ucd().getCodeAndName(lower)); + System.out.println("FLower: " + Default.ucd().getCodeAndName(flower)); + System.out.println("BLower: " + Default.ucd().getCodeAndName(blower)); + System.out.println("STitle: " + Default.ucd().getCodeAndName(title)); + System.out.println("FTitle: " + Default.ucd().getCodeAndName(ftitle)); + System.out.println("BTitle: " + Default.ucd().getCodeAndName(btitle)); + System.out.println("SUpper: " + Default.ucd().getCodeAndName(upper)); + System.out.println("FUpper: " + Default.ucd().getCodeAndName(fupper)); + System.out.println("BUpper: " + Default.ucd().getCodeAndName(bupper)); } // presumably if there is a single code point, it would already be in the simple mappings if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1 && UTF16.countCodePoint(title) == 1) { - if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd.getCodeAndName(ch)); + if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd().getCodeAndName(ch)); continue; } // if there is no change from the base, skip if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) { - if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd.getCodeAndName(ch)); + if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd().getCodeAndName(ch)); continue; } @@ -544,11 +542,11 @@ public class GenerateCaseFolding implements UCD_Types { // if there are no changes from the original, or the expanded original, skip if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) { - if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd.getCodeAndName(ch)); + if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd().getCodeAndName(ch)); continue; } - String name = Default.ucd.getName(ch); + String name = Default.ucd().getName(ch); int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1 : ch == 0x130 ? 2 @@ -559,16 +557,16 @@ public class GenerateCaseFolding implements UCD_Types { : UTF16.countCodePoint(fupper) == 2 ? 7 : 8; - if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd.getCodeAndName(ch)); + if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd().getCodeAndName(ch)); // HACK boolean denormalize = !normalize && order != 6 && order != 7; String mapping = Utility.hex(ch) - + "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd.normalize(flower) : flower) - + "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd.normalize(ftitle) : ftitle) - + "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd.normalize(fupper) : fupper) - + "; # " + Default.ucd.getName(ch); + + "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd().normalize(flower) : flower) + + "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd().normalize(ftitle) : ftitle) + + "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd().normalize(fupper) : fupper) + + "; # " + Default.ucd().getName(ch); // special exclusions if (isExcluded(ch)) { diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java index ee6686e071f..0e80e896abf 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $ -* $Date: 2002/10/05 01:28:58 $ -* $Revision: 1.1 $ +* $Date: 2004/02/07 01:01:15 $ +* $Revision: 1.2 $ * ******************************************************************************* */ @@ -24,7 +24,6 @@ abstract public class GenerateCaseTest implements UCD_Types { public static void main(String[] args) throws IOException { System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61"); - Default.setUCD(); PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS); @@ -34,15 +33,15 @@ abstract public class GenerateCaseTest implements UCD_Types { for (int cp = 0; cp < 0x10FFFF; ++cp) { Utility.dot(cp); - if (!Default.ucd.isAllocated(cp)) continue; - if (Default.ucd.isHangulSyllable(cp)) continue; - byte cat = Default.ucd.getCategory(cp); + if (!Default.ucd().isAllocated(cp)) continue; + if (Default.ucd().isHangulSyllable(cp)) continue; + byte cat = Default.ucd().getCategory(cp); if (cp == PRIVATE_USE) continue; - String lower = Default.ucd.getCase(cp, FULL, LOWER); - String upper = Default.ucd.getCase(cp, FULL, UPPER); - String title = Default.ucd.getCase(cp, FULL, TITLE); - String fold = Default.ucd.getCase(cp, FULL, FOLD); + String lower = Default.ucd().getCase(cp, FULL, LOWER); + String upper = Default.ucd().getCase(cp, FULL, UPPER); + String title = Default.ucd().getCase(cp, FULL, TITLE); + String fold = Default.ucd().getCase(cp, FULL, FOLD); if (lower.equals(upper) && lower.equals(title) && lower.equals(fold)) continue; @@ -54,17 +53,17 @@ abstract public class GenerateCaseTest implements UCD_Types { s = s + testChar; - String s2 = Default.nfd.normalize(s); + String s2 = Default.nfd().normalize(s); - String lower1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, LOWER)); - String upper1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, UPPER)); - String title1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, TITLE)); - String fold1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, FOLD)); + String lower1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, LOWER)); + String upper1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, UPPER)); + String title1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, TITLE)); + String fold1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, FOLD)); - if (lower1.equals(Default.nfc.normalize(lower+testChar)) - && upper1.equals(Default.nfc.normalize(upper+testChar)) - && title1.equals(Default.nfc.normalize(title+testChar)) - && fold1.equals(Default.nfc.normalize(fold+testChar)) + if (lower1.equals(Default.nfc().normalize(lower+testChar)) + && upper1.equals(Default.nfc().normalize(upper+testChar)) + && title1.equals(Default.nfc().normalize(title+testChar)) + && fold1.equals(Default.nfc().normalize(fold+testChar)) ) continue; write(out, s, true); @@ -77,17 +76,17 @@ abstract public class GenerateCaseTest implements UCD_Types { static int counter = 0; static void write(PrintWriter out, String ss, boolean doComment) { - String s = Default.nfd.normalize(ss); - String lower = Default.nfc.normalize(Default.ucd.getCase(s, FULL, LOWER)); - String upper = Default.nfc.normalize(Default.ucd.getCase(s, FULL, UPPER)); - String title = Default.nfc.normalize(Default.ucd.getCase(s, FULL, TITLE)); - String fold = Default.nfc.normalize(Default.ucd.getCase(s, FULL, FOLD)); + String s = Default.nfd().normalize(ss); + String lower = Default.nfc().normalize(Default.ucd().getCase(s, FULL, LOWER)); + String upper = Default.nfc().normalize(Default.ucd().getCase(s, FULL, UPPER)); + String title = Default.nfc().normalize(Default.ucd().getCase(s, FULL, TITLE)); + String fold = Default.nfc().normalize(Default.ucd().getCase(s, FULL, FOLD)); out.println(Utility.hex(ss) + "; " + Utility.hex(lower) + "; " + Utility.hex(upper) + "; " + Utility.hex(title) + "; " + Utility.hex(fold) - + (doComment ? "\t# " + Default.ucd.getName(ss) : "") + + (doComment ? "\t# " + Default.ucd().getName(ss) : "") ); counter++; } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index 7179c0e0484..bec40627b4c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2004/02/06 18:30:21 $ -* $Revision: 1.31 $ +* $Date: 2004/02/07 01:01:15 $ +* $Revision: 1.32 $ * ******************************************************************************* */ @@ -28,35 +28,34 @@ public class GenerateData implements UCD_Types { static final String HORIZONTAL_LINE = "# ================================================"; static final void genSplit () { - Default.setUCD(); UnicodeSet split = new UnicodeSet(); UnicodeSet reordrant = new UnicodeSet( "[\u093F\u09BF\u09c7\u09c8\u0abf\u0abf\u0b47\u0bc6\u0bc7\u0bc8" + "\u0d46\u0d47\u0d48\u0dd9\u0dda\u0ddb\u1031\u17be\u17c1\u17c2\u17c3]"); UnicodeSet subjoined = new UnicodeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { - if (!Default.ucd.isAssigned(i)) continue; + if (!Default.ucd().isAssigned(i)) continue; Utility.dot(i); - int cat = Default.ucd.getCategory(i); + int cat = Default.ucd().getCategory(i); if (cat != Mc && cat != Mn && cat != Me) continue; - if (Default.ucd.getName(i).indexOf("SUBJOINED") >= 0) { + if (Default.ucd().getName(i).indexOf("SUBJOINED") >= 0) { System.out.print('*'); subjoined.add(i); continue; } - String decomp = Default.nfd.normalize(i); + String decomp = Default.nfd().normalize(i); //int count = countTypes(decomp, Mc); if (UTF16.countCodePoint(decomp) > 1) split.add(i); } Utility.fixDot(); System.out.println("Split: " + split.size()); - Utility.showSetNames("", split, false, Default.ucd); + Utility.showSetNames("", split, false, Default.ucd()); System.out.println("Reordrant: " + reordrant.size()); - Utility.showSetNames("", reordrant, false, Default.ucd); + Utility.showSetNames("", reordrant, false, Default.ucd()); System.out.println("Subjoined: " + subjoined.size()); - Utility.showSetNames("", subjoined, false, Default.ucd); + Utility.showSetNames("", subjoined, false, Default.ucd()); } static int countTypes(String s, int filter) { @@ -64,7 +63,7 @@ public class GenerateData implements UCD_Types { int cp; for (int i = 0; i < s.length(); i+= UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i); - int cat = Default.ucd.getCategory(i); + int cat = Default.ucd().getCategory(i); if (cat == filter) count++; } return count; @@ -73,7 +72,7 @@ public class GenerateData implements UCD_Types { //static UnifiedBinaryProperty ubp public static void checkHoffman(String test) { - String result = Default.nfkc.normalize(test); + String result = Default.nfkc().normalize(test); System.out.println(Utility.hex(test) + " => " + Utility.hex(result)); System.out.println(); show(test, 0); @@ -85,10 +84,10 @@ public class GenerateData implements UCD_Types { int cp; for (int i = 0; i < s.length(); i += UTF32.count16(cp)) { cp = UTF32.char32At(s, i); - String cc = " " + Default.ucd.getCombiningClass(cp); + String cc = " " + Default.ucd().getCombiningClass(cp); cc = Utility.repeat(" ", 4 - cc.length()) + cc; - System.out.println(Utility.repeat(" ", indent) + Default.ucd.getCode(cp) + cc + " " + Default.ucd.getName(cp)); - String decomp = Default.nfkc.normalize(cp); + System.out.println(Utility.repeat(" ", indent) + Default.ucd().getCode(cp) + cc + " " + Default.ucd().getName(cp)); + String decomp = Default.nfkc().normalize(cp); if (!decomp.equals(UTF32.valueOf32(cp))) { show(decomp, indent + 4); } @@ -130,13 +129,13 @@ public class GenerateData implements UCD_Types { } public static String getFileSuffix(boolean withDVersion) { - return "-" + Default.ucd.getVersion() + return "-" + Default.ucd().getVersion() + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") + ".txt"; } public static String getHTMLFileSuffix(boolean withDVersion) { - return "-" + Default.ucd.getVersion() + return "-" + Default.ucd().getVersion() + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") + ".html"; } @@ -149,24 +148,24 @@ public class GenerateData implements UCD_Types { log1.println(""); PrintWriter log2 = Utility.openPrintWriter("Log2.xml", Utility.LATIN1_UNIX); - log2.println(""); + log2.println(""); for (int i = 0; i <= 0x10FFFF; ++i) { if (!target.isAllocated(i)) continue; Utility.dot(i); UData t = target.get(i, true); - UData current = Default.ucd.get(i, true); + UData current = Default.ucd().get(i, true); if (i == 0x5E) { System.out.println(target.getDecompositionTypeID(i) + ", " + Utility.hex(target.getDecompositionMapping(i))); - System.out.println(Default.ucd.getDecompositionTypeID(i) - + ", " + Utility.hex(Default.ucd.getDecompositionMapping(i))); + System.out.println(Default.ucd().getDecompositionTypeID(i) + + ", " + Utility.hex(Default.ucd().getDecompositionMapping(i))); } if (t.equals(current)) continue; // print both for comparison log1.println(t.toString(target, UData.ABBREVIATED)); - log2.println(current.toString(Default.ucd, UData.ABBREVIATED)); + log2.println(current.toString(Default.ucd(), UData.ABBREVIATED)); } log1.println(""); log2.println(""); @@ -176,7 +175,7 @@ public class GenerateData implements UCD_Types { public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException { - Default.setUCD(); + String newFile = directory + fileName + getFileSuffix(true); System.out.println("New File: " + newFile); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); @@ -186,7 +185,7 @@ public class GenerateData implements UCD_Types { doHeader(fileName + getFileSuffix(false), output, headerChoice); for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) { - UCDProperty up = DerivedProperty.make(i, Default.ucd); + UCDProperty up = DerivedProperty.make(i, Default.ucd()); if (up == null) continue; boolean keepGoing = true; if (!up.isStandard()) keepGoing = false; @@ -198,7 +197,7 @@ public class GenerateData implements UCD_Types { System.out.print('.'); output.println(HORIZONTAL_LINE); output.println(); - new DerivedPropertyLister(Default.ucd, i, output).print(); + new DerivedPropertyLister(Default.ucd(), i, output).print(); output.flush(); } output.close(); @@ -227,7 +226,7 @@ public class GenerateData implements UCD_Types { */ public static void generateCompExclusions() throws IOException { - Default.setUCD(); + String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; @@ -238,7 +237,7 @@ public class GenerateData implements UCD_Types { output.println("#"); output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table."); output.println("#"); - if (Default.ucd.getVersion().equals("3.2.0")) { + if (Default.ucd().getVersion().equals("3.2.0")) { output.println("# The format of the comments in this file has been updated since the last version,"); output.println("# CompositionExclusions-3.txt. The only substantive change to this file between that"); output.println("# version and this one is the addition of U+2ADC FORKING."); @@ -300,7 +299,7 @@ public class GenerateData implements UCD_Types { public CompLister(PrintWriter output, int type) { this.output = output; - ucdData = Default.ucd; + ucdData = Default.ucd(); oldUCD = UCD.make("3.0.0"); // showOnConsole = true; alwaysBreaks = type <= 2; // CHANGE LATER @@ -340,7 +339,7 @@ public class GenerateData implements UCD_Types { } public static void generatePropertyAliases() throws IOException { - Default.setUCD(); + String prop = ""; String propAbb = ""; String value = ""; @@ -409,7 +408,7 @@ public class GenerateData implements UCD_Types { //System.out.println("debug"); } - UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd); + UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd()); if (up == null) continue; if (!up.isStandard()) continue; @@ -449,7 +448,7 @@ public class GenerateData implements UCD_Types { if (type == SCRIPT) { - value = Default.ucd.getCase(value, FULL, TITLE); + value = Default.ucd().getCase(value, FULL, TITLE); } valueAbb = up.getValue(SHORT); @@ -513,7 +512,7 @@ public class GenerateData implements UCD_Types { UCD.BlockData blockData = new UCD.BlockData(); int blockId = 0; - while (Default.ucd.getBlockData(blockId++, blockData)) { + while (Default.ucd().getBlockData(blockId++, blockData)) { addLine(sorted, "blk", "n/a", blockData.name); } @@ -698,7 +697,7 @@ public class GenerateData implements UCD_Types { // static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1; public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException { - String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), true, true); + String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true); if (mostRecent != null) { batName[0] = generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix, mostRecent, directory + fileRoot + suffix); @@ -707,7 +706,7 @@ public class GenerateData implements UCD_Types { return null; } - String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), false, true); + String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true); if (lessRecent != null && !mostRecent.equals(lessRecent)) { generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix, lessRecent, directory + fileRoot + suffix); @@ -736,7 +735,7 @@ public class GenerateData implements UCD_Types { public static void generateVerticalSlice(int startEnum, int endEnum, int headerChoice, String directory, String file) throws IOException { - Default.setUCD(); + String newFile = directory + file + getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; @@ -745,7 +744,7 @@ public class GenerateData implements UCD_Types { doHeader(file + getFileSuffix(false), output, headerChoice); int last = -1; for (int i = startEnum; i < endEnum; ++i) { - UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd); + UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd()); if (up == null) continue; if (up.skipInDerivedListing()) continue; @@ -777,7 +776,7 @@ public class GenerateData implements UCD_Types { } System.out.print("."); if (DEBUG) System.out.println(i); - new MyPropertyLister(Default.ucd, i, output).print(); + new MyPropertyLister(Default.ucd(), i, output).print(); output.flush(); } if (endEnum == LIMIT_ENUM) { @@ -791,13 +790,13 @@ public class GenerateData implements UCD_Types { Set numericValueSet = new TreeSet(); for (int i = 0; i < 0x10FFFF; ++i) { - double nv = Default.ucd.getNumericValue(i); + double nv = Default.ucd().getNumericValue(i); if (Double.isNaN(nv)) continue; numericValueSet.add(new Double(nv)); } Iterator it = numericValueSet.iterator(); while(it.hasNext()) { - new MyFloatLister(Default.ucd, ((Double)it.next()).doubleValue(), output).print(); + new MyFloatLister(Default.ucd(), ((Double)it.next()).doubleValue(), output).print(); output.println(); System.out.print("."); } @@ -810,7 +809,7 @@ public class GenerateData implements UCD_Types { } static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException { - Default.setUCD(); + String newFile = directory + fileName + getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX); String[] batName = {""}; @@ -869,8 +868,8 @@ public class GenerateData implements UCD_Types { for (int ch = 0; ch < 0x10FFFF; ++ch) { Utility.dot(ch); - if (!Default.ucd.isAssigned(ch)) continue; - if (Default.ucd.isPUA(ch)) continue; + if (!Default.ucd().isAssigned(ch)) continue; + if (Default.ucd().isPUA(ch)) continue; String cc = UTF32.valueOf32(ch); writeLine(cc,log, true); } @@ -880,9 +879,9 @@ public class GenerateData implements UCD_Types { for (int ch = 0; ch < 0x10FFFF; ++ch) { Utility.dot(ch); - if (!Default.ucd.isAssigned(ch)) continue; - if (Default.ucd.isPUA(ch)) continue; - int cc = Default.ucd.getCombiningClass(ch); + if (!Default.ucd().isAssigned(ch)) continue; + if (Default.ucd().isPUA(ch)) continue; + int cc = Default.ucd().getCombiningClass(ch); if (example[cc] == null) example[cc] = UTF32.valueOf32(ch); } @@ -896,9 +895,9 @@ public class GenerateData implements UCD_Types { for (int ch = 0; ch < 0x10FFFF; ++ch) { Utility.dot(ch); - if (!Default.ucd.isAssigned(ch)) continue; - if (Default.ucd.isPUA(ch)) continue; - short c = Default.ucd.getCombiningClass(ch); + if (!Default.ucd().isAssigned(ch)) continue; + if (Default.ucd().isPUA(ch)) continue; + short c = Default.ucd().getCombiningClass(ch); if (c == 0) continue; // add character with higher class, same class, lower class @@ -945,19 +944,19 @@ public class GenerateData implements UCD_Types { } static void writeLine(String cc, PrintWriter log, boolean check) { - String c = Default.nfc.normalize(cc); - String d = Default.nfd.normalize(cc); - String kc = Default.nfkc.normalize(cc); - String kd = Default.nfkd.normalize(cc); + String c = Default.nfc().normalize(cc); + String d = Default.nfd().normalize(cc); + String kc = Default.nfkc().normalize(cc); + String kd = Default.nfkd().normalize(cc); if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return; // consistency check - String dc = Default.nfd.normalize(c); - String dkc = Default.nfd.normalize(kc); + String dc = Default.nfd().normalize(c); + String dkc = Default.nfd().normalize(kc); if (!dc.equals(d) || !dkc.equals(kd)) { System.out.println("Danger Will Robinson!"); Normalizer.SHOW_PROGRESS = true; - d = Default.nfd.normalize(cc); + d = Default.nfd().normalize(cc); } // printout @@ -966,7 +965,7 @@ public class GenerateData implements UCD_Types { + Utility.hex(kc," ") + ";" + Utility.hex(kd," ") + "; # (" + comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; " - + ") " + Default.ucd.getName(cc)); + + ") " + Default.ucd().getName(cc)); } static StringBuffer commaResult = new StringBuffer(); @@ -977,7 +976,7 @@ public class GenerateData implements UCD_Types { int cp; for (int i = 0; i < s.length(); i += UTF32.count16(i)) { cp = UTF32.char32At(s, i); - if (Default.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC'); + if (Default.ucd().getCategory(cp) == Mn) commaResult.append('\u25CC'); UTF32.append32(commaResult, cp); } return commaResult.toString(); @@ -1012,7 +1011,7 @@ public class GenerateData implements UCD_Types { static final void backwardsCompat(String directory, String filename, int[] list) throws IOException { - Default.setUCD(); + String newFile = directory + filename + getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; @@ -1025,7 +1024,7 @@ public class GenerateData implements UCD_Types { int prop = list[i]; log.println(); log.println(HORIZONTAL_LINE); - log.println("###### " + DerivedProperty.make(prop, Default.ucd).getName()); + log.println("###### " + DerivedProperty.make(prop, Default.ucd()).getName()); //log.println(); //log.println(HORIZONTAL_LINE); //new DiffPropertyLister("3.2.0", "1.1.0", log, prop).print(); @@ -1082,9 +1081,9 @@ public class GenerateData implements UCD_Types { log.println(); log.println("Cummulative differences"); - UCDProperty up = DerivedProperty.make(prop, Default.ucd); + UCDProperty up = DerivedProperty.make(prop, Default.ucd()); UnicodeSet newProp = up.getSet(); - Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd); + Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd()); } } finally { if (log != null) { @@ -1095,7 +1094,7 @@ public class GenerateData implements UCD_Types { } static final void generateAge(String directory, String filename) throws IOException { - Default.setUCD(); + String newFile = directory + filename + getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; @@ -1195,32 +1194,32 @@ public class GenerateData implements UCD_Types { } public static void listCombiningAccents() throws IOException { - Default.setUCD(); + PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); for (int i = 0; i < 0x10FFFF; ++i) { Utility.dot(i); - if (!Default.ucd.isRepresented(i)) continue; + if (!Default.ucd().isRepresented(i)) continue; - if (Default.nfd.isNormalized(i)) { - if (Default.ucd.getScript(i) == LATIN_SCRIPT) { + if (Default.nfd().isNormalized(i)) { + if (Default.ucd().getScript(i) == LATIN_SCRIPT) { int cp = i; String hex = "u" + Utility.hex(cp, 4); - set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd.getName(cp)); + set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd().getName(cp)); } continue; } - String decomp = Default.nfd.normalize(i); + String decomp = Default.nfd().normalize(i); int j; for (j = 0; j < decomp.length(); j += UTF16.getCharCount(i)) { int cp = UTF16.charAt(decomp, j); - byte cat = Default.ucd.getCategory(cp); + byte cat = Default.ucd().getCategory(cp); if (cat != Mn) continue; String hex = "u" + Utility.hex(cp, 4); - set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd.getName(cp)); + set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd().getName(cp)); } } @@ -1232,7 +1231,7 @@ public class GenerateData implements UCD_Types { } public static void listGreekVowels() throws IOException { - Default.setUCD(); + PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); @@ -1245,14 +1244,14 @@ public class GenerateData implements UCD_Types { for (char i = 0; i < 0xFFFF; ++i) { Utility.dot(i); - if (!Default.ucd.isRepresented(i)) continue; - if (Default.ucd.getScript(i) != GREEK_SCRIPT) continue; - String decomp = Default.nfd.normalize(i); + if (!Default.ucd().isRepresented(i)) continue; + if (Default.ucd().getScript(i) != GREEK_SCRIPT) continue; + String decomp = Default.nfd().normalize(i); if (decomp.indexOf('\u0306') >= 0) continue; // skip breve if (decomp.indexOf('\u0304') >= 0) continue; // skip macron - String comp = Default.nfc.normalize(decomp); + String comp = Default.nfc().normalize(decomp); if (!comp.equals(String.valueOf(i))) continue; // skip compats char first = decomp.charAt(0); @@ -1266,7 +1265,7 @@ public class GenerateData implements UCD_Types { for (int j = 0; j < diphthongStart.length(); ++j) { String v = diphthongStart.substring(j, j+1); char vc = v.charAt(0); - if (Default.ucd.getCategory(vc) == Ll && Default.ucd.getCategory(first) == Lu) continue; + if (Default.ucd().getCategory(vc) == Ll && Default.ucd().getCategory(first) == Lu) continue; if (etas.indexOf(vc) >= 0 && iotas.indexOf(first) >= 0) continue; set.add(new Pair(h + v + first, new Pair(v + decomp, v + i))); } @@ -1292,7 +1291,7 @@ public class GenerateData implements UCD_Types { public static void listKatakana() throws IOException { - Default.setUCD(); + for (char i = 'a'; i <= 'z'; ++i) { doKana(String.valueOf(i)); if (i == 'c') doKana("ch"); @@ -1325,18 +1324,18 @@ public class GenerateData implements UCD_Types { } public static void genTrailingZeros() { - Default.setUCD(); + UnicodeSet result = new UnicodeSet(); for (int i = 0; i < 0x10FFFF; ++i) { if ((i & 0xFFF) == 0) System.out.println("# " + i); - if (!Default.ucd.isAssigned(i)) continue; - if (Default.nfd.isNormalized(i)) continue; - String decomp = Default.nfd.normalize(i); + if (!Default.ucd().isAssigned(i)) continue; + if (Default.nfd().isNormalized(i)) continue; + String decomp = Default.nfd().normalize(i); int cp; for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) { cp = UTF16.charAt(decomp,j); if (j == 0) continue; // skip first - if (Default.ucd.getCombiningClass(cp) == 0) { + if (Default.ucd().getCombiningClass(cp) == 0) { result.add(cp); } } @@ -1349,8 +1348,8 @@ public class GenerateData implements UCD_Types { Utility.hex(start) + (start != end ? ".." + Utility.hex(end) : "") + "; " - + Default.ucd.getName(start) - + (start != end ? ".." + Default.ucd.getName(end) : "")); + + Default.ucd().getName(start) + + (start != end ? ".." + Default.ucd().getName(end) : "")); } System.out.println("TrailingZero count: " + result.size()); } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java index 40eb77cc760..3c4fc7a9a65 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $ -* $Date: 2004/02/06 18:30:21 $ -* $Revision: 1.13 $ +* $Date: 2004/02/07 01:01:15 $ +* $Revision: 1.14 $ * ******************************************************************************* */ @@ -49,7 +49,7 @@ public final class GenerateHanTransliterator implements UCD_Types { log.println("Unihan check"); log.println(""); - BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8); + BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion(), true, Utility.UTF8); Map properties = new TreeMap(); @@ -252,7 +252,7 @@ public final class GenerateHanTransliterator implements UCD_Types { public static void main(int typeIn) { type = typeIn; - Default.setUCD(); + try { System.out.println("Starting"); System.out.println("Quoting: " + quoteNonLetters.toRules(true)); @@ -277,7 +277,7 @@ public final class GenerateHanTransliterator implements UCD_Types { break; default: throw new IllegalArgumentException("Unexpected option: must be 0..2"); } - filename += Default.ucd.getVersion() + ".txt"; + filename += Default.ucd().getVersion() + ".txt"; err = Utility.openPrintWriter("Transliterate_err.txt", Utility.UTF8_WINDOWS); log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS); @@ -325,7 +325,7 @@ public final class GenerateHanTransliterator implements UCD_Types { String def = (String) unihanMap.get(keyChar); if (!isValidPinyin(def)) { String fixedDef = fixPinyin(def); - err.println(Default.ucd.getCode(keyChar) + "\t" + keyChar + "\t" + fixedDef + "\t#" + def + err.println(Default.ucd().getCode(keyChar) + "\t" + keyChar + "\t" + fixedDef + "\t#" + def + (fixedDef.equals(def) ? " FAIL" : "")); Utility.addToSet(badPinyin, def, keyChar); } @@ -334,7 +334,7 @@ public final class GenerateHanTransliterator implements UCD_Types { String accentDef = digitPinyin_accentPinyin.transliterate(digitDef); if (!accentDef.equals(def)) { err.println("Failed Digit Pinyin: " - + Default.ucd.getCode(keyChar) + "\t" + keyChar + "\t" + + Default.ucd().getCode(keyChar) + "\t" + keyChar + "\t" + def + " => " + digitDef + " => " + accentDef); } @@ -1157,11 +1157,11 @@ U+7878 for (int i = tabPos+1; i < tabPos2; ++i) { int cp = line.charAt(i); - int script = Default.ucd.getScript(cp); + int script = Default.ucd().getScript(cp); if (script != HAN_SCRIPT) { if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT && cp != 0x30FB && cp != 0x30FC) { - System.out.println("Huh: " + Default.ucd.getCodeAndName(cp)); + System.out.println("Huh: " + Default.ucd().getCodeAndName(cp)); } continue; } @@ -1237,15 +1237,15 @@ U+7878 UnicodeSet sPinyin = new UnicodeSet(); for (int i = 0; i < 0x10FFFF; ++i) { - if (!Default.ucd.isAllocated(i)) continue; - if (Default.ucd.getScript(i) != HAN_SCRIPT) continue; + if (!Default.ucd().isAllocated(i)) continue; + if (Default.ucd().getScript(i) != HAN_SCRIPT) continue; Utility.dot(i); String ch = UTF16.valueOf(i); String pinyin = (String) unihanMap.get(ch); if (pinyin == null) { - String ch2 = Default.nfkd.normalize(ch); + String ch2 = Default.nfkd().normalize(ch); pinyin = (String) unihanMap.get(ch2); if (pinyin != null) { addCheck(ch, pinyin, "n/a"); @@ -1688,8 +1688,8 @@ Bad pinyin data: \u4E7F ? LE } static void addCheck2(String word, String definition, String line) { - definition = Default.nfc.normalize(definition); - word = Default.nfc.normalize(word); + definition = Default.nfc().normalize(definition); + word = Default.nfc().normalize(word); if (DO_SIMPLE && UTF16.countCodePoint(word) > 1) return; if (pua.containsSome(word) ) { @@ -1799,7 +1799,7 @@ Bad pinyin data: \u4E7F ? LE static void readUnihanData(String key) throws java.io.IOException { - BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8); + BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion(), true, Utility.UTF8); int count = 0; int lineCounter = 0; @@ -1892,11 +1892,11 @@ Bad pinyin data: \u4E7F ? LE definition = fixDefinition(definition, line); } definition = definition.trim(); - definition = Default.ucd.getCase(definition, FULL, LOWER); + definition = Default.ucd().getCase(definition, FULL, LOWER); if (definition.length() == 0) { Utility.fixDot(); - err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line)); + err.println("Zero value for " + Default.ucd().getCode(cp) + " on: " + hex.transliterate(line)); } else { addCheck(UTF16.valueOf(cp), definition, line); } @@ -1914,7 +1914,7 @@ Bad pinyin data: \u4E7F ? LE definition = definition.trim(); definition = Utility.replace(definition, " ", " "); definition = Utility.replace(definition, " ", "-"); - definition = Default.ucd.getCase(definition, FULL, LOWER); + definition = Default.ucd().getCase(definition, FULL, LOWER); return definition; } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java index dc086837af4..fbb451d2465 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $ -* $Date: 2004/02/06 18:30:21 $ -* $Revision: 1.3 $ +* $Date: 2004/02/07 01:01:15 $ +* $Revision: 1.4 $ * ******************************************************************************* */ @@ -27,11 +27,11 @@ public class GenerateLineBreakTest implements UCD_Types { static final String[] hNames = {"L", "V", "T", "LV", "LVT"}; static byte getHangulType(int cp) { - if (Default.ucd.isLeadingJamo(cp)) return hL; - if (Default.ucd.isVowelJamo(cp)) return hV; - if (Default.ucd.isTrailingJamo(cp)) return hT; - if (Default.ucd.isHangulSyllable(cp)) { - if (Default.ucd.isDoubleHangul(cp)) return hLV; + if (Default.ucd().isLeadingJamo(cp)) return hL; + if (Default.ucd().isVowelJamo(cp)) return hV; + if (Default.ucd().isTrailingJamo(cp)) return hT; + if (Default.ucd().isHangulSyllable(cp)) { + if (Default.ucd().isDoubleHangul(cp)) return hLV; return hLVT; } return hNot; @@ -62,7 +62,7 @@ public class GenerateLineBreakTest implements UCD_Types { }; public static void main(String[] args) throws IOException { - Default.setUCD(); + new GenerateLineBreakTest().run(); new GenerateWordBreakTest().run(); @@ -166,7 +166,7 @@ public class GenerateLineBreakTest implements UCD_Types { byte result = getType(cp); if (result == LB_SUP) return "SUP"; if (result >= LB_LIMIT) return hNames[result - LB_LIMIT]; - return Default.ucd.getLineBreakID_fromIndex(result); + return Default.ucd().getLineBreakID_fromIndex(result); } // stuff that subclasses need to override @@ -174,7 +174,7 @@ public class GenerateLineBreakTest implements UCD_Types { if (cp > 0xFFFF) return LB_SUP; byte result = getHangulType(cp); if (result != hNot) return (byte)(result + LB_LIMIT); - return Default.ucd.getLineBreak(cp); + return Default.ucd().getLineBreak(cp); } public int getLimit() { @@ -277,7 +277,7 @@ public class GenerateLineBreakTest implements UCD_Types { } string.append(Utility.hex(cp)); - comment.append(Default.ucd.getName(cp) + " (" + getTypeID(cp) + ")"); + comment.append(Default.ucd().getName(cp) + " (" + getTypeID(cp) + ")"); status = isBreak(source, offset + UTF16.getCharCount(cp), recommended) ? BREAK : NOBREAK; string.append(' ').append(status); @@ -290,7 +290,7 @@ public class GenerateLineBreakTest implements UCD_Types { public void findSamples() { for (int i = 1; i <= 0x10FFFF; ++i) { - if (!Default.ucd.isAllocated(i)) continue; + if (!Default.ucd().isAllocated(i)) continue; if (0xD800 <= i && i <= 0xDFFF) continue; if(i == 0x1100) { System.out.print("here"); @@ -302,7 +302,7 @@ public class GenerateLineBreakTest implements UCD_Types { } for (int i = 0; i < TypeOrder.length; ++i) { String sample = samples[i]; - System.out.println(getTypeID(sample) + ":\t" + Default.ucd.getCodeAndName(sample)); + System.out.println(getTypeID(sample) + ":\t" + Default.ucd().getCodeAndName(sample)); } } @@ -684,7 +684,7 @@ public class GenerateLineBreakTest implements UCD_Types { // other properties // category based - byte cat = Default.ucd.getCategory(cp); + byte cat = Default.ucd().getCategory(cp); if (cat == Cc) return Control; if (cat == Cf) return Extend; if (((1<\n"; + table += "" + Default.ucd().getName(code) + " " + splits[1] + "
" + format.format(count[i][j]) + "
" + quote(sample[i][j]) + "