From ff537d472c8245b18dcb91876cef32f9a4157e4e Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sun, 28 Mar 2004 00:23:30 +0000 Subject: [PATCH] Fixed files screwed up in CVS failure X-SVN-Rev: 14791 --- .../ibm/icu/dev/test/util/BagFormatter.java | 180 ++++++++++++++---- .../icu/dev/test/util/ICUPropertyFactory.java | 18 +- .../icu/dev/test/util/TestBagFormatter.java | 16 +- .../ibm/icu/dev/test/util/TestUtilities.java | 92 ++++++++- .../ibm/icu/dev/test/util/UnicodeLabel.java | 3 + .../com/ibm/icu/dev/test/util/UnicodeMap.java | 75 +++++++- .../icu/dev/test/util/UnicodeProperty.java | 20 +- 7 files changed, 334 insertions(+), 70 deletions(-) diff --git a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java index 77518de5027..3e36396b515 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java @@ -5,14 +5,15 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $ - * $Date: 2004/02/24 21:46:21 $ - * $Revision: 1.9 $ + * $Date: 2004/03/28 00:23:30 $ + * $Revision: 1.10 $ * ***************************************************************************************** */ package com.ibm.icu.dev.test.util; import com.ibm.icu.text.*; +import com.ibm.icu.lang.*; import com.ibm.icu.impl.*; import java.io.*; @@ -21,6 +22,7 @@ import java.util.*; import java.text.MessageFormat; public class BagFormatter { + static final boolean DEBUG = false; public static final Transliterator toHTML = Transliterator.createFromRules( "any-html", @@ -49,13 +51,16 @@ public class BagFormatter { private UnicodeProperty.Factory source; private UnicodeLabel nameSource; private UnicodeLabel labelSource; + private UnicodeLabel rangeBreakSource; private UnicodeLabel valueSource; private String propName = ""; private boolean showCount = true; private boolean 
skipNullValues = true; - private boolean suppressReserved = true; + //private boolean suppressReserved = true; private boolean hexValue = false; private static final String NULL_VALUE = "_NULL_VALUE_"; + private int fullTotal = -1; + private String lineSeparator = "\r\n"; /** * Compare two UnicodeSets, and show the differences @@ -326,10 +331,10 @@ public class BagFormatter { if (result != null) return hcp + result; if (control.contains(codePoint)) return ""; - if (private_use.contains(codePoint)) return ""; + if (private_use.contains(codePoint)) return ""; if (noncharacter.contains(codePoint)) return ""; if (surrogate.contains(codePoint)) return ""; - if (suppressReserved) return ""; + //if (suppressReserved) return ""; return hcp + ""; } @@ -434,8 +439,6 @@ public class BagFormatter { nf.setGroupingUsed(false); } - private String lineSeparator = "\r\n"; - private class MyVisitor extends Visitor { private PrintWriter output; Tabber.MonoTabber myTabber; @@ -453,12 +456,12 @@ public class BagFormatter { if (propName.length() > 0) myTabber.add(propName.length() + 2,Tabber.LEFT); valueSize = getValueSource().getMaxWidth(shortValue); - System.out.println("ValueSize: " + valueSize); + if (DEBUG) System.out.println("ValueSize: " + valueSize); if (valueSize > 0) myTabber.add(valueSize + 2,Tabber.LEFT); // value myTabber.add(3,Tabber.LEFT); // comment character - labelSize = getLabelSource().getMaxWidth(shortLabel); + labelSize = getLabelSource(true).getMaxWidth(shortLabel); if (labelSize > 0) myTabber.add(labelSize + 1,Tabber.LEFT); // value if (mergeRanges && showCount) myTabber.add(5,Tabber.RIGHT); @@ -467,11 +470,12 @@ public class BagFormatter { //myTabber.add(7,Tabber.LEFT); commentSeparator = (showCount || showLiteral != null - || getLabelSource() != UnicodeProperty.NULL || getNameSource() != UnicodeProperty.NULL) + || getLabelSource(true) != UnicodeProperty.NULL + || getNameSource() != UnicodeProperty.NULL) ? 
"\t #" : ""; - System.out.println("Tabber: " + myTabber.toString()); - System.out.println("Tabber: " + myTabber.process("a\tb\td\td\tf\tg\th")); + if (DEBUG) System.out.println("Tabber: " + myTabber.toString()); + if (DEBUG) System.out.println("Tabber: " + myTabber.process("a\tb\td\td\tf\tg\th")); doAt(c); } @@ -487,7 +491,7 @@ public class BagFormatter { protected void doBefore(Object container, Object o) { if (showSetAlso && container instanceof UnicodeSet) { - output.print("#" + container + lineSeparator); + output.print("#" + container + lineSeparator ); } } @@ -495,7 +499,15 @@ public class BagFormatter { } protected void doAfter(Object container, Object o) { - output.print(lineSeparator + "# Total code points: " + nf.format(counter)); + if (fullTotal != -1 && fullTotal != counter) { + output.print(lineSeparator); + output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator); + output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator); + fullTotal = -1; + } else { + output.print(lineSeparator); + output.print("# Total code points: " + nf.format(counter) + lineSeparator); + } } protected void doSimpleAt(Object o) { @@ -520,7 +532,7 @@ public class BagFormatter { + insertLiteral(thing) + "\t" + getName(thing)) - + lineSeparator); + + lineSeparator ); counter++; } } @@ -528,19 +540,19 @@ public class BagFormatter { protected void doAt(Visitor.CodePointRange usi) { if (!mergeRanges) { for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) { - String label = getLabelSource().getValue(cp, shortLabel); - String value = getValue(cp, shortValue); - showLine(cp, cp, label, value); + showLine(cp, cp); } } else { rf.reset(usi.codepoint, usi.codepointEnd + 1); while (rf.next()) { - showLine(rf.start, rf.limit - 1, rf.label, rf.value); + showLine(rf.start, rf.limit - 1); } } } - private void showLine(int start, int end, String label, String value) { + private void showLine(int 
start, int end) { + String label = getLabelSource(true).getValue(start, shortLabel); + String value = getValue(start, shortValue); if (value == NULL_VALUE) return; counter += end - start + 1; @@ -556,7 +568,7 @@ public class BagFormatter { if (labelSize > 0) { label = "\t" + label; } else if (label.length() > 0) { - throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource().getMaxWidth(shortLabel)); + throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel)); } String count = ""; @@ -575,7 +587,7 @@ public class BagFormatter { + count + insertLiteral(start, end) + getName("\t ", start, end)) - + lineSeparator); + + lineSeparator ); } private String insertLiteral(String thing) { @@ -652,7 +664,7 @@ public class BagFormatter { private class RangeFinder { int start, limit; private int veryLimit; - String label, value; + //String label, value; void reset(int start, int limit) { this.limit = start; this.veryLimit = limit; @@ -661,13 +673,17 @@ public class BagFormatter { if (limit >= veryLimit) return false; start = limit; // set to end of last - label = getLabelSource().getValue(limit, shortLabel); - value = getValue(limit, shortLabel); + String label = getLabelSource(false).getValue(limit, true); + String value = getValue(limit, true); + String breaker = getRangeBreakSource().getValue(limit,true); + if (DEBUG && limit < 0x7F) System.out.println("Label: " + label + ", Value: " + value + ", Break: " + breaker); limit++; for (; limit < veryLimit; limit++) { - String s = getLabelSource().getValue(limit, shortLabel); - String v = getValue(limit, shortLabel); - if (!equalTo(s, label) || !equalTo(v, value)) break; + String s = getLabelSource(false).getValue(limit, true); + String v = getValue(limit, true); + String b = getRangeBreakSource().getValue(limit, true); + if (DEBUG && limit < 0x7F) System.out.println("*Label: " + label + ", Value: " + value + ", Break: " + breaker); + if 
(!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) break; } // at this point, limit is the first item that has a different label than source // OR, we got to the end, and limit == veryLimit @@ -711,7 +727,7 @@ public class BagFormatter { return this; } - public UnicodeLabel getLabelSource() { + public UnicodeLabel getLabelSource(boolean visible) { if (labelSource == null) { Map labelMap = new HashMap(); //labelMap.put("Lo","L&"); @@ -820,19 +836,6 @@ public class BagFormatter { if (o != null) output.append(o.toString()); } } - /** - * @return - */ - public String getLineSeparator() { - return lineSeparator; - } - - /** - * @param string - */ - public void setLineSeparator(String string) { - lineSeparator = string; - } /** * @param label @@ -936,4 +939,97 @@ public class BagFormatter { return this; } + /** + * @return + */ + public int getFullTotal() { + return fullTotal; + } + + /** + * @param i + */ + public BagFormatter setFullTotal(int i) { + fullTotal = i; + return this; + } + + /** + * @return + */ + public String getLineSeparator() { + return lineSeparator; + } + + /** + * @param string + */ + public BagFormatter setLineSeparator(String string) { + lineSeparator = string; + return this; + } + + /** + * @return + */ + public UnicodeLabel getRangeBreakSource() { + if (rangeBreakSource == null) { + Map labelMap = new HashMap(); + // reflects the code point types on p 25 + labelMap.put("Lo", "G&"); + labelMap.put("Lm", "G&"); + labelMap.put("Lu", "G&"); + labelMap.put("Lt", "G&"); + labelMap.put("Ll", "G&"); + labelMap.put("Mn", "G&"); + labelMap.put("Me", "G&"); + labelMap.put("Mc", "G&"); + labelMap.put("Nd", "G&"); + labelMap.put("Nl", "G&"); + labelMap.put("No", "G&"); + labelMap.put("Zs", "G&"); + labelMap.put("Pd", "G&"); + labelMap.put("Ps", "G&"); + labelMap.put("Pe", "G&"); + labelMap.put("Pc", "G&"); + labelMap.put("Po", "G&"); + labelMap.put("Pi", "G&"); + labelMap.put("Pf", "G&"); + labelMap.put("Sm", "G&"); + labelMap.put("Sc", "G&"); 
+ labelMap.put("Sk", "G&"); + labelMap.put("So", "G&"); + + labelMap.put("Zl", "Cf"); + labelMap.put("Zp", "Cf"); + + rangeBreakSource = + new UnicodeProperty + .FilteredProperty( + getUnicodePropertyFactory().getProperty( + "General_Category"), + new UnicodeProperty.MapFilter(labelMap)) + .setAllowValueAliasCollisions(true); + + /* + "Cn", // = Other, Not Assigned 0 + "Cc", // = Other, Control 15 + "Cf", // = Other, Format 16 + UnicodeProperty.UNUSED, // missing + "Co", // = Other, Private Use 18 + "Cs", // = Other, Surrogate 19 + */ + } + return rangeBreakSource; + } + + /** + * @param label + */ + public BagFormatter setRangeBreakSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + rangeBreakSource = label; + return this; + } + } \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java b/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java index faa47d780e2..dd4a32e3e64 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java @@ -6,26 +6,34 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java,v $ - * $Date: 2004/02/25 01:44:45 $ - * $Revision: 1.4 $ + * $Date: 2004/03/28 00:23:30 $ + * $Revision: 1.5 $ * ***************************************************************************************** */ package com.ibm.icu.dev.test.util; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; -import java.util.List; +import java.util.Set; import java.util.Locale; import java.util.Map; +import java.util.List; +import java.util.Arrays; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import com.ibm.icu.lang.UCharacter; 
import com.ibm.icu.lang.UProperty; +import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.Normalizer; import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; import com.ibm.icu.util.VersionInfo; diff --git a/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java b/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java index 7555876508f..8fcd1c19882 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $ - * $Date: 2004/02/25 01:44:45 $ - * $Revision: 1.10 $ + * $Date: 2004/03/28 00:23:30 $ + * $Revision: 1.11 $ * ***************************************************************************************** */ @@ -14,17 +14,19 @@ package com.ibm.icu.dev.test.util; // TODO integrate this into the test framework +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.TreeSet; +import java.util.Iterator; import java.io.IOException; import java.io.PrintWriter; import java.text.Collator; -import java.util.Comparator; -import java.util.Iterator; import java.util.Locale; import java.util.Set; -import java.util.TreeSet; -import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UScript; +import com.ibm.icu.lang.UProperty; import com.ibm.icu.text.Transliterator; import com.ibm.icu.text.UnicodeSet; @@ -107,7 +109,7 @@ public class TestBagFormatter { UnicodeProperty.Factory ups = ICUPropertyFactory.make(); us = ups.getSet("gc=mn", null, null); BagFormatter.CONSOLE.println("gc=mn"); - bf.showSetNames(BagFormatter.CONSOLE, us); + bf.showSetNames(bf.CONSOLE, us); if (true) return; //showNames("Name", ".*MARK.*"); diff --git a/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java 
b/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java index 913ce60a000..10e4dff51e1 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java @@ -1,7 +1,11 @@ package com.ibm.icu.dev.test.util; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; import java.text.NumberFormat; import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -12,6 +16,7 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import com.ibm.icu.dev.test.AbstractTestLog; import com.ibm.icu.dev.test.TestBoilerplate; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.impl.Utility; @@ -32,7 +37,7 @@ public class TestUtilities extends TestFmwk { UnicodeMap map1 = new UnicodeMap(); Map map2 = new HashMap(); Map map3 = new TreeMap(); - UnicodeMap.Equator equator = new UnicodeMap.SimpleEquator(); + UnicodeMap.Equator equator = UnicodeMap.SIMPLE_EQUATOR; SortedSet log = new TreeSet(); static String[] TEST_VALUES = {null, "A", "B", "C", "D", "E", "F"}; static Random random = new Random(12345); @@ -54,16 +59,21 @@ public class TestUtilities extends TestFmwk { map2.put(new Integer(start), value); check(counter); } + checkNext(LIMIT); + logln("Setting General Category"); map1 = new UnicodeMap(); - map2 = new HashMap(); - for (int cp = 0; cp < SET_LIMIT; ++cp) { + map2 = new TreeMap(); + for (int cp = 0; cp <= SET_LIMIT; ++cp) { int enumValue = UCharacter.getIntPropertyValue(cp, propEnum); //if (enumValue <= 0) continue; // for smaller set String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG); map1.put(cp, value); map2.put(new Integer(cp), value); - } + } + checkNext(Integer.MAX_VALUE); + + logln("Comparing General Category"); check(-1); logln("Comparing Values"); @@ -82,7 +92,7 @@ public class TestUtilities extends TestFmwk { if 
(!TestBoilerplate.verifySetsIdentical(this, set1, set2)) { throw new IllegalArgumentException("Halting"); } - } + } // check boilerplate List argList = new ArrayList(); @@ -95,6 +105,34 @@ public class TestUtilities extends TestFmwk { // TODO: the following is not being reached new UnicodeSetBoilerplate().run(args); } + + private void checkNext(int limit) { + logln("Comparing nextRange"); + UnicodeMap.MapIterator mi = new UnicodeMap.MapIterator(map1); + Map map3 = new TreeMap(); + while (mi.nextRange()) { + //System.out.println(Utility.hex(mi.codepoint) + ".." + Utility.hex(mi.codepointEnd) + " => " + mi.value); + for (int i = mi.codepoint; i <= mi.codepointEnd; ++i) { + if (i >= limit) continue; + map3.put(new Integer(i), mi.value); + } + } + checkMap(map2, map3); + + logln("Comparing next"); + mi.reset(); + map3 = new TreeMap(); + Object lastValue = new Object(); + while (mi.next()) { + if (!UnicodeMap.SIMPLE_EQUATOR.isEqual(lastValue, mi.value)) { + // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value); + lastValue = mi.value; + } + if (mi.codepoint >= limit) continue; + map3.put(new Integer(mi.codepoint), mi.value); + } + checkMap(map2, map3); + } public void check(int counter) { for (int i = 0; i < LIMIT; ++i) { @@ -111,6 +149,48 @@ public class TestUtilities extends TestFmwk { } } + void checkMap(Map m1, Map m2) { + if (m1.equals(m2)) return; + StringBuffer buffer = new StringBuffer(); + Set m1entries = m1.entrySet(); + Set m2entries = m2.entrySet(); + getEntries("\r\nIn First, and not Second", m1entries, m2entries, buffer, 20); + getEntries("\r\nIn Second, and not First", m2entries, m1entries, buffer, 20); + errln(buffer.toString()); + } + + static Comparator ENTRY_COMPARATOR = new Comparator() { + public int compare(Object o1, Object o2) { + if (o1 == o2) return 0; + if (o1 == null) return -1; + if (o2 == null) return 1; + Map.Entry a = (Map.Entry) o1; + Map.Entry b = (Map.Entry) o2; + int result = compare2(a.getKey(), 
b.getKey()); + if (result != 0) return result; + return compare2(a.getValue(), b.getValue()); + } + private int compare2(Object o1, Object o2) { + if (o1 == o2) return 0; + if (o1 == null) return -1; + if (o2 == null) return 1; + return ((Comparable)o1).compareTo(o2); + } + }; + + private void getEntries(String title, Set m1entries, Set m2entries, StringBuffer buffer, int limit) { + Set m1_m2 = new TreeSet(ENTRY_COMPARATOR); + m1_m2.addAll(m1entries); + m1_m2.removeAll(m2entries); + buffer.append(title + ": " + m1_m2.size() + "\r\n"); + for (Iterator it = m1_m2.iterator(); it.hasNext();) { + if (limit-- < 0) return; + Map.Entry entry = (Map.Entry) it.next(); + buffer.append(entry.getKey()).append(" => ") + .append(entry.getValue()).append("\r\n"); + } + } + static final int SET_LIMIT = 0x10FFFF; static final int CHECK_LIMIT = 0xFFFF; static final NumberFormat pf = NumberFormat.getPercentInstance(); @@ -148,7 +228,7 @@ public class TestUtilities extends TestFmwk { System.gc(); double start = System.currentTimeMillis(); for (int j = 0; j < iterations; ++j) - for (int cp = 0; cp < SET_LIMIT; ++cp) { + for (int cp = 0; cp <= SET_LIMIT; ++cp) { int enumValue = UCharacter.getIntPropertyValue(cp, propEnum); if (enumValue <= 0) continue; // for smaller set String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG); diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java index cddc87af835..8d319190eb5 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeLabel.java @@ -4,7 +4,9 @@ import com.ibm.icu.impl.Utility; import com.ibm.icu.text.UTF16; public abstract class UnicodeLabel { + public abstract String getValue(int codepoint, boolean isShort); + public String getValue(String s, String separator, boolean withCodePoint) { if (s.length() == 1) { // optimize simple case return getValue(s.charAt(0), withCodePoint); 
@@ -18,6 +20,7 @@ public abstract class UnicodeLabel { } return sb.toString(); } + public int getMaxWidth(boolean isShort) { return 0; } diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java index 5c439d774b7..641cd2ebcb9 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java @@ -3,6 +3,8 @@ package com.ibm.icu.dev.test.util; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.TreeSet; @@ -24,11 +26,17 @@ public final class UnicodeMap implements Cloneable { private int length = 2; private int[] transitions = {0,0x110000,0,0,0,0,0,0,0,0}; private Object[] values = new Object[10]; - { - values[1] = "TERMINAL"; // just for debugging - } + private int lastIndex = 0; + public UnicodeMap(Equator equator) { + this.equator = equator; + } + + public UnicodeMap() { + this(SIMPLE_EQUATOR); + } + /* Boilerplate */ public boolean equals(Object other) { if (other == null) return false; @@ -98,7 +106,9 @@ public final class UnicodeMap implements Cloneable { public interface Equator { /** * Comparator function. If overridden, must handle case of null, - * and compare any two objects in the array + * and compare any two objects that could be compared. 
+ * Must obey normal rules of symmetry: a=b => b=a + * and transitivity: a=b & b=c => a=c * @param a * @param b * @return @@ -106,13 +116,14 @@ public final class UnicodeMap implements Cloneable { public boolean isEqual(Object a, Object b); /** + * Must obey normal rules: a=b => getHashCode(a)=getHashCode(b) * @param object * @return */ public int getHashCode(Object object); } - public static final class SimpleEquator implements Equator { + private static final class SimpleEquator implements Equator { public boolean isEqual(Object a, Object b) { if (a == b) return true; if (a == null || b == null) return false; @@ -123,8 +134,8 @@ public final class UnicodeMap implements Cloneable { return a.hashCode(); } } - private static Equator SIMPLE = new SimpleEquator(); - private Equator equator = SIMPLE; + public static Equator SIMPLE_EQUATOR = new SimpleEquator(); + private Equator equator = SIMPLE_EQUATOR; /** * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1] @@ -442,6 +453,56 @@ public final class UnicodeMap implements Cloneable { return values[_findIndex(codepoint)]; } + /** + * Follow the style used by UnicodeSetIterator + */ + public static class MapIterator { + public int codepoint; + public int codepointEnd; + public Object value; + + private UnicodeMap map; + private int index; + private int startRange; + private int endRange; + private Object lastValue; + + public MapIterator(UnicodeMap map) { + reset(map); + } + // note: length of 2 means {0, 110000}. Only want to index up to 0! + public boolean nextRange() { + if (index < 0 || index >= map.length - 1) return false; + value = map.values[index]; + codepoint = startRange = map.transitions[index++]; + codepointEnd = endRange = map.transitions[index] - 1; // -1 to make limit into end + return true; + } + public boolean next() { + if (startRange > endRange) { + //System.out.println("***" + Utility.hex(startRange) + ".."
+ Utility.hex(endRange)); + if (!nextRange()) return false; + // index now points AFTER the start of the range + lastValue = map.values[index-1]; + //System.out.println("***" + Utility.hex(codepoint) + ".." + Utility.hex(codepointEnd) + " => " + lastValue); + } + value = lastValue; + codepoint = codepointEnd = startRange++; // set to first, and iterate + return true; + } + + public MapIterator reset() { + index = 0; + startRange = 0; + endRange = -1; + return this; + } + public MapIterator reset(UnicodeMap map) { + this.map = map; + return reset(); + } + } + public String toString() { return toString(null); } diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java index 75c2db45236..4d43aff6dda 100644 --- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java +++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java @@ -3,13 +3,18 @@ package com.ibm.icu.dev.test.util; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.TreeSet; + +import sun.io.UnknownCharacterException; import com.ibm.icu.impl.Utility; import com.ibm.icu.text.UTF16; @@ -18,7 +23,7 @@ import com.ibm.icu.text.UnicodeSetIterator; public abstract class UnicodeProperty extends UnicodeLabel { - public static boolean DEBUG = true; + public static boolean DEBUG = false; public static String CHECK_NAME = "FC_NFKC_Closure"; public static int CHECK_VALUE = 0x037A; @@ -221,6 +226,7 @@ public abstract class UnicodeProperty extends UnicodeLabel { return getSet(propAndValue, null, null); } } + public static class FilteredProperty extends UnicodeProperty { private UnicodeProperty property; @@ -458,6 +464,14 @@ public abstract class UnicodeProperty 
extends UnicodeLabel { return version; } } + + public static class UnicodeMapProperty extends SimpleProperty { + private UnicodeMap unicodeMap; + protected String _getValue(int codepoint) { + return (String) unicodeMap.getValue(codepoint); + } + } + public final String getValue(int codepoint, boolean getShortest) { String result = getValue(codepoint); @@ -533,7 +547,7 @@ public abstract class UnicodeProperty extends UnicodeLabel { if (isType(STRING_OR_MISC_MASK)) { for (int i = 0; i <= 0x10FFFF; ++i) { String value = getValue(i); - if (matcher.matches(value)) { + if (value != null && matcher.matches(value)) { result.add(i); } } @@ -655,7 +669,7 @@ public abstract class UnicodeProperty extends UnicodeLabel { // we can do this with char, since no surrogates are involved for (int i = 0; i < source.length(); ++i) { char ch = source.charAt(i); - if (ch == '_' || ch == ' ' || ch == '-') { + if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) { gotOne = true; } else { char ch2 = Character.toLowerCase(ch);