performance fixes for UnicodeMap

X-SVN-Rev: 14493
This commit is contained in:
Mark Davis 2004-02-12 08:11:32 +00:00
parent ed21be41c5
commit 73529e8ad9
3 changed files with 239 additions and 16 deletions

View file

@ -0,0 +1,219 @@
package com.ibm.icu.dev.test.util;
import java.text.NumberFormat;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.UnicodeSet;
/**
 * Tests for {@link UnicodeMap}: a randomized consistency check against a
 * {@link HashMap} holding the same code point to value assignments, plus
 * rough timing comparisons between UnicodeMap, HashMap, TreeMap and direct
 * ICU property lookups.
 *
 * <p>Container "type" codes used by the timing helpers:
 * 0 = UnicodeMap, 1 = HashMap, 2 = ICU (direct property lookup), 3 = TreeMap.
 */
public class TestUtilities extends TestFmwk {
    /** Exclusive upper bound for code points used by the randomized test. */
    static final int LIMIT = 0x15; // limit to make testing more realistic in terms of collisions
    /** Number of random puts performed by the randomized test. */
    static final int ITERATIONS = 1000000;
    /** When true, every random put is logged. */
    static final boolean SHOW_PROGRESS = false;
    /** When true, enables the debugging hook inside the randomized loop. */
    static final boolean DEBUG = false;

    /**
     * Exclusive upper bound for the code point loops that fill the maps.
     * NOTE(review): as an exclusive bound this skips U+10FFFF itself; confirm
     * whether that is intended.
     */
    static final int SET_LIMIT = 0x10FFFF;
    /**
     * Exclusive upper bound for the code point loop in the lookup timing.
     * NOTE(review): as an exclusive bound this skips U+FFFF itself.
     */
    static final int CHECK_LIMIT = 0xFFFF;

    /** Formats the timing ratios logged by {@link #TestTime()}. */
    static final NumberFormat pf = NumberFormat.getPercentInstance();
    /** Formats the per-iteration times logged by the timing helpers. */
    static final NumberFormat nf = NumberFormat.getInstance();

    public static void main(String[] args) throws Exception {
        new TestUtilities().run(args);
    }

    /** Map under test (type 0). */
    UnicodeMap map1 = new UnicodeMap();
    /** Reference map keyed by Integer code point (type 1). */
    Map map2 = new HashMap();
    /** Sorted reference map keyed by Integer code point (type 3); only used for timing. */
    Map map3 = new TreeMap();
    /** Equality used when comparing a UnicodeMap value with a reference value. */
    UnicodeMap.Equator equator = new UnicodeMap.SimpleEquator();
    /** Distinct "codepoint TAB value" lines of the random puts, dumped on failure. */
    SortedSet log = new TreeSet();
    /** Property whose values are stored in the maps. */
    int propEnum = UProperty.GENERAL_CATEGORY;

    /**
     * Applies the same random puts to a UnicodeMap and a HashMap, checking
     * after every put that they agree, then loads both with General Category
     * names and compares their value sets and the per-value code point sets.
     */
    public void TestUnicodeMap() {
        // Start from fresh containers so the outcome does not depend on
        // whatever another test method left behind in the shared fields.
        map1 = new UnicodeMap();
        map2 = new HashMap();
        log.clear();

        Random random = new Random(12345);
        String[] values = {null, "A", "B", "C", "D", "E", "F"};

        // do random change to both, then compare
        logln("Comparing against HashMap");
        for (int counter = 0; counter < ITERATIONS; ++counter) {
            int start = random.nextInt(LIMIT);
            String value = values[random.nextInt(values.length)];
            String logline = Utility.hex(start) + "\t" + value;
            if (SHOW_PROGRESS) {
                logln(counter + "\t" + logline);
            }
            log.add(logline);
            if (DEBUG && counter == 144) {
                System.out.println(" debug"); // convenient place for a breakpoint
            }
            map1.put(start, value);
            map2.put(new Integer(start), value);
            check(counter);
        }

        logln("Setting General Category");
        map1 = new UnicodeMap();
        map2 = new HashMap();
        for (int cp = 0; cp < SET_LIMIT; ++cp) {
            int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
            //if (enumValue <= 0) continue; // for smaller set
            String value = UCharacter.getPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
            map1.put(cp, value);
            map2.put(new Integer(cp), value);
        }

        logln("Comparing General Category");
        // NOTE(review): check() only looks at code points below LIMIT; the
        // per-value set comparison below is what covers the full range.
        check(-1);

        logln("Comparing Values");
        Set values1 = (Set) map1.getAvailableValues(new TreeSet());
        Set values2 = new TreeSet(map2.values());
        if (!values1.equals(values2)) {
            Set temp;
            errln("Values differ:");
            errln("UnicodeMap - HashMap");
            temp = new TreeSet(values1);
            temp.removeAll(values2);
            errln(show(temp));
            errln("HashMap - UnicodeMap");
            temp = new TreeSet(values2);
            temp.removeAll(values1);
            errln(show(temp));
            return;
        }

        logln("Comparing Sets");
        for (Iterator it = values1.iterator(); it.hasNext();) {
            Object value = it.next();
            logln(value == null ? "null" : value.toString());
            UnicodeSet set1 = map1.getSet(value);
            UnicodeSet set2 = getSet(map2, value);
            if (!set1.equals(set2)) {
                errln("Sets differ:");
                errln("UnicodeMap - HashMap");
                errln(new UnicodeSet(set1).removeAll(set2).toPattern(true));
                errln("HashMap - UnicodeMap");
                errln(new UnicodeSet(set2).removeAll(set1).toPattern(true));
            }
        }
    }

    /**
     * Verifies that {@code map1} and {@code map2} agree for every code point
     * in {@code [0, LIMIT)}. On the first mismatch the maps and the put log
     * are reported through {@code errln} and the test is aborted.
     *
     * @param counter iteration number included in the error message
     * @throws IllegalArgumentException ("Halting") on the first mismatch
     */
    public void check(int counter) {
        for (int i = 0; i < LIMIT; ++i) {
            Object value1 = map1.getValue(i);
            Object value2 = map2.get(new Integer(i));
            if (equator.isEqual(value1, value2)) {
                continue;
            }
            errln(counter + " Difference at " + Utility.hex(i)
                    + "\t UnicodeMap: " + value1
                    + "\t HashMap: " + value2);
            errln("UnicodeMap: " + map1);
            errln("Log: " + show(log));
            errln("HashMap: " + show(map2));
            throw new IllegalArgumentException("Halting");
        }
    }

    /**
     * Times filling and then querying each container type and logs the
     * HashMap, ICU and TreeMap times as a ratio of the UnicodeMap time.
     */
    public void TestTime() {
        double hashTime, umTime, icuTime, treeTime;

        umTime = checkSetTime(20, 0);
        hashTime = checkSetTime(20, 1);
        logln("Percentage: " + pf.format(hashTime/umTime));
        treeTime = checkSetTime(20, 3);
        logln("Percentage: " + pf.format(treeTime/umTime));
        //logln(map1.toString());

        // The lookups below rely on all three containers still holding the
        // data stored by the checkSetTime calls above.
        umTime = checkGetTime(1000, 0);
        hashTime = checkGetTime(1000, 1);
        logln("Percentage: " + pf.format(hashTime/umTime));
        icuTime = checkGetTime(1000, 2);
        logln("Percentage: " + pf.format(icuTime/umTime));
        treeTime = checkGetTime(1000, 3);
        logln("Percentage: " + pf.format(treeTime/umTime));
    }

    /** Returns the label logged for a container type code. */
    private static String typeName(int type) {
        switch (type) {
        case 0: return "UnicodeMap";
        case 1: return "HashMap";
        case 2: return "ICU";
        default: return "TreeMap";
        }
    }

    /**
     * Runs one untimed warm-up pass, then times {@code iterations} fill
     * passes for the given container type and logs the result.
     *
     * @return seconds per fill pass
     */
    double checkSetTime(int iterations, int type) {
        _checkSetTime(1, type);
        double result = _checkSetTime(iterations, type);
        logln(typeName(type) + "\t" + nf.format(result));
        return result;
    }

    /**
     * Stores the General Category name of every code point below
     * {@link #SET_LIMIT} whose property value is positive into the container
     * selected by {@code type}, {@code iterations} times over.
     *
     * <p>Only the selected container is replaced with a fresh instance. The
     * other containers keep their contents so that a later
     * {@link #checkGetTime(int, int)} measures lookups on populated maps.
     *
     * @return elapsed seconds divided by {@code iterations}
     */
    double _checkSetTime(int iterations, int type) {
        switch (type) {
        case 0: map1 = new UnicodeMap(); break;
        case 1: map2 = new HashMap(); break;
        case 3: map3 = new TreeMap(); break;
        }
        System.gc();
        double start = System.currentTimeMillis();
        for (int j = 0; j < iterations; ++j) {
            for (int cp = 0; cp < SET_LIMIT; ++cp) {
                int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
                if (enumValue <= 0) continue; // for smaller set
                String value = UCharacter.getPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
                switch (type) {
                case 0: map1.put(cp, value); break;
                case 1: map2.put(new Integer(cp), value); break;
                case 3: map3.put(new Integer(cp), value); break;
                }
            }
        }
        double end = System.currentTimeMillis();
        return (end - start) / 1000 / iterations;
    }

    /**
     * Runs one untimed warm-up pass, then times {@code iterations} lookup
     * passes for the given container type and logs the result.
     *
     * @return seconds per lookup pass
     */
    double checkGetTime(int iterations, int type) {
        _checkGetTime(1, type);
        double result = _checkGetTime(iterations, type);
        logln(typeName(type) + "\t" + nf.format(result));
        return result;
    }

    /**
     * Looks up every code point below {@link #CHECK_LIMIT} in the container
     * selected by {@code type} (for type 2, directly through UCharacter),
     * {@code iterations} times over. The looked-up values are discarded.
     *
     * @return elapsed seconds divided by {@code iterations}
     */
    double _checkGetTime(int iterations, int type) {
        System.gc();
        double start = System.currentTimeMillis();
        for (int j = 0; j < iterations; ++j) {
            for (int cp = 0; cp < CHECK_LIMIT; ++cp) {
                switch (type) {
                case 0: map1.getValue(cp); break;
                case 1: map2.get(new Integer(cp)); break;
                case 2: {
                    int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
                    //if (enumValue <= 0) continue;
                    UCharacter.getPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
                    break;
                }
                case 3: map3.get(new Integer(cp)); break;
                }
            }
        }
        double end = System.currentTimeMillis();
        return (end - start) / 1000 / iterations;
    }

    /** Returns the elements of {@code c}, one per line, each terminated by CR LF. */
    String show(Collection c) {
        StringBuffer buffer = new StringBuffer();
        for (Iterator it = c.iterator(); it.hasNext();) {
            buffer.append(it.next()).append("\r\n");
        }
        return buffer.toString();
    }

    /** Returns the entries of {@code m} as {@code key=>value} lines, each terminated by CR LF. */
    String show(Map m) {
        StringBuffer buffer = new StringBuffer();
        for (Iterator it = m.entrySet().iterator(); it.hasNext();) {
            Map.Entry entry = (Map.Entry) it.next();
            buffer.append(entry.getKey()).append("=>").append(entry.getValue()).append("\r\n");
        }
        return buffer.toString();
    }

    /**
     * Collects the Integer keys of {@code m} whose value equals {@code value}
     * into a UnicodeSet. Null map values are handled: they match only a null
     * {@code value}.
     */
    UnicodeSet getSet(Map m, Object value) {
        UnicodeSet result = new UnicodeSet();
        for (Iterator it = m.entrySet().iterator(); it.hasNext();) {
            Map.Entry entry = (Map.Entry) it.next();
            Object val = entry.getValue();
            // Null-safe: the maps in this test may legitimately hold null values.
            boolean matches = (val == null) ? (value == null) : val.equals(value);
            if (matches) {
                result.add(((Integer) entry.getKey()).intValue());
            }
        }
        return result;
    }
}

View file

@ -79,7 +79,7 @@ public final class UnicodeMap {
* @param codepoint
* @return
*/
private int findIndex(int c) {
private int _findIndex(int c) {
int lo = 0;
int hi = length - 1;
int i = (lo + hi) >>> 1;
@ -98,7 +98,7 @@ public final class UnicodeMap {
}
private void _checkFind(int codepoint, int value) {
int other = _findIndex(codepoint);
int other = __findIndex(codepoint);
if (other != value) {
throw new IllegalArgumentException("Invariant failed: binary search"
+ "\t" + Utility.hex(codepoint) + ": " + value
@ -106,7 +106,7 @@ public final class UnicodeMap {
}
}
private int _findIndex(int codepoint) {
private int __findIndex(int codepoint) {
// TODO use binary search
for (int i = length-1; i > 0; --i) {
if (transitions[i] <= codepoint) return i;
@ -140,7 +140,7 @@ public final class UnicodeMap {
* @param index
* @param count
*/
private void removeAt(int index, int count) {
private void _removeAt(int index, int count) {
for (int i = index + count; i < length; ++i) {
transitions[i-count] = transitions[i];
values[i-count] = values[i];
@ -154,7 +154,7 @@ public final class UnicodeMap {
* @param index
* @param count
*/
private void insertGapAt(int index, int count) {
private void _insertGapAt(int index, int count) {
int newLength = length + count;
int[] oldtransitions = transitions;
Object[] oldvalues = values;
@ -181,7 +181,7 @@ public final class UnicodeMap {
* @return this, for chaining
*/
private UnicodeMap _put(int codepoint, Object value) {
int baseIndex = findIndex(codepoint);
int baseIndex = _findIndex(codepoint);
int limitIndex = baseIndex + 1;
// cases are (a) value is already set
if (equator.isEqual(values[baseIndex], value)) return this;
@ -199,13 +199,13 @@ public final class UnicodeMap {
// A1a connects with previous & following, so remove index
if (connectsWithPrevious) {
if (connectsWithFollowing) {
removeAt(baseIndex, 2);
_removeAt(baseIndex, 2);
return this;
}
removeAt(baseIndex, 1); // extend previous
_removeAt(baseIndex, 1); // extend previous
return this;
} else if (connectsWithFollowing) {
removeAt(baseIndex, 1); // extend following backwards
_removeAt(baseIndex, 1); // extend following backwards
transitions[baseIndex] = codepoint;
return this;
}
@ -220,7 +220,7 @@ public final class UnicodeMap {
} else {
// otherwise insert new transition
transitions[baseIndex] = codepoint+1; // fix following range
insertGapAt(baseIndex, 1);
_insertGapAt(baseIndex, 1);
values[baseIndex] = value;
transitions[baseIndex] = codepoint;
}
@ -236,14 +236,14 @@ public final class UnicodeMap {
--transitions[limitIndex];
return this;
} else {
insertGapAt(limitIndex, 1);
_insertGapAt(limitIndex, 1);
transitions[limitIndex] = codepoint;
values[limitIndex] = value;
}
return this;
}
// CASE: in middle of range
insertGapAt(++baseIndex,2);
_insertGapAt(++baseIndex,2);
transitions[baseIndex] = codepoint;
values[baseIndex] = value;
transitions[++baseIndex] = codepoint + 1;
@ -330,11 +330,14 @@ public final class UnicodeMap {
*/
public UnicodeSet getSet(Object value, UnicodeSet result) {
if (result == null) result = new UnicodeSet();
for (int i = 0; i < length; ++i) {
if (values[i] == value) result.add(transitions[i], transitions[i+1]);
for (int i = 0; i < length - 1; ++i) {
if (values[i] == value) result.add(transitions[i], transitions[i+1]-1);
}
return result;
}
/**
 * Convenience overload of {@code getSet(Object, UnicodeSet)} that passes
 * {@code null} as the result set, so that overload allocates a new
 * UnicodeSet to hold the result.
 * @param value the value to look up
 * @return the set produced by {@code getSet(value, null)}
 */
public UnicodeSet getSet(Object value) {
return getSet(value,null);
}
/**
* Returns the list of possible values. Deposits into
* result if it is not null. Remember to clear if you just want
@ -343,7 +346,7 @@ public final class UnicodeMap {
*/
public Collection getAvailableValues(Collection result) {
if (result == null) result = new HashSet();
for (int i = 0; i < length; ++i) {
for (int i = 0; i < length - 1; ++i) {
Object value = values[i];
if (value == null) continue;
if (result.contains(value)) continue;
@ -361,7 +364,7 @@ public final class UnicodeMap {
if (codepoint < 0 || codepoint > 0x10FFFF) {
throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
}
return values[findIndex(codepoint)];
return values[_findIndex(codepoint)];
}
public String toString() {

View file

@ -479,6 +479,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
Iterator it2 = getValueAliases(value,temp).iterator();
while (it2.hasNext()) {
String value2 = (String)it2.next();
System.out.println("Values:" + value2);
if (matcher.matches(value2)
|| matcher.matches(toSkeleton(value2))) {
cacheValueToSet.getSet(value, result);