mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
performance fixes for UnicodeMap
X-SVN-Rev: 14493
This commit is contained in:
parent
ed21be41c5
commit
73529e8ad9
3 changed files with 239 additions and 16 deletions
219
icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
Normal file
219
icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
Normal file
|
@ -0,0 +1,219 @@
|
|||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.text.NumberFormat;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestUtilities extends TestFmwk {
|
||||
static final int LIMIT = 0x15; // limit to make testing more realistic in terms of collisions
|
||||
static final int ITERATIONS = 1000000;
|
||||
static final boolean SHOW_PROGRESS = false;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestUtilities().run(args);
|
||||
}
|
||||
|
||||
UnicodeMap map1 = new UnicodeMap();
|
||||
Map map2 = new HashMap();
|
||||
Map map3 = new TreeMap();
|
||||
UnicodeMap.Equator equator = new UnicodeMap.SimpleEquator();
|
||||
SortedSet log = new TreeSet();
|
||||
|
||||
public void TestUnicodeMap() {
|
||||
Random random = new Random(12345);
|
||||
String[] values = {null, "A", "B", "C", "D", "E", "F"};
|
||||
// do random change to both, then compare
|
||||
logln("Comparing against HashMap");
|
||||
for (int counter = 0; counter < ITERATIONS; ++counter) {
|
||||
int start = random.nextInt(LIMIT);
|
||||
String value = values[random.nextInt(values.length)];
|
||||
String logline = Utility.hex(start) + "\t" + value;
|
||||
if (SHOW_PROGRESS) logln(counter + "\t" + logline);
|
||||
log.add(logline);
|
||||
if (DEBUG && counter == 144) {
|
||||
System.out.println(" debug");
|
||||
}
|
||||
map1.put(start, value);
|
||||
map2.put(new Integer(start), value);
|
||||
check(counter);
|
||||
}
|
||||
logln("Setting General Category");
|
||||
map1 = new UnicodeMap();
|
||||
map2 = new HashMap();
|
||||
for (int cp = 0; cp < SET_LIMIT; ++cp) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
//if (enumValue <= 0) continue; // for smaller set
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
map1.put(cp, value);
|
||||
map2.put(new Integer(cp), value);
|
||||
}
|
||||
logln("Comparing General Category");
|
||||
check(-1);
|
||||
logln("Comparing Values");
|
||||
Set values1 = (Set) map1.getAvailableValues(new TreeSet());
|
||||
Set values2 = new TreeSet(map2.values());
|
||||
Set temp;
|
||||
if (!values1.equals(values2)) {
|
||||
errln("Values differ:");
|
||||
errln("UnicodeMap - HashMap");
|
||||
temp = new TreeSet(values1);
|
||||
temp.removeAll(values2);
|
||||
errln(show(temp));
|
||||
errln("HashMap - UnicodeMap");
|
||||
temp = new TreeSet(values2);
|
||||
temp.removeAll(values1);
|
||||
errln(show(temp));
|
||||
} else {
|
||||
logln("Comparing Sets");
|
||||
for (Iterator it = values1.iterator(); it.hasNext();) {
|
||||
Object value = it.next();
|
||||
logln(value == null ? "null" : value.toString());
|
||||
UnicodeSet set1 = map1.getSet(value);
|
||||
UnicodeSet set2 = getSet(map2, value);
|
||||
if (!set1.equals(set2)) {
|
||||
errln("Sets differ:");
|
||||
errln("UnicodeMap - HashMap");
|
||||
errln(new UnicodeSet(set1).removeAll(set2).toPattern(true));
|
||||
errln("HashMap - UnicodeMap");
|
||||
errln(new UnicodeSet(set2).removeAll(set1).toPattern(true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void check(int counter) {
|
||||
for (int i = 0; i < LIMIT; ++i) {
|
||||
Object value1 = map1.getValue(i);
|
||||
Object value2 = map2.get(new Integer(i));
|
||||
if (!equator.isEqual(value1, value2)) {
|
||||
errln(counter + " Difference at " + Utility.hex(i)
|
||||
+ "\t UnicodeMap: " + value1
|
||||
+ "\t HashMap: " + value2);
|
||||
errln("UnicodeMap: " + map1);
|
||||
errln("Log: " + show(log));
|
||||
errln("HashMap: " + show(map2));
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final int SET_LIMIT = 0x10FFFF;
|
||||
static final int CHECK_LIMIT = 0xFFFF;
|
||||
static final NumberFormat pf = NumberFormat.getPercentInstance();
|
||||
static final NumberFormat nf = NumberFormat.getInstance();
|
||||
|
||||
public void TestTime() {
|
||||
double hashTime, umTime, icuTime, treeTime;
|
||||
umTime = checkSetTime(20, 0);
|
||||
hashTime = checkSetTime(20, 1);
|
||||
logln("Percentage: " + pf.format(hashTime/umTime));
|
||||
treeTime = checkSetTime(20, 3);
|
||||
logln("Percentage: " + pf.format(treeTime/umTime));
|
||||
//logln(map1.toString());
|
||||
|
||||
umTime = checkGetTime(1000, 0);
|
||||
hashTime = checkGetTime(1000, 1);
|
||||
logln("Percentage: " + pf.format(hashTime/umTime));
|
||||
icuTime = checkGetTime(1000, 2);
|
||||
logln("Percentage: " + pf.format(icuTime/umTime));
|
||||
treeTime = checkGetTime(1000, 3);
|
||||
logln("Percentage: " + pf.format(treeTime/umTime));
|
||||
}
|
||||
|
||||
int propEnum = UProperty.GENERAL_CATEGORY;
|
||||
|
||||
double checkSetTime(int iterations, int type) {
|
||||
_checkSetTime(1,type);
|
||||
double result = _checkSetTime(iterations, type);
|
||||
logln((type == 0 ? "UnicodeMap" : type == 1 ? "HashMap" : type == 2 ? "ICU" : "TreeMap") + "\t" + nf.format(result));
|
||||
return result;
|
||||
}
|
||||
double _checkSetTime(int iterations, int type) {
|
||||
map1 = new UnicodeMap();
|
||||
map2 = new HashMap();
|
||||
System.gc();
|
||||
double start = System.currentTimeMillis();
|
||||
for (int j = 0; j < iterations; ++j)
|
||||
for (int cp = 0; cp < SET_LIMIT; ++cp) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
if (enumValue <= 0) continue; // for smaller set
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
switch(type) {
|
||||
case 0: map1.put(cp, value); break;
|
||||
case 1: map2.put(new Integer(cp), value); break;
|
||||
case 3: map3.put(new Integer(cp), value); break;
|
||||
}
|
||||
}
|
||||
double end = System.currentTimeMillis();
|
||||
return (end-start)/1000/iterations;
|
||||
}
|
||||
|
||||
double checkGetTime(int iterations, int type) {
|
||||
_checkGetTime(1,type);
|
||||
double result = _checkGetTime(iterations, type);
|
||||
logln((type == 0 ? "UnicodeMap" : type == 1 ? "HashMap" : type == 2 ? "ICU" : "TreeMap") + "\t" + nf.format(result));
|
||||
return result;
|
||||
}
|
||||
double _checkGetTime(int iterations, int type) {
|
||||
System.gc();
|
||||
double start = System.currentTimeMillis();
|
||||
for (int j = 0; j < iterations; ++j)
|
||||
for (int cp = 0; cp < CHECK_LIMIT; ++cp) {
|
||||
switch (type) {
|
||||
case 0: map1.getValue(cp); break;
|
||||
case 1: map2.get(new Integer(cp)); break;
|
||||
case 2:
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
//if (enumValue <= 0) continue;
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
break;
|
||||
case 3: map3.get(new Integer(cp)); break;
|
||||
}
|
||||
}
|
||||
double end = System.currentTimeMillis();
|
||||
return (end-start)/1000/iterations;
|
||||
}
|
||||
|
||||
String show(Collection c) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (Iterator it = c.iterator(); it.hasNext();) {
|
||||
buffer.append(it.next() + "\r\n");
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
String show(Map m) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
|
||||
Object key = it.next();
|
||||
buffer.append(key + "=>" + m.get(key) + "\r\n");
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
UnicodeSet getSet(Map m, Object value) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
|
||||
Object key = it.next();
|
||||
Object val = m.get(key);
|
||||
if (!val.equals(value)) continue;
|
||||
result.add(((Integer)key).intValue());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -79,7 +79,7 @@ public final class UnicodeMap {
|
|||
* @param codepoint
|
||||
* @return
|
||||
*/
|
||||
private int findIndex(int c) {
|
||||
private int _findIndex(int c) {
|
||||
int lo = 0;
|
||||
int hi = length - 1;
|
||||
int i = (lo + hi) >>> 1;
|
||||
|
@ -98,7 +98,7 @@ public final class UnicodeMap {
|
|||
}
|
||||
|
||||
private void _checkFind(int codepoint, int value) {
|
||||
int other = _findIndex(codepoint);
|
||||
int other = __findIndex(codepoint);
|
||||
if (other != value) {
|
||||
throw new IllegalArgumentException("Invariant failed: binary search"
|
||||
+ "\t" + Utility.hex(codepoint) + ": " + value
|
||||
|
@ -106,7 +106,7 @@ public final class UnicodeMap {
|
|||
}
|
||||
}
|
||||
|
||||
private int _findIndex(int codepoint) {
|
||||
private int __findIndex(int codepoint) {
|
||||
// TODO use binary search
|
||||
for (int i = length-1; i > 0; --i) {
|
||||
if (transitions[i] <= codepoint) return i;
|
||||
|
@ -140,7 +140,7 @@ public final class UnicodeMap {
|
|||
* @param index
|
||||
* @param count
|
||||
*/
|
||||
private void removeAt(int index, int count) {
|
||||
private void _removeAt(int index, int count) {
|
||||
for (int i = index + count; i < length; ++i) {
|
||||
transitions[i-count] = transitions[i];
|
||||
values[i-count] = values[i];
|
||||
|
@ -154,7 +154,7 @@ public final class UnicodeMap {
|
|||
* @param index
|
||||
* @param count
|
||||
*/
|
||||
private void insertGapAt(int index, int count) {
|
||||
private void _insertGapAt(int index, int count) {
|
||||
int newLength = length + count;
|
||||
int[] oldtransitions = transitions;
|
||||
Object[] oldvalues = values;
|
||||
|
@ -181,7 +181,7 @@ public final class UnicodeMap {
|
|||
* @return this, for chaining
|
||||
*/
|
||||
private UnicodeMap _put(int codepoint, Object value) {
|
||||
int baseIndex = findIndex(codepoint);
|
||||
int baseIndex = _findIndex(codepoint);
|
||||
int limitIndex = baseIndex + 1;
|
||||
// cases are (a) value is already set
|
||||
if (equator.isEqual(values[baseIndex], value)) return this;
|
||||
|
@ -199,13 +199,13 @@ public final class UnicodeMap {
|
|||
// A1a connects with previous & following, so remove index
|
||||
if (connectsWithPrevious) {
|
||||
if (connectsWithFollowing) {
|
||||
removeAt(baseIndex, 2);
|
||||
_removeAt(baseIndex, 2);
|
||||
return this;
|
||||
}
|
||||
removeAt(baseIndex, 1); // extend previous
|
||||
_removeAt(baseIndex, 1); // extend previous
|
||||
return this;
|
||||
} else if (connectsWithFollowing) {
|
||||
removeAt(baseIndex, 1); // extend following backwards
|
||||
_removeAt(baseIndex, 1); // extend following backwards
|
||||
transitions[baseIndex] = codepoint;
|
||||
return this;
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ public final class UnicodeMap {
|
|||
} else {
|
||||
// otherwise insert new transition
|
||||
transitions[baseIndex] = codepoint+1; // fix following range
|
||||
insertGapAt(baseIndex, 1);
|
||||
_insertGapAt(baseIndex, 1);
|
||||
values[baseIndex] = value;
|
||||
transitions[baseIndex] = codepoint;
|
||||
}
|
||||
|
@ -236,14 +236,14 @@ public final class UnicodeMap {
|
|||
--transitions[limitIndex];
|
||||
return this;
|
||||
} else {
|
||||
insertGapAt(limitIndex, 1);
|
||||
_insertGapAt(limitIndex, 1);
|
||||
transitions[limitIndex] = codepoint;
|
||||
values[limitIndex] = value;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
// CASE: in middle of range
|
||||
insertGapAt(++baseIndex,2);
|
||||
_insertGapAt(++baseIndex,2);
|
||||
transitions[baseIndex] = codepoint;
|
||||
values[baseIndex] = value;
|
||||
transitions[++baseIndex] = codepoint + 1;
|
||||
|
@ -330,11 +330,14 @@ public final class UnicodeMap {
|
|||
*/
|
||||
public UnicodeSet getSet(Object value, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (values[i] == value) result.add(transitions[i], transitions[i+1]);
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
if (values[i] == value) result.add(transitions[i], transitions[i+1]-1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
public UnicodeSet getSet(Object value) {
|
||||
return getSet(value,null);
|
||||
}
|
||||
/**
|
||||
* Returns the list of possible values. Deposits into
|
||||
* result if it is not null. Remember to clear if you just want
|
||||
|
@ -343,7 +346,7 @@ public final class UnicodeMap {
|
|||
*/
|
||||
public Collection getAvailableValues(Collection result) {
|
||||
if (result == null) result = new HashSet();
|
||||
for (int i = 0; i < length; ++i) {
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
Object value = values[i];
|
||||
if (value == null) continue;
|
||||
if (result.contains(value)) continue;
|
||||
|
@ -361,7 +364,7 @@ public final class UnicodeMap {
|
|||
if (codepoint < 0 || codepoint > 0x10FFFF) {
|
||||
throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
|
||||
}
|
||||
return values[findIndex(codepoint)];
|
||||
return values[_findIndex(codepoint)];
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
@ -479,6 +479,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
Iterator it2 = getValueAliases(value,temp).iterator();
|
||||
while (it2.hasNext()) {
|
||||
String value2 = (String)it2.next();
|
||||
System.out.println("Values:" + value2);
|
||||
if (matcher.matches(value2)
|
||||
|| matcher.matches(toSkeleton(value2))) {
|
||||
cacheValueToSet.getSet(value, result);
|
||||
|
|
Loading…
Add table
Reference in a new issue