mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
quick and dirty collation test
X-SVN-Rev: 9615
This commit is contained in:
parent
9a1d641471
commit
ad9daf070c
2 changed files with 354 additions and 2 deletions
351
tools/unicodetools/com/ibm/text/UCD/CheckCollator.java
Normal file
351
tools/unicodetools/com/ibm/text/UCD/CheckCollator.java
Normal file
|
@ -0,0 +1,351 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $
|
||||
* $Date: 2002/08/08 15:35:01 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
// http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
import java.text.NumberFormat;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* This is a quick and dirty program to get some idea of collation performance, comparing old Java to new stuff.
|
||||
*/
|
||||
abstract public class CheckCollator {
|
||||
static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_";
|
||||
static final boolean DO_RAW = false;
|
||||
|
||||
static final NumberFormat nf = NumberFormat.getInstance();
|
||||
static final NumberFormat percent = NumberFormat.getPercentInstance();
|
||||
static {
|
||||
nf.setMaximumFractionDigits(2);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
|
||||
// later, drive off of args
|
||||
|
||||
// choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai
|
||||
test(Locale.KOREAN, "Korean");
|
||||
test(Locale.ENGLISH, "Latin");
|
||||
test(Locale.FRENCH, "Latin");
|
||||
test(Locale.JAPANESE, "Japanese");
|
||||
}
|
||||
|
||||
public static void test(Locale loc, String name) throws IOException {
|
||||
|
||||
System.out.println();
|
||||
System.out.println("Testing " + loc.getDisplayName() + ", file: " + name);
|
||||
System.out.println();
|
||||
|
||||
// get test data
|
||||
|
||||
String fileName = PREFIX + name + ".txt";
|
||||
|
||||
FileInputStream fis = new FileInputStream(fileName);
|
||||
InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle");
|
||||
BufferedReader br = new BufferedReader(isr, 32*1024);
|
||||
|
||||
int counter = 0;
|
||||
|
||||
ArrayList list = new ArrayList();
|
||||
while (true) {
|
||||
String line = Utility.readDataLine(br);
|
||||
if (line == null) break;
|
||||
if (line.length() == 0) continue;
|
||||
Utility.dot(counter++);
|
||||
list.add(line);
|
||||
}
|
||||
System.out.println("Read " + counter + " lines in file");
|
||||
|
||||
int limit = 800; // put a limit on it to save time
|
||||
|
||||
// pump it up if there aren't very many
|
||||
while (list.size() < limit) {
|
||||
list.addAll(list);
|
||||
}
|
||||
|
||||
int size = list.size();
|
||||
|
||||
|
||||
// later, adjust these so we always get a reasonble number of tries
|
||||
|
||||
int extraIterations = 200;
|
||||
if (size > limit) size = limit;
|
||||
|
||||
String[] tests = new String [size];
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
tests[i] = (String) list.get(i);
|
||||
}
|
||||
|
||||
// get collators
|
||||
|
||||
com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc);
|
||||
java.text.Collator oldCol = java.text.Collator.getInstance(loc);
|
||||
|
||||
|
||||
double startTime, endTime;
|
||||
double delta, oldDelta;
|
||||
String probe;
|
||||
|
||||
|
||||
// load classes at least once before starting
|
||||
|
||||
newCol.compare("a", "b");
|
||||
oldCol.compare("a", "b");
|
||||
|
||||
// ================================================
|
||||
// check sort key size
|
||||
|
||||
int stringSize = 0, newSize = 0, oldSize = 0;
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
stringSize += tests[i].length() * 2;
|
||||
byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray();
|
||||
newSize += newKey.length;
|
||||
byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray();
|
||||
oldSize += oldKey.length;
|
||||
}
|
||||
delta = stringSize/(size + 0.0);
|
||||
System.out.println("string size: " + nf.format(delta) + " bytes per key");
|
||||
System.out.println();
|
||||
|
||||
delta = oldDelta = (oldSize/(size + 0.0));
|
||||
System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key ");
|
||||
delta = (newSize/(size + 0.0));
|
||||
System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta));
|
||||
System.out.println();
|
||||
|
||||
// ================================================
|
||||
// Sort Key: old time
|
||||
|
||||
// get overhead time
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int j = 0; j < size; ++j) {
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
double overhead = (1000*(endTime - startTime) / counter);
|
||||
System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros");
|
||||
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
oldCol.getCollationKey(probe);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead;
|
||||
System.out.println("Old sort key time: " + nf.format(delta)
|
||||
+ " micros (" + counter + " iterations)");
|
||||
|
||||
// Sort Key: new time
|
||||
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
newCol.getCollationKey(probe);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
delta = (1000*(endTime - startTime) / counter) - overhead;
|
||||
System.out.println("New sort key time: " + nf.format(delta)
|
||||
+ " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
|
||||
System.out.println();
|
||||
|
||||
// ================================================
|
||||
// Raw Compare
|
||||
|
||||
if (DO_RAW) {
|
||||
// get overhead time
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
int opt = 0; // to keep the compiler from optimizing out
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int j = 0; j < size; ++j) {
|
||||
opt ^= probe.compareTo(tests[j]);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
overhead = (1000*(endTime - startTime) / counter);
|
||||
System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros");
|
||||
|
||||
// Raw Compare: old time
|
||||
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int j = 0; j < size; ++j) {
|
||||
opt ^= oldCol.compare(probe, tests[j]);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead;
|
||||
System.out.println("Old raw compare time: " + nf.format(delta)
|
||||
+ " micros (" + counter + " iterations)");
|
||||
|
||||
// Raw Compare: new time
|
||||
|
||||
counter = 0;
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int j = 0; j < size; ++j) {
|
||||
opt ^= newCol.compare(probe, tests[j]);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
delta = (1000*(endTime - startTime) / counter) - overhead;
|
||||
System.out.println("New raw compare time: " + nf.format(delta)
|
||||
+ " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// Binary Search
|
||||
// note: I don't worry about getting the binary search precisely right, since I just want to
|
||||
// see which strings would get compared.
|
||||
|
||||
// overhead
|
||||
|
||||
int iterations = (size * extraIterations);
|
||||
startTime = System.currentTimeMillis();
|
||||
Arrays.sort(tests);
|
||||
int opt2 = 0; // keep from optimizing out
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
opt2 ^= Arrays.binarySearch(tests, probe);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
overhead = delta = (1000*(endTime - startTime) / iterations);
|
||||
System.out.println("Overhead: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations)");
|
||||
|
||||
// old time
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
Arrays.sort(tests, oldCol);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
opt2 ^= Arrays.binarySearch(tests, probe, oldCol);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||||
System.out.println("Old binary search time: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations)");
|
||||
|
||||
|
||||
// new time
|
||||
|
||||
Arrays.sort(tests, newCol);
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
probe = tests[i];
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
opt2 ^= Arrays.binarySearch(tests, probe, newCol);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||||
System.out.println("New binary search time: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
|
||||
System.out.println();
|
||||
|
||||
// ================================================
|
||||
// Sort
|
||||
|
||||
String[] sortTests = (String[]) tests.clone();
|
||||
extraIterations = 5;
|
||||
iterations = (size * extraIterations);
|
||||
|
||||
// overhead
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||||
Arrays.sort(sortTests);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
overhead = delta = (1000*(endTime - startTime) / iterations);
|
||||
System.out.println("overhead: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations)");
|
||||
|
||||
// old time
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||||
Arrays.sort(sortTests, oldCol);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||||
System.out.println("Old sort time: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations)");
|
||||
|
||||
// new time
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int k = 0; k < extraIterations; ++k) {
|
||||
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||||
Arrays.sort(sortTests, newCol);
|
||||
}
|
||||
}
|
||||
endTime = System.currentTimeMillis();
|
||||
delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||||
System.out.println("New sort time: " + nf.format(delta)
|
||||
+ " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
|
||||
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/08/04 21:38:45 $
|
||||
* $Revision: 1.20 $
|
||||
* $Date: 2002/08/08 15:35:01 $
|
||||
* $Revision: 1.21 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -79,6 +79,7 @@ public final class Main implements UCD_Types {
|
|||
|
||||
|
||||
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
|
||||
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
|
||||
|
|
Loading…
Add table
Reference in a new issue