mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
parent
eed6388dab
commit
4fff0085e5
8 changed files with 148 additions and 58 deletions
|
@ -4,7 +4,81 @@ Copyright (C) 2012, International Business Machines Corporation and others. All
|
|||
|
||||
README for ICU4J Performance Test
|
||||
|
||||
Please note that to run the performance test for ICU4J as a part of continuous build, you will need to setup Perl with the following modules:
|
||||
a) Statistics/Distribution.pm
|
||||
b) Statistics/Descriptive.pm
|
||||
c) XML/LibXML.pm
|
||||
This directory includes a number of performance tests. Most are
|
||||
comparing ICU operations with built in Java functions. Many tests run
|
||||
numerous iterations with a variety of locales.
|
||||
|
||||
Several tests create .html output files that can be opened and viewed in a web browser.
|
||||
|
||||
The collation test produces output in the terminal window. Some are executed
|
||||
individually via command line and others run via an `ant` command.
|
||||
|
||||
Note: Tests with "_r" in the name are obsolete tests that compared
|
||||
versions of ICU4J with each other. These may be useful in the future,
|
||||
but require reworking to locate, compile, and run different versions.
|
||||
|
||||
|
||||
Note: To run the performance test for ICU4J as a part of continuous build, you will
|
||||
need to set up Perl with the following modules:
|
||||
a) Statistics/Distribution.pm
|
||||
b) Statistics/Descriptive.pm
|
||||
c) XML/LibXML.pm
|
||||
|
||||
CONTINUOUS BUILD:
|
||||
To run a set of performance tests defined in file perftests.pl, use this command:
|
||||
ant continuous-build
|
||||
|
||||
Output is created in perf.xml. This output contains results comparing ICU and JDK for the
|
||||
following operations:
|
||||
DateFmt-open
|
||||
DateFmt-parse
|
||||
NumFmt-open
|
||||
NumFmt-parse
|
||||
Collation in several locales
|
||||
|
||||
COLLATION TESTS
|
||||
The collation tests run only on the command line with tabular output:
|
||||
perl collationperf.pl |& tee collation_output.txt
|
||||
|
||||
|
||||
OTHER COMMAND LINE TESTS
|
||||
Additional tests are run from the command line, each producing an HTML
|
||||
output file with the name "perf" followed by a timestamp of when
|
||||
it was run. For example:
|
||||
|
||||
"perf Jul 22 141434.html"
|
||||
|
||||
Each result can be loaded for review in a browser.
|
||||
|
||||
SETUP:
|
||||
The environment variable PERL5LIB must be set as follows:
|
||||
export PERL5LIB=`pwd`
|
||||
|
||||
Then the command line is run for each as follows:
|
||||
perl dateformatperf.pl
|
||||
perl converterperf.pl
|
||||
perl decimalformatperf.pl
|
||||
perl normperf.pl
|
||||
perl ucharacterperf.pl
|
||||
perl unicodesetperf.pl
|
||||
|
||||
|
||||
converterperf compares ICU Decoder and ICU Encoder with JDK versions for timing.
|
||||
|
||||
decimalformatperf compares JDK with ICU in construction, parsing, and
|
||||
formatting in en_US and de_DE locales.
|
||||
|
||||
normperf tests various normalization methods in both JDK and ICU using
|
||||
a variety of locales
|
||||
|
||||
ucharacterperf compares JDK with ICU for character handling with
|
||||
digits, numeric values, types of characters, casing, and other
|
||||
attributes
|
||||
|
||||
unicodesetperf compares UnicodeSet with HashSet with the following:
|
||||
UnicodeSetAdd
|
||||
HashSetAdd
|
||||
UnicodeSetContains
|
||||
HashSetContains
|
||||
UnicodeSetIterate
|
||||
HashSetIterate
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
use strict;
|
||||
|
||||
# Assume we are running within the icu4j root directory
|
||||
# Assume we are running within the icu4j/perf-tests root directory
|
||||
use lib 'src/com/ibm/icu/dev/test/perf';
|
||||
use Dataset;
|
||||
|
||||
|
@ -16,8 +16,11 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.ConverterPerformanceTest';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar:../icu4j-charset.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
# Some tests do not compile at this time.
|
||||
my @METHODS = (
|
||||
## ['TestByteToCharConverter', 'TestByteToCharConverterICU'],
|
||||
## ['TestCharToByteConverter', 'TestCharToByteConverterICU'],
|
||||
|
@ -27,33 +30,34 @@ my @METHODS = (
|
|||
|
||||
# Patterns which define the set of characters used for testing.
|
||||
|
||||
my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/conversion/";
|
||||
my $SOURCEDIR ="./data/conversion/";
|
||||
|
||||
# Note that some tests are unavailable
|
||||
my @OPTIONS = (
|
||||
# src text src encoding test encoding
|
||||
[ "arabic.txt", "UTF-8", "csisolatinarabic"],
|
||||
[ "french.txt", "UTF-8", "csisolatin1"],
|
||||
[ "greek.txt", "UTF-8", "csisolatingreek"],
|
||||
[ "hebrew.txt", "UTF-8", "csisolatinhebrew"],
|
||||
# [ "hindi.txt" , "UTF-8", "iscii"],
|
||||
[ "japanese.txt", "UTF-8", "EUC-JP"],
|
||||
# [ "japanese.txt", "UTF-8", "csiso2022jp"],
|
||||
[ "japanese.txt", "UTF-8", "shift_jis"],
|
||||
# [ "korean.txt", "UTF-8", "csiso2022kr"],
|
||||
[ "korean.txt", "UTF-8", "EUC-KR"],
|
||||
[ "s-chinese.txt", "UTF-8", "EUC_CN"],
|
||||
[ "arabic.txt", "UTF-8", "UTF-8"],
|
||||
[ "french.txt", "UTF-8", "UTF-8"],
|
||||
[ "greek.txt", "UTF-8", "UTF-8"],
|
||||
[ "hebrew.txt", "UTF-8", "UTF-8"],
|
||||
[ "hindi.txt" , "UTF-8", "UTF-8"],
|
||||
[ "japanese.txt", "UTF-8", "UTF-8"],
|
||||
[ "korean.txt", "UTF-8", "UTF-8"],
|
||||
[ "s-chinese.txt", "UTF-8", "UTF-8"],
|
||||
[ "french.txt", "UTF-8", "UTF-16BE"],
|
||||
[ "french.txt", "UTF-8", "UTF-16LE"],
|
||||
[ "english.txt", "UTF-8", "US-ASCII"],
|
||||
);
|
||||
# src text src encoding test encoding
|
||||
[ "arabic.txt", "UTF-8", "csisolatinarabic"],
|
||||
[ "french.txt", "UTF-8", "csisolatin1"],
|
||||
[ "greek.txt", "UTF-8", "csisolatingreek"],
|
||||
[ "hebrew.txt", "UTF-8", "csisolatinhebrew"],
|
||||
# [ "hindi.txt" , "UTF-8", "iscii"],
|
||||
[ "japanese.txt", "UTF-8", "EUC-JP"],
|
||||
[ "japanese.txt", "UTF-8", "csiso2022jp"],
|
||||
# [ "japanese.txt", "UTF-8", "shift_jis"],
|
||||
[ "korean.txt", "UTF-8", "csiso2022kr"],
|
||||
# [ "korean.txt", "UTF-8", "EUC-KR"],
|
||||
[ "s-chinese.txt", "UTF-8", "EUC_CN"],
|
||||
[ "arabic.txt", "UTF-8", "UTF-8"],
|
||||
[ "french.txt", "UTF-8", "UTF-8"],
|
||||
[ "greek.txt", "UTF-8", "UTF-8"],
|
||||
[ "hebrew.txt", "UTF-8", "UTF-8"],
|
||||
[ "hindi.txt" , "UTF-8", "UTF-8"],
|
||||
[ "japanese.txt", "UTF-8", "UTF-8"],
|
||||
[ "korean.txt", "UTF-8", "UTF-8"],
|
||||
[ "s-chinese.txt", "UTF-8", "UTF-8"],
|
||||
[ "french.txt", "UTF-8", "UTF-16BE"],
|
||||
[ "french.txt", "UTF-8", "UTF-16LE"],
|
||||
[ "english.txt", "UTF-8", "US-ASCII"],
|
||||
);
|
||||
|
||||
my $CALIBRATE = 2; # duration in seconds for initial calibration
|
||||
my $DURATION = 10; # duration in seconds for each pass
|
||||
|
@ -108,7 +112,7 @@ EOF
|
|||
|
||||
print HTML "<P><TABLE $TABLEATTR><TR><TD>\n";
|
||||
print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n";
|
||||
|
||||
|
||||
print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n";
|
||||
print HTML "<TR><TD>Options</TD><TD>$testMethod</TD>";
|
||||
print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n";
|
||||
|
@ -213,17 +217,19 @@ sub measure2 {
|
|||
sub measure1 {
|
||||
my $method = shift;
|
||||
my $pat = shift;
|
||||
my $iterCount = shift; # actually might be -seconds/pass
|
||||
my $param3 = shift; # Either -seconds/pass or iteration count
|
||||
|
||||
my $iterCount = 0; # Set later based on param3.
|
||||
|
||||
out("<P>Measuring $method for input file @$pat[0] for encoding @$pat[2] , ");
|
||||
if ($iterCount > 0) {
|
||||
if ($param3 > 0) {
|
||||
$iterCount = $param3;
|
||||
out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
|
||||
} else {
|
||||
out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
|
||||
}
|
||||
my $timePerPass = -$param3;
|
||||
out(-$timePerPass, " seconds/pass, $NUMPASSES passes</P>\n");
|
||||
|
||||
# is $iterCount actually -seconds/pass?
|
||||
if ($iterCount < 0) {
|
||||
# Value given was -seconds/pass
|
||||
|
||||
# calibrate: estimate ms/iteration
|
||||
print "Calibrating...";
|
||||
|
@ -234,16 +240,16 @@ sub measure1 {
|
|||
$data[0] *= 1.0e+3;
|
||||
|
||||
my $timePerIter = 1.0e-3 * $data[0] / $data[1];
|
||||
|
||||
# determine iterations/pass
|
||||
$iterCount = int(-$iterCount / $timePerIter + 0.5);
|
||||
|
||||
|
||||
# determine iterations/pass from timePerPass and timePerIteration
|
||||
$iterCount = int($timePerPass / $timePerIter + 0.5);
|
||||
|
||||
out("<P>Calibration pass ($CALIBRATE sec): ");
|
||||
out("$data[0] ms, ");
|
||||
out("$data[1] iterations = ");
|
||||
out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
|
||||
}
|
||||
|
||||
|
||||
# run passes
|
||||
print "Measuring $iterCount iterations x $NUMPASSES passes...";
|
||||
my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
|
||||
|
@ -291,11 +297,11 @@ sub callJava {
|
|||
my $pat = shift;
|
||||
my $n = shift;
|
||||
my $passes = shift;
|
||||
|
||||
|
||||
my $fileName = $SOURCEDIR.@$pat[0] ;
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]";
|
||||
|
||||
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
@ -406,7 +412,7 @@ sub formatNumber {
|
|||
my $mult = shift;
|
||||
my $a = shift;
|
||||
my $delta = shift; # may be undef
|
||||
|
||||
|
||||
my $result = formatSigDig($sigdig, $a*$mult);
|
||||
if (defined($delta)) {
|
||||
my $d = formatSigDig($sigdig, $delta*$mult);
|
||||
|
@ -441,13 +447,13 @@ sub formatSeconds {
|
|||
my $a = shift;
|
||||
my $delta = shift; # may be undef
|
||||
|
||||
my @MULT = (1 , 1e3, 1e6, 1e9);
|
||||
my @SUFF = ('s' , 'ms', 'us', 'ns');
|
||||
my @MULT = (1 , 1e3, 1e6, 1e9, 1e12);
|
||||
my @SUFF = ('s' , 'ms', 'us', 'ns', 'ps');
|
||||
|
||||
# Determine our scale
|
||||
my $i = 0;
|
||||
++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
|
||||
|
||||
|
||||
formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
|
||||
}
|
||||
|
||||
|
@ -465,7 +471,7 @@ sub formatPercent {
|
|||
my $sigdig = shift;
|
||||
my $a = shift;
|
||||
my $delta = shift; # may be undef
|
||||
|
||||
|
||||
formatNumber($sigdig, 100, $a, $delta) . ' %';
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DateFormatPerformanceTest';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
my @METHODS = (
|
||||
|
@ -281,7 +283,7 @@ sub callJava {
|
|||
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
|
||||
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
|
|
@ -16,6 +16,8 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DecimalFormatPerformanceTest';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
my @METHODS = (
|
||||
|
@ -276,7 +278,7 @@ sub callJava {
|
|||
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
|
||||
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
|
|
@ -16,6 +16,8 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.NormalizerPerformanceTest';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
my @METHODS = (
|
||||
|
@ -30,7 +32,7 @@ my @METHODS = (
|
|||
|
||||
# Patterns which define the set of characters used for testing.
|
||||
|
||||
my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/collation/";
|
||||
my $SOURCEDIR ="data/collation/";
|
||||
|
||||
my @OPTIONS = (
|
||||
# src text src encoding mode
|
||||
|
@ -88,7 +90,7 @@ my @OPTIONS = (
|
|||
[ "TestNames_SerbianSH.txt", "UTF-8", "l"],
|
||||
[ "TestNames_SerbianSR.txt", "UTF-8", "l"],
|
||||
[ "TestNames_Thai.txt", "UTF-8", "l"],
|
||||
[ "Testnames_Russian.txt", "UTF-8", "l"],
|
||||
[ "TestNames_Russian.txt", "UTF-8", "l"],
|
||||
);
|
||||
|
||||
my $CALIBRATE = 2; # duration in seconds for initial calibration
|
||||
|
@ -331,7 +333,7 @@ sub callJava {
|
|||
my $fileName = $SOURCEDIR . @$pat[0] ;
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]";
|
||||
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
use XML::LibXML;
|
||||
|
||||
# Assume we are running within the icu4j root directory
|
||||
use lib 'src/com/ibm/icu/dev/test/perf';
|
||||
use lib '.';
|
||||
use Dataset;
|
||||
my $OS=$^O;
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UCharacterPerf';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
my @METHODS = (['JDKDigit', 'Digit'],
|
||||
|
@ -278,7 +280,7 @@ sub callJava {
|
|||
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
|
||||
my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
|
|
@ -16,6 +16,8 @@ use Dataset;
|
|||
# Test class
|
||||
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UnicodeSetPerf';
|
||||
|
||||
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
|
||||
|
||||
# Methods to be tested. Each pair represents a test method and
|
||||
# a baseline method which is used for comparison.
|
||||
my @METHODS = (
|
||||
|
@ -268,7 +270,7 @@ sub callJava {
|
|||
|
||||
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
|
||||
|
||||
my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
|
||||
my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat";
|
||||
print "[$cmd]\n"; # for debugging
|
||||
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
|
||||
my @out;
|
||||
|
|
Loading…
Add table
Reference in a new issue