ICU-21638 re-enable some of the ICU4J performance tests

See #1773
This commit is contained in:
Craig Cornelius 2021-07-23 17:19:42 +00:00
parent eed6388dab
commit 4fff0085e5
8 changed files with 148 additions and 58 deletions

View file

@ -4,7 +4,81 @@ Copyright (C) 2012, International Business Machines Corporation and others. All
README for ICU4J Performance Test
Please note that to run the performance test for ICU4J as a part of continuous build, you will need to setup Perl with the following modules:
a) Statistics/Distribution.pm
b) Statistics/Descriptive.pm
c) XML/LibXML.pm
This directory includes a number of performance tests. Most are
comparing ICU operations with built in Java functions. Many tests run
numerous iterations with a variety of locales.
Several tests create .html output files that can be opened and viewed in a web browser.
The collation test produces output in the terminal window. Some are executed
individually via command line and others run via an `ant` command.
Note: Tests with "_r" in the name are obsolete tests that compared
versions of ICU4J with each other. These may be useful in the future,
but require reworking to locate, compile, and run different versions.
Note: To run the performance test for ICU4J as a part of continuous build, you will
need to set up Perl with the following modules:
a) Statistics/Distribution.pm
b) Statistics/Descriptive.pm
c) XML/LibXML.pm
CONTINUOUS BUILD:
To run a set of performance tests defined in file perftests.pl, use this command:
ant continuous-build
Output is created in perf.xml. This output contains results comparing ICU and JDK for the
following operations:
DateFmt-open
DateFmt-parse
NumFmt-open
NumFmt-parse
Collation in several locales
COLLATION TESTS
The collation tests run only on the command line with tabular output:
perl collationperf.pl |& tee collation_output.txt
OTHER COMMAND LINE TESTS
Additional tests are run from the command line, each producing an HTML
output file with the name "perf" followed by a timestamp of when
it was run. For example:
"perf Jul 22 141434.html"
Each result can be loaded for review in a browser.
SETUP:
The environment variable PERL5LIB must be set as follows:
export PERL5LIB=`pwd`
Then the command line is run for each as follows:
perl dateformatperf.pl
perl converterperf.pl
perl decimalformatperf.pl
perl normperf.pl
perl ucharacterperf.pl
perl unicodesetperf.pl
converterperf compares ICU Decoder and ICU Encoder with JDK versions for timing.
decimalformatperf compares JDK with ICU in construction, parsing, and
formatting in en_US and de_DE locales.
normperf tests various normalization methods in both JDK and ICU using
a variety of locales.
ucharacterperf compares JDK with ICU for character handling with
digits, numeric values, types of characters, casing, and other
attributes
unicodesetperf compares UnicodeSet with HashSet with the following:
UnicodeSetAdd
HashSetAdd
UnicodeSetContains
HashSetContains
UnicodeSetIterate
HashSetIterate

View file

@ -8,7 +8,7 @@
use strict;
# Assume we are running within the icu4j root directory
# Assume we are running within the icu4j/perf-tests root directory
use lib 'src/com/ibm/icu/dev/test/perf';
use Dataset;
@ -16,8 +16,11 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.ConverterPerformanceTest';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar:../icu4j-charset.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
# Some tests do not compile at this time.
my @METHODS = (
## ['TestByteToCharConverter', 'TestByteToCharConverterICU'],
## ['TestCharToByteConverter', 'TestCharToByteConverterICU'],
@ -27,33 +30,34 @@ my @METHODS = (
# Patterns which define the set of characters used for testing.
my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/conversion/";
my $SOURCEDIR ="./data/conversion/";
# Note that some tests are unavailable
my @OPTIONS = (
# src text src encoding test encoding
[ "arabic.txt", "UTF-8", "csisolatinarabic"],
[ "french.txt", "UTF-8", "csisolatin1"],
[ "greek.txt", "UTF-8", "csisolatingreek"],
[ "hebrew.txt", "UTF-8", "csisolatinhebrew"],
# [ "hindi.txt" , "UTF-8", "iscii"],
[ "japanese.txt", "UTF-8", "EUC-JP"],
# [ "japanese.txt", "UTF-8", "csiso2022jp"],
[ "japanese.txt", "UTF-8", "shift_jis"],
# [ "korean.txt", "UTF-8", "csiso2022kr"],
[ "korean.txt", "UTF-8", "EUC-KR"],
[ "s-chinese.txt", "UTF-8", "EUC_CN"],
[ "arabic.txt", "UTF-8", "UTF-8"],
[ "french.txt", "UTF-8", "UTF-8"],
[ "greek.txt", "UTF-8", "UTF-8"],
[ "hebrew.txt", "UTF-8", "UTF-8"],
[ "hindi.txt" , "UTF-8", "UTF-8"],
[ "japanese.txt", "UTF-8", "UTF-8"],
[ "korean.txt", "UTF-8", "UTF-8"],
[ "s-chinese.txt", "UTF-8", "UTF-8"],
[ "french.txt", "UTF-8", "UTF-16BE"],
[ "french.txt", "UTF-8", "UTF-16LE"],
[ "english.txt", "UTF-8", "US-ASCII"],
);
# src text src encoding test encoding
[ "arabic.txt", "UTF-8", "csisolatinarabic"],
[ "french.txt", "UTF-8", "csisolatin1"],
[ "greek.txt", "UTF-8", "csisolatingreek"],
[ "hebrew.txt", "UTF-8", "csisolatinhebrew"],
# [ "hindi.txt" , "UTF-8", "iscii"],
[ "japanese.txt", "UTF-8", "EUC-JP"],
[ "japanese.txt", "UTF-8", "csiso2022jp"],
# [ "japanese.txt", "UTF-8", "shift_jis"],
[ "korean.txt", "UTF-8", "csiso2022kr"],
# [ "korean.txt", "UTF-8", "EUC-KR"],
[ "s-chinese.txt", "UTF-8", "EUC_CN"],
[ "arabic.txt", "UTF-8", "UTF-8"],
[ "french.txt", "UTF-8", "UTF-8"],
[ "greek.txt", "UTF-8", "UTF-8"],
[ "hebrew.txt", "UTF-8", "UTF-8"],
[ "hindi.txt" , "UTF-8", "UTF-8"],
[ "japanese.txt", "UTF-8", "UTF-8"],
[ "korean.txt", "UTF-8", "UTF-8"],
[ "s-chinese.txt", "UTF-8", "UTF-8"],
[ "french.txt", "UTF-8", "UTF-16BE"],
[ "french.txt", "UTF-8", "UTF-16LE"],
[ "english.txt", "UTF-8", "US-ASCII"],
);
my $CALIBRATE = 2; # duration in seconds for initial calibration
my $DURATION = 10; # duration in seconds for each pass
@ -108,7 +112,7 @@ EOF
print HTML "<P><TABLE $TABLEATTR><TR><TD>\n";
print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n";
print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n";
print HTML "<TR><TD>Options</TD><TD>$testMethod</TD>";
print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n";
@ -213,17 +217,19 @@ sub measure2 {
sub measure1 {
my $method = shift;
my $pat = shift;
my $iterCount = shift; # actually might be -seconds/pass
my $param3 = shift; # Either -seconds/pass or iteration count
my $iterCount = 0; # Set later based on param3.
out("<P>Measuring $method for input file @$pat[0] for encoding @$pat[2] , ");
if ($iterCount > 0) {
if ($param3 > 0) {
$iterCount = $param3;
out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
} else {
out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
}
my $timePerPass = -$param3;
out(-$timePerPass, " seconds/pass, $NUMPASSES passes</P>\n");
# is $iterCount actually -seconds/pass?
if ($iterCount < 0) {
# Value given was -seconds/pass
# calibrate: estimate ms/iteration
print "Calibrating...";
@ -234,16 +240,16 @@ sub measure1 {
$data[0] *= 1.0e+3;
my $timePerIter = 1.0e-3 * $data[0] / $data[1];
# determine iterations/pass
$iterCount = int(-$iterCount / $timePerIter + 0.5);
# determine iterations/pass from timePerPass and timePerIteration
$iterCount = int($timePerPass / $timePerIter + 0.5);
out("<P>Calibration pass ($CALIBRATE sec): ");
out("$data[0] ms, ");
out("$data[1] iterations = ");
out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
}
# run passes
print "Measuring $iterCount iterations x $NUMPASSES passes...";
my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
@ -291,11 +297,11 @@ sub callJava {
my $pat = shift;
my $n = shift;
my $passes = shift;
my $fileName = $SOURCEDIR.@$pat[0] ;
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]";
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;
@ -406,7 +412,7 @@ sub formatNumber {
my $mult = shift;
my $a = shift;
my $delta = shift; # may be undef
my $result = formatSigDig($sigdig, $a*$mult);
if (defined($delta)) {
my $d = formatSigDig($sigdig, $delta*$mult);
@ -441,13 +447,13 @@ sub formatSeconds {
my $a = shift;
my $delta = shift; # may be undef
my @MULT = (1 , 1e3, 1e6, 1e9);
my @SUFF = ('s' , 'ms', 'us', 'ns');
my @MULT = (1 , 1e3, 1e6, 1e9, 1e12);
my @SUFF = ('s' , 'ms', 'us', 'ns', 'ps');
# Determine our scale
my $i = 0;
++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
}
@ -465,7 +471,7 @@ sub formatPercent {
my $sigdig = shift;
my $a = shift;
my $delta = shift; # may be undef
formatNumber($sigdig, 100, $a, $delta) . ' %';
}

View file

@ -16,6 +16,8 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DateFormatPerformanceTest';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
my @METHODS = (
@ -281,7 +283,7 @@ sub callJava {
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;

View file

@ -16,6 +16,8 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DecimalFormatPerformanceTest';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
my @METHODS = (
@ -276,7 +278,7 @@ sub callJava {
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;

View file

@ -16,6 +16,8 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.NormalizerPerformanceTest';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
my @METHODS = (
@ -30,7 +32,7 @@ my @METHODS = (
# Patterns which define the set of characters used for testing.
my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/collation/";
my $SOURCEDIR ="data/collation/";
my @OPTIONS = (
# src text src encoding mode
@ -88,7 +90,7 @@ my @OPTIONS = (
[ "TestNames_SerbianSH.txt", "UTF-8", "l"],
[ "TestNames_SerbianSR.txt", "UTF-8", "l"],
[ "TestNames_Thai.txt", "UTF-8", "l"],
[ "Testnames_Russian.txt", "UTF-8", "l"],
[ "TestNames_Russian.txt", "UTF-8", "l"],
);
my $CALIBRATE = 2; # duration in seconds for initial calibration
@ -331,7 +333,7 @@ sub callJava {
my $fileName = $SOURCEDIR . @$pat[0] ;
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]";
my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;

View file

@ -9,7 +9,7 @@
use XML::LibXML;
# Assume we are running within the icu4j root directory
use lib 'src/com/ibm/icu/dev/test/perf';
use lib '.';
use Dataset;
my $OS=$^O;

View file

@ -16,6 +16,8 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UCharacterPerf';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
my @METHODS = (['JDKDigit', 'Digit'],
@ -278,7 +280,7 @@ sub callJava {
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;

View file

@ -16,6 +16,8 @@ use Dataset;
# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UnicodeSetPerf';
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
my @METHODS = (
@ -268,7 +270,7 @@ sub callJava {
my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat";
print "[$cmd]\n"; # for debugging
open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
my @out;