From 4fff0085e5aeb3944843b018637399ea2a60addd Mon Sep 17 00:00:00 2001 From: Craig Cornelius Date: Fri, 23 Jul 2021 17:19:42 +0000 Subject: [PATCH] ICU-21638 re-enable some of the ICU4J performance tests See #1773 --- icu4j/perf-tests/README.txt | 82 ++++++++++++++++++++-- icu4j/perf-tests/converterperf.pl | 98 ++++++++++++++------------- icu4j/perf-tests/dateformatperf.pl | 4 +- icu4j/perf-tests/decimalformatperf.pl | 4 +- icu4j/perf-tests/normperf.pl | 8 ++- icu4j/perf-tests/perftests.pl | 2 +- icu4j/perf-tests/ucharacterperf.pl | 4 +- icu4j/perf-tests/unicodesetperf.pl | 4 +- 8 files changed, 148 insertions(+), 58 deletions(-) diff --git a/icu4j/perf-tests/README.txt b/icu4j/perf-tests/README.txt index e20444db92f..b20f0b8796e 100644 --- a/icu4j/perf-tests/README.txt +++ b/icu4j/perf-tests/README.txt @@ -4,7 +4,81 @@ Copyright (C) 2012, International Business Machines Corporation and others. All README for ICU4J Performance Test -Please note that to run the performance test for ICU4J as a part of continuous build, you will need to setup Perl with the following modules: - a) Statistics/Distribution.pm - b) Statistics/Descriptive.pm - c) XML/LibXML.pm +This directory includes a number of performance tests. Most are +comparing ICU operations with built in Java functions. Many tests run +numerous iterations with a variety of locales. + +Several tests create .html output files that can be opened and viewed in a web browser. + +The collation test produces output in the terminal window. Some are executed +individually via command line and others run via an `ant` command. + +Note: Tests with "_r" in the name are obsolete tests that compared +versions of ICU4J with each other. These may be useful in the future, +but require reworking to locate, compile, and run different versions. 
+ + +Note: To run the performance test for ICU4J as a part of continuous build, you will +need to set up Perl with the following modules: + a) Statistics/Distribution.pm + b) Statistics/Descriptive.pm + c) XML/LibXML.pm + +CONTINUOUS BUILD: + To run a set of performance tests defined in file perftests.pl, use this command: + ant continuous-build + +Output is created in perf.xml. This output contains results comparing ICU and JDK for the +following operations: + DateFmt-open + DateFmt-parse + NumFmt-open + NumFmt-parse + Collation in several locales + +COLLATION TESTS + The collation tests run only on the command line with tabular output: + perl collationperf.pl |& tee collation_output.txt + + +OTHER COMMAND LINE TESTS +Additional tests are run from the command line, each producing an HTML +output file with the name "perf" followed by a timestamp of when +it was run. For example: + + "perf Jul 22 141434.html" + +Each result can be loaded for review in a browser. + +SETUP: +The environment variable PERL5LIB must be set as follows: + export PERL5LIB=`pwd` + +Then the command line is run for each as follows: + perl dateformatperf.pl + perl converterperf.pl + perl decimalformatperf.pl + perl normperf.pl + perl ucharacterperf.pl + perl unicodesetperf.pl + + +converterperf compares ICU Decoder and ICU Encoder with JDK versions for timing. + +decimalformatperf compares JDK with ICU in construction, parsing, and +formatting in en_US and de_DE locales. 
+ +normperf tests various normalization methods in both JDK and ICU using +a variety of locales + +ucharacterperf compares JDK with ICU for character handling with +digits, numeric values, types of characters, casing, and other +attributes + +unicodesetperf compares UnicodeSet with HashSet with the following: + UnicodeSetAdd + HashSetAdd + UnicodeSetContains + HashSetContains + UnicodeSetIterate + HashSetIterate diff --git a/icu4j/perf-tests/converterperf.pl b/icu4j/perf-tests/converterperf.pl index 27e03c9976e..8e6703648d3 100755 --- a/icu4j/perf-tests/converterperf.pl +++ b/icu4j/perf-tests/converterperf.pl @@ -8,7 +8,7 @@ use strict; -# Assume we are running within the icu4j root directory +# Assume we are running within the icu4j/perf-tests root directory use lib 'src/com/ibm/icu/dev/test/perf'; use Dataset; @@ -16,8 +16,11 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.ConverterPerformanceTest'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar:../icu4j-charset.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. +# Some tests do not compile at this time. my @METHODS = ( ## ['TestByteToCharConverter', 'TestByteToCharConverterICU'], ## ['TestCharToByteConverter', 'TestCharToByteConverterICU'], @@ -27,33 +30,34 @@ my @METHODS = ( # Patterns which define the set of characters used for testing. 
-my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/conversion/"; +my $SOURCEDIR ="./data/conversion/"; +# Note that some tests are unavailable my @OPTIONS = ( -# src text src encoding test encoding - [ "arabic.txt", "UTF-8", "csisolatinarabic"], - [ "french.txt", "UTF-8", "csisolatin1"], - [ "greek.txt", "UTF-8", "csisolatingreek"], - [ "hebrew.txt", "UTF-8", "csisolatinhebrew"], -# [ "hindi.txt" , "UTF-8", "iscii"], - [ "japanese.txt", "UTF-8", "EUC-JP"], -# [ "japanese.txt", "UTF-8", "csiso2022jp"], - [ "japanese.txt", "UTF-8", "shift_jis"], -# [ "korean.txt", "UTF-8", "csiso2022kr"], - [ "korean.txt", "UTF-8", "EUC-KR"], - [ "s-chinese.txt", "UTF-8", "EUC_CN"], - [ "arabic.txt", "UTF-8", "UTF-8"], - [ "french.txt", "UTF-8", "UTF-8"], - [ "greek.txt", "UTF-8", "UTF-8"], - [ "hebrew.txt", "UTF-8", "UTF-8"], - [ "hindi.txt" , "UTF-8", "UTF-8"], - [ "japanese.txt", "UTF-8", "UTF-8"], - [ "korean.txt", "UTF-8", "UTF-8"], - [ "s-chinese.txt", "UTF-8", "UTF-8"], - [ "french.txt", "UTF-8", "UTF-16BE"], - [ "french.txt", "UTF-8", "UTF-16LE"], - [ "english.txt", "UTF-8", "US-ASCII"], - ); +# src text src encoding test encoding + [ "arabic.txt", "UTF-8", "csisolatinarabic"], + [ "french.txt", "UTF-8", "csisolatin1"], + [ "greek.txt", "UTF-8", "csisolatingreek"], + [ "hebrew.txt", "UTF-8", "csisolatinhebrew"], +# [ "hindi.txt" , "UTF-8", "iscii"], + [ "japanese.txt", "UTF-8", "EUC-JP"], + [ "japanese.txt", "UTF-8", "csiso2022jp"], +# [ "japanese.txt", "UTF-8", "shift_jis"], + [ "korean.txt", "UTF-8", "csiso2022kr"], +# [ "korean.txt", "UTF-8", "EUC-KR"], + [ "s-chinese.txt", "UTF-8", "EUC_CN"], + [ "arabic.txt", "UTF-8", "UTF-8"], + [ "french.txt", "UTF-8", "UTF-8"], + [ "greek.txt", "UTF-8", "UTF-8"], + [ "hebrew.txt", "UTF-8", "UTF-8"], + [ "hindi.txt" , "UTF-8", "UTF-8"], + [ "japanese.txt", "UTF-8", "UTF-8"], + [ "korean.txt", "UTF-8", "UTF-8"], + [ "s-chinese.txt", "UTF-8", "UTF-8"], + [ "french.txt", "UTF-8", "UTF-16BE"], + [ "french.txt", "UTF-8", "UTF-16LE"], + [ 
"english.txt", "UTF-8", "US-ASCII"], + ); my $CALIBRATE = 2; # duration in seconds for initial calibration my $DURATION = 10; # duration in seconds for each pass @@ -108,7 +112,7 @@ EOF print HTML "

\n"; print HTML "

$testMethod vs. $baselineMethod

\n"; - + print HTML "

\n"; print HTML ""; print HTML "\n"; @@ -213,17 +217,19 @@ sub measure2 { sub measure1 { my $method = shift; my $pat = shift; - my $iterCount = shift; # actually might be -seconds/pass + my $param3 = shift; # Either -seconds/pass or iteration count + + my $iterCount = 0; # Set later based on param3. out("

Measuring $method for input file @$pat[0] for encoding @$pat[2] , "); - if ($iterCount > 0) { + if ($param3 > 0) { + $iterCount = $param3; out("$iterCount iterations/pass, $NUMPASSES passes

\n"); } else { - out(-$iterCount, " seconds/pass, $NUMPASSES passes

\n"); - } + my $timePerPass = -$param3; + out(-$timePerPass, " seconds/pass, $NUMPASSES passes

\n"); - # is $iterCount actually -seconds/pass? - if ($iterCount < 0) { + # Value given was -seconds/pass # calibrate: estimate ms/iteration print "Calibrating..."; @@ -234,16 +240,16 @@ sub measure1 { $data[0] *= 1.0e+3; my $timePerIter = 1.0e-3 * $data[0] / $data[1]; - - # determine iterations/pass - $iterCount = int(-$iterCount / $timePerIter + 0.5); - + + # determine iterations/pass from timePerPass and timePerIteration + $iterCount = int($timePerPass / $timePerIter + 0.5); + out("

Calibration pass ($CALIBRATE sec): "); out("$data[0] ms, "); out("$data[1] iterations = "); out(formatSeconds(4, $timePerIter), "/iteration
\n"); } - + # run passes print "Measuring $iterCount iterations x $NUMPASSES passes..."; my @t = callJava($method, $pat, $iterCount, $NUMPASSES); @@ -291,11 +297,11 @@ sub callJava { my $pat = shift; my $n = shift; my $passes = shift; - + my $fileName = $SOURCEDIR.@$pat[0] ; my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; - - my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]"; + + my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out; @@ -406,7 +412,7 @@ sub formatNumber { my $mult = shift; my $a = shift; my $delta = shift; # may be undef - + my $result = formatSigDig($sigdig, $a*$mult); if (defined($delta)) { my $d = formatSigDig($sigdig, $delta*$mult); @@ -441,13 +447,13 @@ sub formatSeconds { my $a = shift; my $delta = shift; # may be undef - my @MULT = (1 , 1e3, 1e6, 1e9); - my @SUFF = ('s' , 'ms', 'us', 'ns'); + my @MULT = (1 , 1e3, 1e6, 1e9, 1e12); + my @SUFF = ('s' , 'ms', 'us', 'ns', 'ps'); # Determine our scale my $i = 0; ++$i while ($a*$MULT[$i] < 1 && $i < @MULT); - + formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i]; } @@ -465,7 +471,7 @@ sub formatPercent { my $sigdig = shift; my $a = shift; my $delta = shift; # may be undef - + formatNumber($sigdig, 100, $a, $delta) . ' %'; } diff --git a/icu4j/perf-tests/dateformatperf.pl b/icu4j/perf-tests/dateformatperf.pl index b4ebe6897c8..be24ce075d1 100755 --- a/icu4j/perf-tests/dateformatperf.pl +++ b/icu4j/perf-tests/dateformatperf.pl @@ -16,6 +16,8 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DateFormatPerformanceTest'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. my @METHODS = ( @@ -281,7 +283,7 @@ sub callJava { my $n = ($n < 0) ? 
"-t ".(-$n) : "-i ".$n; - my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS"; + my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out; diff --git a/icu4j/perf-tests/decimalformatperf.pl b/icu4j/perf-tests/decimalformatperf.pl index a9852cde238..685adaf7f25 100755 --- a/icu4j/perf-tests/decimalformatperf.pl +++ b/icu4j/perf-tests/decimalformatperf.pl @@ -16,6 +16,8 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.DecimalFormatPerformanceTest'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. my @METHODS = ( @@ -276,7 +278,7 @@ sub callJava { my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; - my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS"; + my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -L @$pat[0] \"@$pat[1]\" \"@$pat[2]\" -r $THREADS"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out; diff --git a/icu4j/perf-tests/normperf.pl b/icu4j/perf-tests/normperf.pl index 3bfba5f6866..3c05c53c98f 100755 --- a/icu4j/perf-tests/normperf.pl +++ b/icu4j/perf-tests/normperf.pl @@ -16,6 +16,8 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.NormalizerPerformanceTest'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. my @METHODS = ( @@ -30,7 +32,7 @@ my @METHODS = ( # Patterns which define the set of characters used for testing. 
-my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/collation/"; +my $SOURCEDIR ="data/collation/"; my @OPTIONS = ( # src text src encoding mode @@ -88,7 +90,7 @@ my @OPTIONS = ( [ "TestNames_SerbianSH.txt", "UTF-8", "l"], [ "TestNames_SerbianSR.txt", "UTF-8", "l"], [ "TestNames_Thai.txt", "UTF-8", "l"], - [ "Testnames_Russian.txt", "UTF-8", "l"], + [ "TestNames_Russian.txt", "UTF-8", "l"], ); my $CALIBRATE = 2; # duration in seconds for initial calibration @@ -331,7 +333,7 @@ sub callJava { my $fileName = $SOURCEDIR . @$pat[0] ; my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; - my $cmd = "java -classpath classes $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]"; + my $cmd = "java -classpath $CLASSES $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -@$pat[2]"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out; diff --git a/icu4j/perf-tests/perftests.pl b/icu4j/perf-tests/perftests.pl index b7758da3b31..bed179cd425 100755 --- a/icu4j/perf-tests/perftests.pl +++ b/icu4j/perf-tests/perftests.pl @@ -9,7 +9,7 @@ use XML::LibXML; # Assume we are running within the icu4j root directory -use lib 'src/com/ibm/icu/dev/test/perf'; +use lib '.'; use Dataset; my $OS=$^O; diff --git a/icu4j/perf-tests/ucharacterperf.pl b/icu4j/perf-tests/ucharacterperf.pl index b3a0461bf30..01fbc9526c8 100755 --- a/icu4j/perf-tests/ucharacterperf.pl +++ b/icu4j/perf-tests/ucharacterperf.pl @@ -16,6 +16,8 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UCharacterPerf'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. my @METHODS = (['JDKDigit', 'Digit'], @@ -278,7 +280,7 @@ sub callJava { my $n = ($n < 0) ? 
"-t ".(-$n) : "-i ".$n; - my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat"; + my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out; diff --git a/icu4j/perf-tests/unicodesetperf.pl b/icu4j/perf-tests/unicodesetperf.pl index 165cb5f44e4..42138ab5ca9 100755 --- a/icu4j/perf-tests/unicodesetperf.pl +++ b/icu4j/perf-tests/unicodesetperf.pl @@ -16,6 +16,8 @@ use Dataset; # Test class my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UnicodeSetPerf'; +my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; + # Methods to be tested. Each pair represents a test method and # a baseline method which is used for comparison. my @METHODS = ( @@ -268,7 +270,7 @@ sub callJava { my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; - my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat"; + my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat"; print "[$cmd]\n"; # for debugging open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; my @out;

Options$testMethod$baselineMethodRatio