diff --git a/Makefile.am b/Makefile.am index a7a1f413..3e988816 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign SUBDIRS = . src # Always include gmock in distributions. -DIST_SUBDIRS = $(subdirs) src conformance +DIST_SUBDIRS = $(subdirs) src conformance benchmarks # Build gmock before we build protobuf tests. We don't add gmock to SUBDIRS # because then "make check" would also build and run all of gmock's own tests, @@ -36,6 +36,10 @@ clean-local: echo "Making clean in conformance"; \ cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \ fi; \ + if test -e benchmarks/Makefile; then \ + echo "Making clean in benchmarks"; \ + cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \ + fi; \ if test -e objectivec/DevTools; then \ echo "Cleaning any ObjC pyc files"; \ rm -f objectivec/DevTools/*.pyc; \ diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am new file mode 100644 index 00000000..1e162eb1 --- /dev/null +++ b/benchmarks/Makefile.am @@ -0,0 +1,66 @@ + +benchmarks_protoc_inputs = \ + benchmarks.proto \ + benchmark_messages_proto3.proto + +benchmarks_protoc_inputs_proto2 = \ + benchmark_messages_proto2.proto + +benchmarks_protoc_outputs = \ + benchmarks.pb.cc \ + benchmarks.pb.h \ + benchmark_messages_proto3.pb.cc \ + benchmark_messages_proto3.pb.h + +benchmarks_protoc_outputs_proto2 = \ + benchmark_messages_proto2.pb.cc \ + benchmark_messages_proto2.pb.h + +bin_PROGRAMS = generate-datasets + +generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la +generate_datasets_SOURCES = generate_datasets.cc +generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) +nodist_generate_datasets_SOURCES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) + +# Explicit deps because BUILT_SOURCES are only done before a "make all/check" +# so a direct "make test_cpp" could fail if parallel enough. +# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually +generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h + +$(benchmarks_protoc_outputs): protoc_middleman +$(benchmarks_protoc_outputs_proto2): protoc_middleman2 + +CLEANFILES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) \ + protoc_middleman \ + protoc_middleman2 \ + dataset.* + +if USE_EXTERNAL_PROTOC + +protoc_middleman: $(benchmarks_protoc_inputs) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs) + touch protoc_middleman + +protoc_middleman2: $(benchmarks_protoc_inputs_proto2) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2) + touch protoc_middleman2 + +else + +# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is +# relative to srcdir, which may not be the same as the current directory when +# building out-of-tree. +protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) ) + touch protoc_middleman + +protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) ) + touch protoc_middleman + +endif diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..c9027805 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,28 @@ + +# Protocol Buffers Benchmarks + +This directory contains benchmarking schemas and data sets that you +can use to test a variety of performance scenarios against your +protobuf language runtime. + +The schema for the datasets is described in `benchmarks.proto`. + +Generate the data sets like so: + +``` +$ make +$ ./generate-datasets +Wrote dataset: dataset.google_message1_proto3.pb +Wrote dataset: dataset.google_message1_proto2.pb +Wrote dataset: dataset.google_message2.pb +$ +``` + +Each data set will be written to its own file. Benchmarks will +likely want to run several benchmarks against each data set (parse, +serialize, possibly JSON, possibly using different APIs, etc). + +We would like to add more data sets. In general we will favor data sets +that make the overall suite diverse without being too large or having +too many similar tests. Ideally everyone can run through the entire +suite without the test run getting too long. diff --git a/benchmarks/google_speed.proto b/benchmarks/benchmark_messages_proto2.proto similarity index 91% rename from benchmarks/google_speed.proto rename to benchmarks/benchmark_messages_proto2.proto index 16f6d678..01f67a1a 100644 --- a/benchmarks/google_speed.proto +++ b/benchmarks/benchmark_messages_proto2.proto @@ -1,11 +1,14 @@ +// Benchmark messages for proto2. + syntax = "proto2"; -package benchmarks; +package benchmarks.proto2; +option java_package = "com.google.protobuf.benchmarks"; -option java_outer_classname = "GoogleSpeed"; +// This is the default, but we specify it here explicitly. option optimize_for = SPEED; -message SpeedMessage1 { +message GoogleMessage1 { required string field1 = 1; optional string field9 = 9; optional string field18 = 18; @@ -40,7 +43,7 @@ message SpeedMessage1 { optional int32 field23 = 23 [default=0]; optional bool field24 = 24 [default=false]; optional int32 field25 = 25 [default=0]; - optional SpeedMessage1SubMessage field15 = 15; + optional GoogleMessage1SubMessage field15 = 15; optional bool field78 = 78; optional int32 field67 = 67 [default=0]; optional int32 field68 = 68; @@ -49,7 +52,7 @@ message SpeedMessage1 { optional int32 field131 = 131 [default=0]; } -message SpeedMessage1SubMessage { +message GoogleMessage1SubMessage { optional int32 field1 = 1 [default=0]; optional int32 field2 = 2 [default=0]; optional int32 field3 = 3 [default=0]; @@ -72,7 +75,7 @@ message SpeedMessage1SubMessage { optional uint64 field300 = 300; } -message SpeedMessage2 { +message GoogleMessage2 { optional string field1 = 1; optional int64 field3 = 3; optional int64 field4 = 4; @@ -112,7 +115,7 @@ message SpeedMessage2 { repeated int32 field73 = 73; optional int32 field20 = 20 [default=0]; optional string field24 = 24; - optional SpeedMessage2GroupedMessage field31 = 31; + optional GoogleMessage2GroupedMessage field31 = 31; } repeated string field128 = 128; optional int64 field131 = 131; @@ -123,7 +126,7 @@ message SpeedMessage2 { optional bool field206 = 206 [default=false]; } -message SpeedMessage2GroupedMessage { +message GoogleMessage2GroupedMessage { optional float field1 = 1; optional float field2 = 2; optional float field3 = 3 [default=0.0]; diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto new file mode 100644 index 00000000..32f58698 --- /dev/null +++ b/benchmarks/benchmark_messages_proto3.proto @@ -0,0 +1,76 @@ +// Benchmark messages for proto3. + +syntax = "proto3"; + +package benchmarks.proto3; +option java_package = "com.google.protobuf.benchmarks"; + +// This is the default, but we specify it here explicitly. +option optimize_for = SPEED; + +message GoogleMessage1 { + string field1 = 1; + string field9 = 9; + string field18 = 18; + bool field80 = 80; + bool field81 = 81; + int32 field2 = 2; + int32 field3 = 3; + int32 field280 = 280; + int32 field6 = 6; + int64 field22 = 22; + string field4 = 4; + repeated fixed64 field5 = 5; + bool field59 = 59; + string field7 = 7; + int32 field16 = 16; + int32 field130 = 130; + bool field12 = 12; + bool field17 = 17; + bool field13 = 13; + bool field14 = 14; + int32 field104 = 104; + int32 field100 = 100; + int32 field101 = 101; + string field102 = 102; + string field103 = 103; + int32 field29 = 29; + bool field30 = 30; + int32 field60 = 60; + int32 field271 = 271; + int32 field272 = 272; + int32 field150 = 150; + int32 field23 = 23; + bool field24 = 24; + int32 field25 = 25; + GoogleMessage1SubMessage field15 = 15; + bool field78 = 78; + int32 field67 = 67; + int32 field68 = 68; + int32 field128 = 128; + string field129 = 129; + int32 field131 = 131; +} + +message GoogleMessage1SubMessage { + int32 field1 = 1; + int32 field2 = 2; + int32 field3 = 3; + string field15 = 15; + bool field12 = 12; + int64 field13 = 13; + int64 field14 = 14; + int32 field16 = 16; + int32 field19 = 19; + bool field20 = 20; + bool field28 = 28; + fixed64 field21 = 21; + int32 field22 = 22; + bool field23 = 23; + bool field206 = 206; + fixed32 field203 = 203; + int32 field204 = 204; + string field205 = 205; + uint64 field207 = 207; + uint64 field300 = 300; +} diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto new file mode 100644 index 00000000..51c0b548 --- /dev/null +++ b/benchmarks/benchmarks.proto @@ -0,0 +1,63 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; +package benchmarks; +option java_package = "com.google.protobuf.benchmarks"; + +message BenchmarkDataset { + // Name of the benchmark dataset. This should be unique across all datasets. + // Should only contain word characters: [a-zA-Z0-9_] + string name = 1; + + // Fully-qualified name of the protobuf message for this dataset. + // It will be one of the messages defined benchmark_messages_proto2.proto + // or benchmark_messages_proto3.proto. + // + // Implementations that do not support reflection can implement this with + // an explicit "if/else" chain that lists every known message defined + // in those files. + string message_name = 2; + + // The payload(s) for this dataset. They should be parsed or serialized + // in sequence, in a loop, ie. + // + // while (!benchmarkDone) { // Benchmark runner decides when to exit. + // for (i = 0; i < benchmark.payload.length; i++) { + // parse(benchmark.payload[i]) + // } + // } + // + // This is intended to let datasets include a variety of data to provide + // potentially more realistic results than just parsing the same message + // over and over. A single message parsed repeatedly could yield unusually + // good branch prediction performance. + repeated bytes payload = 3; +} diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc new file mode 100644 index 00000000..61e7adf1 --- /dev/null +++ b/benchmarks/generate_datasets.cc @@ -0,0 +1,117 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include "benchmarks.pb.h" + +using benchmarks::BenchmarkDataset; +using google::protobuf::Descriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::Message; +using google::protobuf::MessageFactory; + +std::set names; + +const char *file_prefix = "dataset."; +const char *file_suffix = ".pb"; + +void WriteFileWithPayloads(const std::string& name, + const std::string& message_name, + const std::vector& payload) { + if (!names.insert(name).second) { + std::cerr << "Duplicate test name: " << name << "\n"; + abort(); + } + + // First verify that this message name exists in our set of benchmark messages + // and that these payloads are valid for the given message. + const Descriptor* d = + DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); + + if (!d) { + std::cerr << "For dataset " << name << ", no such message: " + << message_name << "\n"; + abort(); + } + + Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); + + for (size_t i = 0; i < payload.size(); i++) { + if (!m->ParseFromString(payload[i])) { + std::cerr << "For dataset " << name << ", payload[" << i << "] fails " + << "to parse\n"; + abort(); + } + } + + BenchmarkDataset dataset; + dataset.set_name(name); + dataset.set_message_name(message_name); + for (size_t i = 0; i < payload.size(); i++) { + dataset.add_payload()->assign(payload[i]); + } + + std::ofstream writer; + std::string fname = file_prefix + name + file_suffix; + writer.open(fname.c_str()); + dataset.SerializeToOstream(&writer); + writer.close(); + + std::cerr << "Wrote dataset: " << fname << "\n"; +} + +void WriteFile(const std::string& name, const std::string& message_name, + const std::string& payload) { + std::vector payloads; + payloads.push_back(payload); + WriteFileWithPayloads(name, message_name, payloads); +} + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << + "', please make sure you are running " + "this command from the benchmarks/ " + "directory.\n"; + return std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} + +int main() { + WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1", + ReadFile("google_message1.dat")); + WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1", + ReadFile("google_message1.dat")); + + // Not in proto3 because it has a group, which is not supported. + WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2", + ReadFile("google_message2.dat")); +} diff --git a/configure.ac b/configure.ac index 33a6c64d..d56a7047 100644 --- a/configure.ac +++ b/configure.ac @@ -180,5 +180,5 @@ export CFLAGS export CXXFLAGS AC_CONFIG_SUBDIRS([gmock]) -AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) +AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) AC_OUTPUT diff --git a/tests.sh b/tests.sh index fd81b764..6a9439a5 100755 --- a/tests.sh +++ b/tests.sh @@ -36,6 +36,9 @@ build_cpp() { internal_build_cpp make check -j2 cd conformance && make test_cpp && cd .. + + # Verify benchmarking code can build successfully. + cd benchmarks && make && ./generate-datasets && cd .. } build_cpp_distcheck() {