Added framework for generating/consuming benchmarking data sets.
This takes the code that was sitting in benchmarks/ already and makes it easier for language-specific benchmarks to consume. Future PRs will enhance this so that the language-specific benchmarks can report metrics back that will be tracked over time in PerfKit.
This commit is contained in:
parent
f53f911793
commit
2e83110230
7 changed files with 384 additions and 10 deletions
|
@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign
|
|||
SUBDIRS = . src
|
||||
|
||||
# Always include gmock in distributions.
|
||||
DIST_SUBDIRS = $(subdirs) src conformance
|
||||
DIST_SUBDIRS = $(subdirs) src conformance benchmarks
|
||||
|
||||
# Build gmock before we build protobuf tests. We don't add gmock to SUBDIRS
|
||||
# because then "make check" would also build and run all of gmock's own tests,
|
||||
|
@ -36,6 +36,10 @@ clean-local:
|
|||
echo "Making clean in conformance"; \
|
||||
cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \
|
||||
fi; \
|
||||
if test -e benchmarks/Makefile; then \
|
||||
echo "Making clean in benchmarks"; \
|
||||
cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \
|
||||
fi; \
|
||||
if test -e objectivec/DevTools; then \
|
||||
echo "Cleaning any ObjC pyc files"; \
|
||||
rm -f objectivec/DevTools/*.pyc; \
|
||||
|
|
75
benchmarks/Makefile.am
Normal file
75
benchmarks/Makefile.am
Normal file
|
@ -0,0 +1,75 @@
|
|||
|
||||
benchmarks_protoc_inputs = \
|
||||
benchmarks.proto \
|
||||
benchmark_messages_proto3.proto
|
||||
|
||||
benchmarks_protoc_inputs_proto2 = \
|
||||
benchmark_messages_proto2.proto
|
||||
|
||||
benchmarks_protoc_outputs = \
|
||||
benchmarks.pb.cc \
|
||||
benchmarks.pb.h \
|
||||
benchmark_messages_proto3.pb.cc \
|
||||
benchmark_messages_proto3.pb.h
|
||||
|
||||
benchmarks_protoc_outputs_proto2 = \
|
||||
benchmark_messages_proto2.pb.cc \
|
||||
benchmark_messages_proto2.pb.h
|
||||
|
||||
bin_PROGRAMS = generate-datasets
|
||||
|
||||
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
|
||||
generate_datasets_SOURCES = generate_datasets.cc
|
||||
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
|
||||
nodist_generate_datasets_SOURCES = \
|
||||
google_message1.h \
|
||||
google_message2.h \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2)
|
||||
|
||||
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
|
||||
# so a direct "make generate-datasets" could fail if parallel enough.
|
||||
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h
|
||||
|
||||
$(benchmarks_protoc_outputs): protoc_middleman
|
||||
$(benchmarks_protoc_outputs_proto2): protoc_middleman2
|
||||
|
||||
google_message1.h: google_message1.dat
|
||||
xxd -i $< $@
|
||||
|
||||
google_message2.h: google_message2.dat
|
||||
xxd -i $< $@
|
||||
|
||||
CLEANFILES = \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2) \
|
||||
google_message1.h \
|
||||
google_message2.h \
|
||||
protoc_middleman \
|
||||
protoc_middleman2 \
|
||||
dataset.*
|
||||
|
||||
if USE_EXTERNAL_PROTOC
|
||||
|
||||
protoc_middleman: $(benchmarks_protoc_inputs)
|
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
|
||||
touch protoc_middleman
|
||||
|
||||
protoc_middleman2: $(benchmarks_protoc_inputs_proto2)
|
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
|
||||
touch protoc_middleman2
|
||||
|
||||
else
|
||||
|
||||
# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
|
||||
# relative to srcdir, which may not be the same as the current directory when
|
||||
# building out-of-tree.
|
||||
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
|
||||
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
|
||||
touch protoc_middleman
|
||||
|
||||
# Generates the proto2 benchmark message sources with the in-tree protoc.
# The stamp file must be protoc_middleman2 (not protoc_middleman): that is the
# file $(benchmarks_protoc_outputs_proto2) depends on, and touching the wrong
# stamp would leave this target permanently out of date.
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
	touch protoc_middleman2
|
||||
|
||||
endif
|
|
@ -1,11 +1,14 @@
|
|||
// Benchmark messages for proto2.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package benchmarks;
|
||||
package benchmarks.p2;
|
||||
option java_package = "com.google.protobuf.benchmarks";
|
||||
|
||||
option java_outer_classname = "GoogleSpeed";
|
||||
// This is the default, but we specify it here explicitly.
|
||||
option optimize_for = SPEED;
|
||||
|
||||
message SpeedMessage1 {
|
||||
message GoogleMessage1 {
|
||||
required string field1 = 1;
|
||||
optional string field9 = 9;
|
||||
optional string field18 = 18;
|
||||
|
@ -40,7 +43,7 @@ message SpeedMessage1 {
|
|||
optional int32 field23 = 23 [default=0];
|
||||
optional bool field24 = 24 [default=false];
|
||||
optional int32 field25 = 25 [default=0];
|
||||
optional SpeedMessage1SubMessage field15 = 15;
|
||||
optional GoogleMessage1SubMessage field15 = 15;
|
||||
optional bool field78 = 78;
|
||||
optional int32 field67 = 67 [default=0];
|
||||
optional int32 field68 = 68;
|
||||
|
@ -49,7 +52,7 @@ message SpeedMessage1 {
|
|||
optional int32 field131 = 131 [default=0];
|
||||
}
|
||||
|
||||
message SpeedMessage1SubMessage {
|
||||
message GoogleMessage1SubMessage {
|
||||
optional int32 field1 = 1 [default=0];
|
||||
optional int32 field2 = 2 [default=0];
|
||||
optional int32 field3 = 3 [default=0];
|
||||
|
@ -72,7 +75,7 @@ message SpeedMessage1SubMessage {
|
|||
optional uint64 field300 = 300;
|
||||
}
|
||||
|
||||
message SpeedMessage2 {
|
||||
message GoogleMessage2 {
|
||||
optional string field1 = 1;
|
||||
optional int64 field3 = 3;
|
||||
optional int64 field4 = 4;
|
||||
|
@ -112,7 +115,7 @@ message SpeedMessage2 {
|
|||
repeated int32 field73 = 73;
|
||||
optional int32 field20 = 20 [default=0];
|
||||
optional string field24 = 24;
|
||||
optional SpeedMessage2GroupedMessage field31 = 31;
|
||||
optional GoogleMessage2GroupedMessage field31 = 31;
|
||||
}
|
||||
repeated string field128 = 128;
|
||||
optional int64 field131 = 131;
|
||||
|
@ -123,7 +126,7 @@ message SpeedMessage2 {
|
|||
optional bool field206 = 206 [default=false];
|
||||
}
|
||||
|
||||
message SpeedMessage2GroupedMessage {
|
||||
message GoogleMessage2GroupedMessage {
|
||||
optional float field1 = 1;
|
||||
optional float field2 = 2;
|
||||
optional float field3 = 3 [default=0.0];
|
76
benchmarks/benchmark_messages_proto3.proto
Normal file
76
benchmarks/benchmark_messages_proto3.proto
Normal file
|
@ -0,0 +1,76 @@
|
|||
// Benchmark messages for proto3.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package benchmarks.p3;
|
||||
option java_package = "com.google.protobuf.benchmarks";
|
||||
|
||||
// This is the default, but we specify it here explicitly.
|
||||
option optimize_for = SPEED;
|
||||
|
||||
message GoogleMessage1 {
|
||||
string field1 = 1;
|
||||
string field9 = 9;
|
||||
string field18 = 18;
|
||||
bool field80 = 80;
|
||||
bool field81 = 81;
|
||||
int32 field2 = 2;
|
||||
int32 field3 = 3;
|
||||
int32 field280 = 280;
|
||||
int32 field6 = 6;
|
||||
int64 field22 = 22;
|
||||
string field4 = 4;
|
||||
repeated fixed64 field5 = 5;
|
||||
bool field59 = 59;
|
||||
string field7 = 7;
|
||||
int32 field16 = 16;
|
||||
int32 field130 = 130;
|
||||
bool field12 = 12;
|
||||
bool field17 = 17;
|
||||
bool field13 = 13;
|
||||
bool field14 = 14;
|
||||
int32 field104 = 104;
|
||||
int32 field100 = 100;
|
||||
int32 field101 = 101;
|
||||
string field102 = 102;
|
||||
string field103 = 103;
|
||||
int32 field29 = 29;
|
||||
bool field30 = 30;
|
||||
int32 field60 = 60;
|
||||
int32 field271 = 271;
|
||||
int32 field272 = 272;
|
||||
int32 field150 = 150;
|
||||
int32 field23 = 23;
|
||||
bool field24 = 24;
|
||||
int32 field25 = 25;
|
||||
GoogleMessage1SubMessage field15 = 15;
|
||||
bool field78 = 78;
|
||||
int32 field67 = 67;
|
||||
int32 field68 = 68;
|
||||
int32 field128 = 128;
|
||||
string field129 = 129;
|
||||
int32 field131 = 131;
|
||||
}
|
||||
|
||||
message GoogleMessage1SubMessage {
|
||||
int32 field1 = 1;
|
||||
int32 field2 = 2;
|
||||
int32 field3 = 3;
|
||||
string field15 = 15;
|
||||
bool field12 = 12;
|
||||
int64 field13 = 13;
|
||||
int64 field14 = 14;
|
||||
int32 field16 = 16;
|
||||
int32 field19 = 19;
|
||||
bool field20 = 20;
|
||||
bool field28 = 28;
|
||||
fixed64 field21 = 21;
|
||||
int32 field22 = 22;
|
||||
bool field23 = 23;
|
||||
bool field206 = 206;
|
||||
fixed32 field203 = 203;
|
||||
int32 field204 = 204;
|
||||
string field205 = 205;
|
||||
uint64 field207 = 207;
|
||||
uint64 field300 = 300;
|
||||
}
|
102
benchmarks/benchmarks.proto
Normal file
102
benchmarks/benchmarks.proto
Normal file
|
@ -0,0 +1,102 @@
|
|||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
syntax = "proto3";
|
||||
package benchmarks;
|
||||
option java_package = "com.google.protobuf.benchmarks";
|
||||
|
||||
message BenchmarkDataset {
|
||||
// Name of the benchmark dataset. This should be unique across all datasets.
|
||||
// Should only contain word characters: [a-zA-Z0-9_]
|
||||
string name = 1;
|
||||
|
||||
// Fully-qualified name of the protobuf message for this dataset.
|
||||
// It will be one of the messages defined in the benchmark_messages_*.proto
// files. Implementations that do not support reflection can implement this
// with an explicit "if/else" chain that lists every possible message defined
// in those files.
|
||||
string message_name = 2;
|
||||
|
||||
// The payload(s) for this dataset. They should be parsed or serialized
|
||||
// in sequence, in a loop, i.e.
|
||||
//
|
||||
// while (!benchmarkDone) { // Benchmark runner decides when to exit.
|
||||
// for (i = 0; i < benchmark.payload.length; i++) {
|
||||
// parse(benchmark.payload[i])
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// This is intended to let datasets include a variety of data to provide
|
||||
// potentially more realistic results than just parsing the same message
|
||||
// over and over. A single message parsed repeatedly could yield unusually
|
||||
// good branch prediction performance.
|
||||
repeated bytes payload = 3;
|
||||
}
|
||||
|
||||
// A benchmark can write out metrics that we will then upload to our metrics
|
||||
// database for tracking over time.
|
||||
message Metric {
|
||||
// A unique ID for these results. Used for de-duping.
|
||||
string guid = 1;
|
||||
|
||||
// The labels ("tags" in the text below) specify exactly what benchmark was run against the dataset.
|
||||
// The specific benchmark suite can decide what these mean, but here are
|
||||
// some common tags that have a predefined meaning:
|
||||
//
|
||||
// - "dataset": for tests that pertain to a specific dataset.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// # Tests parsing from binary proto string using arenas.
|
||||
// tags={
|
||||
// dataset: "testalltypes",
|
||||
// op: "parse",
|
||||
// format: "binaryproto",
|
||||
// input: "string"
|
||||
// arena: "true"
|
||||
// }
|
||||
//
|
||||
// # Tests serializing to JSON string.
|
||||
// tags={
|
||||
// dataset: "testalltypes",
|
||||
// op: "serialize",
|
||||
// format: "json",
|
||||
// input: "string"
|
||||
// }
|
||||
map<string, string> labels = 2;
|
||||
|
||||
// Unit of measurement for the metric:
|
||||
// - a speed test might be "mb_per_second" or "ops_per_second"
|
||||
// - a size test might be "kb".
|
||||
string unit = 3;
|
||||
|
||||
// Metric value.
|
||||
double value = 4;
|
||||
}
|
114
benchmarks/generate_datasets.cc
Normal file
114
benchmarks/generate_datasets.cc
Normal file
|
@ -0,0 +1,114 @@
|
|||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
const char *file_prefix = "dataset.";
|
||||
const char *file_suffix = ".pb";
|
||||
|
||||
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "benchmarks.pb.h"
#include "google_message1.h"
#include "google_message2.h"
|
||||
|
||||
using benchmarks::BenchmarkDataset;
|
||||
using google::protobuf::Descriptor;
|
||||
using google::protobuf::DescriptorPool;
|
||||
using google::protobuf::Message;
|
||||
using google::protobuf::MessageFactory;
|
||||
|
||||
#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr))
|
||||
|
||||
std::set<std::string> names;
|
||||
|
||||
void WriteFileWithPayloads(const std::string& name,
|
||||
const std::string& message_name,
|
||||
const std::vector<std::string>& payload) {
|
||||
if (!names.insert(name).second) {
|
||||
std::cerr << "Duplicate test name: " << name << "\n";
|
||||
abort();
|
||||
}
|
||||
|
||||
// First verify that this message name exists in our set of benchmark messages
|
||||
// and that these payloads are valid for the given message.
|
||||
const Descriptor* d =
|
||||
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name);
|
||||
|
||||
if (!d) {
|
||||
std::cerr << "For dataset " << name << ", no such message: "
|
||||
<< message_name << "\n";
|
||||
abort();
|
||||
}
|
||||
|
||||
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New();
|
||||
|
||||
for (size_t i = 0; i < payload.size(); i++) {
|
||||
if (!m->ParseFromString(payload[i])) {
|
||||
std::cerr << "For dataset " << name << ", payload[" << i << "] fails "
|
||||
<< "to parse\n";
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
BenchmarkDataset dataset;
|
||||
dataset.set_name(name);
|
||||
dataset.set_message_name(message_name);
|
||||
for (size_t i = 0; i < payload.size(); i++) {
|
||||
dataset.add_payload()->assign(payload[i]);
|
||||
}
|
||||
|
||||
std::string serialized;
|
||||
dataset.SerializeToString(&serialized);
|
||||
|
||||
std::ofstream writer;
|
||||
std::string fname = file_prefix + name + file_suffix;
|
||||
writer.open(fname);
|
||||
writer << serialized;
|
||||
writer.close();
|
||||
|
||||
std::cerr << "Wrote dataset: " << fname << "\n";
|
||||
}
|
||||
|
||||
// Convenience wrapper for a dataset that consists of exactly one payload:
// wraps the payload in a one-element vector and forwards to
// WriteFileWithPayloads().
void WriteFile(const std::string& name, const std::string& message_name,
               const std::string& payload) {
  const std::vector<std::string> single_payload(1, payload);
  WriteFileWithPayloads(name, message_name, single_payload);
}
|
||||
|
||||
// Writes one dataset file per (dataset name, message type) pairing, using the
// byte arrays embedded via google_message1.h and google_message2.h.
int main() {
  const std::string message1_bytes = ARRAY_TO_STRING(google_message1_dat);
  const std::string message2_bytes = ARRAY_TO_STRING(google_message2_dat);

  // The same bytes are emitted once for the proto3 and once for the proto2
  // definition of GoogleMessage1.
  WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1",
            message1_bytes);
  WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1",
            message1_bytes);

  // Not in proto3 because it has a group, which is not supported.
  WriteFile("google_message2", "benchmarks.p2.GoogleMessage2",
            message2_bytes);

  return 0;
}
|
|
@ -180,5 +180,5 @@ export CFLAGS
|
|||
export CXXFLAGS
|
||||
AC_CONFIG_SUBDIRS([gmock])
|
||||
|
||||
AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
|
||||
AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
|
||||
AC_OUTPUT
|
||||
|
|
Loading…
Add table
Reference in a new issue