Optimize Java string serialization. Patch from Evan Jones.
This commit is contained in:
parent
ab6950d75d
commit
daee05168e
6 changed files with 99 additions and 9 deletions
|
@ -1,3 +1,8 @@
|
|||
????-??-?? version 2.3.1:
|
||||
|
||||
Java
|
||||
* Improved performance of string serialization.
|
||||
|
||||
2010-01-08 version 2.3.0:
|
||||
|
||||
General
|
||||
|
|
|
@ -80,6 +80,8 @@ Patch contributors:
|
|||
* Fixes for Solaris 10 32/64-bit confusion.
|
||||
Evan Jones <evanj@mit.edu>
|
||||
* Optimize Java serialization code when writing a small message to a stream.
|
||||
* Optimize Java serialization of strings so that UTF-8 encoding happens only
|
||||
once per string per serialization call.
|
||||
* Clean up some Java warnings.
|
||||
Michael Kucharski <m.kucharski@gmail.com>
|
||||
* Added CodedInputStream.getTotalBytesRead().
|
||||
|
|
|
@ -193,6 +193,23 @@ public final class CodedOutputStream {
|
|||
writeStringNoTag(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a {@code string} field, including tag, to the stream, where bytes
|
||||
* is the encoded version of value. Used by the SPEED version of messages
|
||||
* to avoid performing the UTF-8 conversion twice. bytes is simply a hint
|
||||
* and may be null. If it is null, value will be converted as usual.
|
||||
*/
|
||||
public void writeStringCached(final int fieldNumber, final String value,
|
||||
ByteString bytes)
|
||||
throws IOException {
|
||||
// The cache can be null if serializing without getting the size first, or
|
||||
// if there are multiple threads.
|
||||
if (bytes == null) {
|
||||
bytes = ByteString.copyFromUtf8(value);
|
||||
}
|
||||
writeBytes(fieldNumber, bytes);
|
||||
}
|
||||
|
||||
/** Write a {@code group} field, including tag, to the stream. */
|
||||
public void writeGroup(final int fieldNumber, final MessageLite value)
|
||||
throws IOException {
|
||||
|
|
|
@ -36,6 +36,7 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
|
|||
import junit.framework.TestCase;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -211,6 +212,29 @@ public class CodedOutputStreamTest extends TestCase {
|
|||
0x9abcdef012345678L);
|
||||
}
|
||||
|
||||
/** Test writing cached strings. */
|
||||
public void testWriteStringCached() throws IOException {
|
||||
final ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
final CodedOutputStream stream = CodedOutputStream.newInstance(output);
|
||||
|
||||
// Test writing a string that is not cached
|
||||
stream.writeStringCached(5, "hello", null);
|
||||
stream.flush();
|
||||
CodedInputStream in = CodedInputStream.newInstance(output.toByteArray());
|
||||
assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
|
||||
in.readTag());
|
||||
assertEquals("hello", in.readString());
|
||||
|
||||
// Write a cached string: the real string is ignored
|
||||
output.reset();
|
||||
stream.writeStringCached(5, "ignored", ByteString.copyFromUtf8("hello"));
|
||||
stream.flush();
|
||||
in = CodedInputStream.newInstance(output.toByteArray());
|
||||
assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
|
||||
in.readTag());
|
||||
assertEquals("hello", in.readString());
|
||||
}
|
||||
|
||||
/** Test encodeZigZag32() and encodeZigZag64(). */
|
||||
public void testEncodeZigZag() throws Exception {
|
||||
assertEquals(0, CodedOutputStream.encodeZigZag32( 0));
|
||||
|
|
|
@ -199,6 +199,14 @@ GenerateMembers(io::Printer* printer) const {
|
|||
"private $type$ $name$_ = $default$;\n"
|
||||
"public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
|
||||
"public $type$ get$capitalized_name$() { return $name$_; }\n");
|
||||
// Avoid double encoding for Java strings
|
||||
// This field does not need to be volatile because ByteString is immutable.
|
||||
// http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html#finalRight
|
||||
// However, it seems better to be safe than sorry.
|
||||
if (ShouldUseStringEncodingCache()) {
|
||||
printer->Print(variables_,
|
||||
"private volatile com.google.protobuf.ByteString $name$EncodedCache_;\n");
|
||||
}
|
||||
}
|
||||
|
||||
void PrimitiveFieldGenerator::
|
||||
|
@ -259,25 +267,57 @@ GenerateParsingCode(io::Printer* printer) const {
|
|||
|
||||
void PrimitiveFieldGenerator::
|
||||
GenerateSerializationCode(io::Printer* printer) const {
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" output.write$capitalized_type$($number$, get$capitalized_name$());\n"
|
||||
"}\n");
|
||||
if (ShouldUseStringEncodingCache()) {
|
||||
// Pass the cached serialized version, then forget it.
|
||||
// The cached version could be null if we didn't compute the size first,
|
||||
// or if there are two threads attempting to serialize simultaneously.
|
||||
// CodedOutputStream.writeStringCached handles this for us.
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" output.write$capitalized_type$Cached($number$,\n"
|
||||
" get$capitalized_name$(),\n"
|
||||
" $name$EncodedCache_);\n"
|
||||
" $name$EncodedCache_ = null;\n"
|
||||
"}\n");
|
||||
} else {
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" output.write$capitalized_type$($number$, get$capitalized_name$());\n"
|
||||
"}\n");
|
||||
}
|
||||
}
|
||||
|
||||
void PrimitiveFieldGenerator::
|
||||
GenerateSerializedSizeCode(io::Printer* printer) const {
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" size += com.google.protobuf.CodedOutputStream\n"
|
||||
" .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
|
||||
"}\n");
|
||||
// Avoid double encoding for strings: serialize the string here
|
||||
if (ShouldUseStringEncodingCache()) {
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" com.google.protobuf.ByteString serialized = \n"
|
||||
" com.google.protobuf.ByteString.copyFromUtf8(\n"
|
||||
" get$capitalized_name$());\n"
|
||||
" $name$EncodedCache_ = serialized;\n"
|
||||
" size += com.google.protobuf.CodedOutputStream\n"
|
||||
" .computeBytesSize($number$, serialized);\n"
|
||||
"}\n");
|
||||
} else {
|
||||
printer->Print(variables_,
|
||||
"if (has$capitalized_name$()) {\n"
|
||||
" size += com.google.protobuf.CodedOutputStream\n"
|
||||
" .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
|
||||
"}\n");
|
||||
}
|
||||
}
|
||||
|
||||
string PrimitiveFieldGenerator::GetBoxedType() const {
|
||||
return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
|
||||
}
|
||||
|
||||
bool PrimitiveFieldGenerator::ShouldUseStringEncodingCache() const {
|
||||
return GetType(descriptor_) == FieldDescriptor::TYPE_STRING &&
|
||||
descriptor_->file()->options().optimize_for() == FileOptions::SPEED;
|
||||
}
|
||||
|
||||
// ===================================================================
|
||||
|
||||
RepeatedPrimitiveFieldGenerator::
|
||||
|
|
|
@ -62,6 +62,8 @@ class PrimitiveFieldGenerator : public FieldGenerator {
|
|||
string GetBoxedType() const;
|
||||
|
||||
private:
|
||||
bool ShouldUseStringEncodingCache() const;
|
||||
|
||||
const FieldDescriptor* descriptor_;
|
||||
map<string, string> variables_;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue