Heuristically detect sub-messages when printing unknown fields.
Patch mostly written by Dilip Joseph <dilip.antony.joseph@gmail.com>.
This commit is contained in:
parent
8ccb79057e
commit
a0f27fcd96
6 changed files with 146 additions and 4 deletions
|
@ -36,6 +36,14 @@ Maven packaging:
|
|||
|
||||
Non-Google patch contributors:
|
||||
Kevin Ko <kevin.s.ko@gmail.com>
|
||||
* Small patch to handle trailing slashes in --proto_path flag.
|
||||
Johan Euphrosine <proppy@aminche.com>
|
||||
* Small patch to fix Python CallMethod().
|
||||
Ulrich Kunitz <kune@deine-taler.de>
|
||||
* Small optimizations to Python serialization.
|
||||
Leandro Lucarella <llucax@gmail.com>
|
||||
* VI syntax highlighting tweaks.
|
||||
* Fix compiler to not make output executable.
|
||||
Dilip Joseph <dilip.antony.joseph@gmail.com>
|
||||
* Heuristic detection of sub-messages when printing unknown fields in
|
||||
text format.
|
||||
|
|
|
@ -728,6 +728,16 @@ bool TextFormat::Parser::MergeFromString(const string& input,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* static */ bool TextFormat::PrintUnknownFieldsToString(
|
||||
const UnknownFieldSet& unknown_fields,
|
||||
string* output) {
|
||||
GOOGLE_DCHECK(output) << "output specified is NULL";
|
||||
|
||||
output->clear();
|
||||
io::StringOutputStream output_stream(output);
|
||||
return PrintUnknownFields(unknown_fields, &output_stream);
|
||||
}
|
||||
|
||||
/* static */ bool TextFormat::Print(const Message& message,
|
||||
io::ZeroCopyOutputStream* output) {
|
||||
TextGenerator generator(output);
|
||||
|
@ -738,6 +748,17 @@ bool TextFormat::Parser::MergeFromString(const string& input,
|
|||
return !generator.failed();
|
||||
}
|
||||
|
||||
/* static */ bool TextFormat::PrintUnknownFields(
|
||||
const UnknownFieldSet& unknown_fields,
|
||||
io::ZeroCopyOutputStream* output) {
|
||||
TextGenerator generator(output);
|
||||
|
||||
PrintUnknownFields(unknown_fields, generator);
|
||||
|
||||
// Output false if the generator failed internally.
|
||||
return !generator.failed();
|
||||
}
|
||||
|
||||
/* static */ void TextFormat::Print(const Descriptor* descriptor,
|
||||
const Message::Reflection* message,
|
||||
TextGenerator& generator) {
|
||||
|
@ -922,9 +943,23 @@ static string PaddedHex(IntType value) {
|
|||
}
|
||||
for (int j = 0; j < field.length_delimited_size(); j++) {
|
||||
generator.Print(field_number);
|
||||
generator.Print(": \"");
|
||||
generator.Print(CEscape(field.length_delimited(j)));
|
||||
generator.Print("\"\n");
|
||||
const string& value = field.length_delimited(j);
|
||||
UnknownFieldSet embedded_unknown_fields;
|
||||
if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
|
||||
// This field is parseable as a Message.
|
||||
// So it is probably an embedded message.
|
||||
generator.Print(" {\n");
|
||||
generator.Indent();
|
||||
PrintUnknownFields(embedded_unknown_fields, generator);
|
||||
generator.Outdent();
|
||||
generator.Print("}\n");
|
||||
} else {
|
||||
// This field is not parseable as a Message.
|
||||
// So it is probably just a plain string.
|
||||
generator.Print(": \"");
|
||||
generator.Print(CEscape(value));
|
||||
generator.Print("\"\n");
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < field.group_size(); j++) {
|
||||
generator.Print(field_number);
|
||||
|
|
|
@ -45,9 +45,20 @@ class LIBPROTOBUF_EXPORT TextFormat {
|
|||
// Outputs a textual representation of the given message to the given
|
||||
// output stream.
|
||||
static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
|
||||
|
||||
// Print the fields in an UnknownFieldSet. They are printed by tag number
|
||||
// only. Embedded messages are heuristically identified by attempting to
|
||||
// parse them.
|
||||
static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
|
||||
io::ZeroCopyOutputStream* output);
|
||||
|
||||
// Like Print(), but outputs directly to a string.
|
||||
static bool PrintToString(const Message& message, string* output);
|
||||
|
||||
// Like PrintUnknownFields(), but outputs directly to a string.
|
||||
static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
|
||||
string* output);
|
||||
|
||||
// Outputs a textual representation of the value of the field supplied on
|
||||
// the message supplied. For non-repeated fields, an index of -1 must
|
||||
// be supplied. Note that this method will print the default value for a
|
||||
|
@ -130,7 +141,8 @@ class LIBPROTOBUF_EXPORT TextFormat {
|
|||
TextGenerator& generator);
|
||||
|
||||
// Print the fields in an UnknownFieldSet. They are printed by tag number
|
||||
// only.
|
||||
// only. Embedded messages are heuristically identified by attempting to
|
||||
// parse them.
|
||||
static void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
|
||||
TextGenerator& generator);
|
||||
|
||||
|
|
|
@ -158,6 +158,50 @@ TEST_F(TextFormatTest, PrintUnknownFields) {
|
|||
message.DebugString());
|
||||
}
|
||||
|
||||
TEST_F(TextFormatTest, PrintUnknownMessage) {
|
||||
// Test heuristic printing of messages in an UnknownFieldSet.
|
||||
|
||||
protobuf_unittest::TestAllTypes message;
|
||||
|
||||
// Cases which should not be interpreted as sub-messages.
|
||||
|
||||
// 'a' is a valid FIXED64 tag, so for the string to be parseable as a message
|
||||
// it should be followed by 8 bytes. Since this string only has two
|
||||
// subsequent bytes, it should be treated as a string.
|
||||
message.add_repeated_string("abc");
|
||||
|
||||
// 'd' happens to be a valid ENDGROUP tag. So,
|
||||
// UnknownFieldSet::MergeFromCodedStream() will successfully parse "def", but
|
||||
// the ConsumedEntireMessage() check should fail.
|
||||
message.add_repeated_string("def");
|
||||
|
||||
// A zero-length string should never be interpreted as a message even though
|
||||
// it is technically valid as one.
|
||||
message.add_repeated_string("");
|
||||
|
||||
// Case which should be interpreted as a sub-message.
|
||||
|
||||
// An actual nested message with content should always be interpreted as a
|
||||
// nested message.
|
||||
message.add_repeated_nested_message()->set_bb(123);
|
||||
|
||||
string data;
|
||||
message.SerializeToString(&data);
|
||||
|
||||
string text;
|
||||
UnknownFieldSet unknown_fields;
|
||||
EXPECT_TRUE(unknown_fields.ParseFromString(data));
|
||||
EXPECT_TRUE(TextFormat::PrintUnknownFieldsToString(unknown_fields, &text));
|
||||
EXPECT_EQ(
|
||||
"44: \"abc\"\n"
|
||||
"44: \"def\"\n"
|
||||
"44: \"\"\n"
|
||||
"48 {\n"
|
||||
" 1: 123\n"
|
||||
"}\n",
|
||||
text);
|
||||
}
|
||||
|
||||
TEST_F(TextFormatTest, ParseBasic) {
|
||||
io::ArrayInputStream input_stream(proto_debug_string_.data(),
|
||||
proto_debug_string_.size());
|
||||
|
|
|
@ -20,6 +20,10 @@
|
|||
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
#include <google/protobuf/stubs/stl_util-inl.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/io/zero_copy_stream.h>
|
||||
#include <google/protobuf/io/zero_copy_stream_impl.h>
|
||||
#include <google/protobuf/wire_format.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
@ -57,6 +61,34 @@ void UnknownFieldSet::MergeFrom(const UnknownFieldSet& other) {
|
|||
}
|
||||
}
|
||||
|
||||
bool UnknownFieldSet::MergeFromCodedStream(io::CodedInputStream* input) {
|
||||
|
||||
UnknownFieldSet other;
|
||||
if (internal::WireFormat::SkipMessage(input, &other) &&
|
||||
input->ConsumedEntireMessage()) {
|
||||
MergeFrom(other);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool UnknownFieldSet::ParseFromCodedStream(io::CodedInputStream* input) {
|
||||
Clear();
|
||||
return MergeFromCodedStream(input);
|
||||
}
|
||||
|
||||
bool UnknownFieldSet::ParseFromZeroCopyStream(io::ZeroCopyInputStream* input) {
|
||||
io::CodedInputStream coded_input(input);
|
||||
return ParseFromCodedStream(&coded_input) &&
|
||||
coded_input.ConsumedEntireMessage();
|
||||
}
|
||||
|
||||
bool UnknownFieldSet::ParseFromArray(const void* data, int size) {
|
||||
io::ArrayInputStream input(data, size);
|
||||
return ParseFromZeroCopyStream(&input);
|
||||
}
|
||||
|
||||
const UnknownField* UnknownFieldSet::FindFieldByNumber(int number) const {
|
||||
if (internal_ == NULL) return NULL;
|
||||
|
||||
|
|
|
@ -77,6 +77,17 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet {
|
|||
// the existing UnknownField.
|
||||
UnknownField* AddField(int number);
|
||||
|
||||
// Parsing helpers -------------------------------------------------
|
||||
// These work exactly like the similarly-named methods of Message.
|
||||
|
||||
bool MergeFromCodedStream(io::CodedInputStream* input);
|
||||
bool ParseFromCodedStream(io::CodedInputStream* input);
|
||||
bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input);
|
||||
bool ParseFromArray(const void* data, int size);
|
||||
inline bool ParseFromString(const string& data) {
|
||||
return ParseFromArray(data.data(), data.size());
|
||||
}
|
||||
|
||||
private:
|
||||
// "Active" fields are ones which have been added since the last time Clear()
|
||||
// was called. Inactive fields are objects we are keeping around in case
|
||||
|
|
Loading…
Add table
Reference in a new issue