From e59427a62cdd93ac8b18396d19f6dc74e979df95 Mon Sep 17 00:00:00 2001 From: "kenton@google.com" Date: Thu, 16 Apr 2009 22:30:56 +0000 Subject: [PATCH] Commit Brian Olson's gzip stream implementations. --- CHANGES.txt | 3 + CONTRIBUTORS.txt | 4 +- configure.ac | 15 + src/Makefile.am | 26 +- src/google/protobuf/io/gzip_stream.cc | 296 ++++++++++++++++++ src/google/protobuf/io/gzip_stream.h | 178 +++++++++++ .../protobuf/io/gzip_stream_unittest.sh | 44 +++ .../protobuf/io/zero_copy_stream_impl.cc | 4 + .../protobuf/io/zero_copy_stream_impl.h | 5 + .../protobuf/io/zero_copy_stream_unittest.cc | 141 ++++++++- src/google/protobuf/testing/zcgunzip.cc | 73 +++++ src/google/protobuf/testing/zcgzip.cc | 79 +++++ 12 files changed, 864 insertions(+), 4 deletions(-) create mode 100644 src/google/protobuf/io/gzip_stream.cc create mode 100644 src/google/protobuf/io/gzip_stream.h create mode 100755 src/google/protobuf/io/gzip_stream_unittest.sh create mode 100644 src/google/protobuf/testing/zcgunzip.cc create mode 100644 src/google/protobuf/testing/zcgzip.cc diff --git a/CHANGES.txt b/CHANGES.txt index 94fe94e1..35ddd537 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -19,6 +19,9 @@ * Message interface has method ParseFromBoundedZeroCopyStream() which parses a limited number of bytes from an input stream rather than parsing until EOF. + * GzipInputStream and GzipOutputStream support reading/writing gzip- or + zlib-compressed streams if zlib is available. + (google/protobuf/io/gzip_stream.h) Java * Fixed bug where Message.mergeFrom(Message) failed to merge extensions. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 3755f743..01c8033f 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -34,7 +34,7 @@ Documentation: Maven packaging: Gregory Kick -Non-Google patch contributors: +Patch contributors: Kevin Ko * Small patch to handle trailing slashes in --proto_path flag. 
Johan Euphrosine @@ -57,3 +57,5 @@ Non-Google patch contributors: * Slicing support for repeated scalar fields for the Python API. Oleg Smolsky * MS Visual Studio error format option. + Brian Olson + * gzip/zlib I/O support. diff --git a/configure.ac b/configure.ac index 6015281b..2824960b 100644 --- a/configure.ac +++ b/configure.ac @@ -15,6 +15,11 @@ AC_CONFIG_HEADERS([config.h]) AC_CONFIG_MACRO_DIR([m4]) AM_INIT_AUTOMAKE +AC_ARG_WITH([zlib], + [AS_HELP_STRING([--with-zlib], + [include classes for streaming compressed data in and out @<:@default=check@:>@])], + [],[with_zlib=check]) + # Checks for programs. AC_PROG_CC AC_PROG_CXX @@ -38,6 +43,16 @@ AC_FUNC_MEMCMP AC_FUNC_STRTOD AC_CHECK_FUNCS([ftruncate memset mkdir strchr strerror strtol]) +HAVE_ZLIB=0 +AS_IF([test "$with_zlib" != no], + [AC_SEARCH_LIBS([zlibVersion], [z], + [AC_DEFINE([HAVE_ZLIB], [1], [Enable classes using zlib compression.]) + HAVE_ZLIB=1], + [if test "$with_zlib" != check; then + AC_MSG_FAILURE([--with-zlib was given, but test for zlib failed]) + fi])]) +AM_CONDITIONAL([HAVE_ZLIB], [test $HAVE_ZLIB = 1]) + ACX_PTHREAD AC_CXX_STL_HASH diff --git a/src/Makefile.am b/src/Makefile.am index 18167482..8c0647b3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,15 @@ ## Process this file with automake to produce Makefile.in +if HAVE_ZLIB +GZCHECKPROGRAMS = zcgzip zcgunzip +GZHEADERS = google/protobuf/io/gzip_stream.h +GZTESTS = google/protobuf/io/gzip_stream_unittest.sh +else +GZCHECKPROGRAMS = +GZHEADERS = +GZTESTS = +endif + if GCC # These are good warnings to turn on by default AM_CXXFLAGS = $(PTHREAD_CFLAGS) -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare @@ -40,6 +50,7 @@ nobase_include_HEADERS = \ google/protobuf/wire_format.h \ google/protobuf/wire_format_inl.h \ google/protobuf/io/coded_stream.h \ + $(GZHEADERS) \ google/protobuf/io/printer.h \ google/protobuf/io/tokenizer.h \ google/protobuf/io/zero_copy_stream.h \ @@ -83,6 +94,7 @@ libprotobuf_la_SOURCES = \ 
google/protobuf/unknown_field_set.cc \ google/protobuf/wire_format.cc \ google/protobuf/io/coded_stream.cc \ + google/protobuf/io/gzip_stream.cc \ google/protobuf/io/printer.cc \ google/protobuf/io/tokenizer.cc \ google/protobuf/io/zero_copy_stream.cc \ @@ -159,6 +171,8 @@ protoc_inputs = \ EXTRA_DIST = \ $(protoc_inputs) \ solaris/libstdc++.la \ + google/protobuf/io/gzip_stream.h \ + google/protobuf/io/gzip_stream_unittest.sh \ google/protobuf/testdata/golden_message \ google/protobuf/testdata/golden_packed_fields_message \ google/protobuf/testdata/text_format_unittest_data.txt \ @@ -206,7 +220,7 @@ unittest_proto_middleman: protoc$(EXEEXT) $(protoc_inputs) $(protoc_outputs): unittest_proto_middleman -check_PROGRAMS = protobuf-test +check_PROGRAMS = protobuf-test $(GZCHECKPROGRAMS) protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la protobuf_test_SOURCES = \ google/protobuf/stubs/common_unittest.cc \ @@ -256,6 +270,14 @@ protobuf_test_SOURCES = \ gtest/internal/gtest-port.h \ gtest/internal/gtest-string.h +if HAVE_ZLIB +zcgzip_LDADD = $(PTHREAD_LIBS) libprotobuf.la +zcgzip_SOURCES = google/protobuf/testing/zcgzip.cc + +zcgunzip_LDADD = $(PTHREAD_LIBS) libprotobuf.la +zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc +endif + nodist_protobuf_test_SOURCES = $(protoc_outputs) -TESTS = protobuf-test +TESTS = protobuf-test $(GZTESTS) diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc new file mode 100644 index 00000000..d8d0e70d --- /dev/null +++ b/src/google/protobuf/io/gzip_stream.cc @@ -0,0 +1,296 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2009 Google Inc. All rights reserved. 
+// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: brianolson@google.com (Brian Olson) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the implementation of classes GzipInputStream and +// GzipOutputStream. 
+
+#include "config.h"
+
+#if HAVE_ZLIB
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// Size, in bytes, of the internal buffer used when the caller passes
+// buffer_size == -1.
+static const int kDefaultBufferSize = 65536;
+
+// Prepares the zlib context and allocates the buffer that decompressed
+// data is written into.  inflateInit2() is deferred until the first chunk
+// of input is available (see Inflate()).
+GzipInputStream::GzipInputStream(
+    ZeroCopyInputStream* sub_stream, Format format, int buffer_size)
+    : format_(format), sub_stream_(sub_stream), zerror_(Z_OK) {
+  zcontext_.zalloc = Z_NULL;
+  zcontext_.zfree = Z_NULL;
+  zcontext_.opaque = Z_NULL;
+  zcontext_.total_out = 0;
+  zcontext_.next_in = NULL;
+  zcontext_.avail_in = 0;
+  zcontext_.total_in = 0;
+  zcontext_.msg = NULL;
+  if (buffer_size == -1) {
+    output_buffer_length_ = kDefaultBufferSize;
+  } else {
+    output_buffer_length_ = buffer_size;
+  }
+  output_buffer_ = operator new(output_buffer_length_);
+  GOOGLE_CHECK(output_buffer_ != NULL);
+  zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+  zcontext_.avail_out = output_buffer_length_;
+  output_position_ = output_buffer_;
+}
+
+// Releases the output buffer and the zlib state.
+GzipInputStream::~GzipInputStream() {
+  operator delete(output_buffer_);
+  zerror_ = inflateEnd(&zcontext_);
+}
+
+// Runs one inflate() step into the (reset) output buffer, pulling a new
+// chunk from sub_stream_ first when the previous input is exhausted.
+// Returns the zlib status code.  When sub_stream_ has no more data it
+// returns Z_STREAM_END with zcontext_.next_out set to NULL so that Next()
+// can tell this case apart from a real end-of-stream marker.
+int GzipInputStream::Inflate(int flush) {
+  if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
+    // previous inflate filled output buffer. don't change input params yet.
+  } else if (zcontext_.avail_in == 0) {
+    const void* in;
+    int in_size;
+    // next_in is still NULL only before the very first chunk is fetched.
+    bool first = zcontext_.next_in == NULL;
+    bool ok = sub_stream_->Next(&in, &in_size);
+    if (!ok) {
+      zcontext_.next_out = NULL;
+      zcontext_.avail_out = 0;
+      return Z_STREAM_END;
+    }
+    zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));
+    zcontext_.avail_in = in_size;
+    if (first) {
+      // Value OR'ed into windowBits to select the expected header:
+      // 16 = gzip only, 32 = auto-detect gzip/zlib, 0 = zlib only.
+      int windowBitsFormat = 0;
+      switch (format_) {
+        case GZIP: windowBitsFormat = 16; break;
+        case AUTO: windowBitsFormat = 32; break;
+        case ZLIB: windowBitsFormat = 0; break;
+      }
+      int error = inflateInit2(&zcontext_,
+        /* windowBits */15 | windowBitsFormat);
+      if (error != Z_OK) {
+        return error;
+      }
+    }
+  }
+  zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+  zcontext_.avail_out = output_buffer_length_;
+  output_position_ = output_buffer_;
+  int error = inflate(&zcontext_, flush);
+  return error;
+}
+
+// Hands the caller every decompressed byte between output_position_ and
+// zcontext_.next_out, then marks those bytes as consumed.
+void GzipInputStream::DoNextOutput(const void** data, int* size) {
+  *data = output_position_;
+  *size = reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+          reinterpret_cast<uintptr_t>(output_position_);
+  output_position_ = zcontext_.next_out;
+}
+
+// implements ZeroCopyInputStream ----------------------------------
+
+// Returns the next run of decompressed bytes.  Returns false at end of
+// stream, when the underlying stream is exhausted, or on a zlib error
+// (see ZlibErrorCode()).  A successful call may yield *size == 0.
+bool GzipInputStream::Next(const void** data, int* size) {
+  bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+      || (zerror_ == Z_BUF_ERROR);
+  if ((!ok) || (zcontext_.next_out == NULL)) {
+    return false;
+  }
+  if (zcontext_.next_out != output_position_) {
+    // Bytes are still pending, e.g. after a BackUp().
+    DoNextOutput(data, size);
+    return true;
+  }
+  if (zerror_ == Z_STREAM_END) {
+    *data = NULL;
+    *size = 0;
+    return false;
+  }
+  zerror_ = Inflate(Z_NO_FLUSH);
+  if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
+    // The underlying stream's Next returned false inside Inflate.
+    return false;
+  }
+  ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+      || (zerror_ == Z_BUF_ERROR);
+  if (!ok) {
+    return false;
+  }
+  DoNextOutput(data, size);
+  return true;
+}
+
+// Un-reads the last `count` bytes returned by Next().
+void GzipInputStream::BackUp(int count) {
+  output_position_ = reinterpret_cast<void*>(
+      reinterpret_cast<uintptr_t>(output_position_) - count);
+}
+
+// Discards `count` decompressed bytes.  Returns false if the stream ends
+// or fails before that many bytes have been skipped.
+bool GzipInputStream::Skip(int count) {
+  const void* data;
+  // Initialised so the comparison below is well defined even when the very
+  // first Next() fails without writing to `size`.
+  int size = 0;
+  bool ok = Next(&data, &size);
+  while (ok && (size < count)) {
+    count -= size;
+    ok = Next(&data, &size);
+  }
+  // Only give bytes back when the last Next() actually produced them.
+  if (ok && (size > count)) {
+    BackUp(size - count);
+  }
+  return ok;
+}
+
+// Number of decompressed bytes handed to the caller so far (net of
+// BackUp()).
+int64 GzipInputStream::ByteCount() const {
+  int64 consumed = zcontext_.total_out;
+  // total_out counts everything zlib has produced; bytes still sitting
+  // between output_position_ and next_out have not been consumed yet.
+  // next_out is NULL once the underlying stream ran dry, in which case
+  // nothing is pending.
+  if (zcontext_.next_out != NULL) {
+    consumed -= static_cast<int64>(
+        reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+        reinterpret_cast<uintptr_t>(output_position_));
+  }
+  return consumed;
+}
+
+// =========================================================================
+
+// Allocates the buffer callers write uncompressed data into and
+// initialises the deflate state; the result of deflateInit2() is kept in
+// zerror_.
+GzipOutputStream::GzipOutputStream(
+    ZeroCopyOutputStream* sub_stream, Format format, int buffer_size)
+    : sub_stream_(sub_stream), sub_data_(NULL), sub_data_size_(0) {
+  if (buffer_size == -1) {
+    input_buffer_length_ = kDefaultBufferSize;
+  } else {
+    input_buffer_length_ = buffer_size;
+  }
+  input_buffer_ = operator new(input_buffer_length_);
+  GOOGLE_CHECK(input_buffer_ != NULL);
+
+  zcontext_.zalloc = Z_NULL;
+  zcontext_.zfree = Z_NULL;
+  zcontext_.opaque = Z_NULL;
+  zcontext_.next_out = NULL;
+  zcontext_.avail_out = 0;
+  zcontext_.total_out = 0;
+  zcontext_.next_in = NULL;
+  zcontext_.avail_in = 0;
+  zcontext_.total_in = 0;
+  zcontext_.msg = NULL;
+  // default to GZIP format
+  int windowBitsFormat = 16;
+  if (format == ZLIB) {
+    windowBitsFormat = 0;
+  }
+  zerror_ = deflateInit2(
+      &zcontext_,
+      Z_BEST_COMPRESSION,
+      Z_DEFLATED,
+      /* windowBits */15 | windowBitsFormat,
+      /* memLevel (default) */8,
+      Z_DEFAULT_STRATEGY);
+}
+
+// Finishes the compressed stream (if not already closed) and frees the
+// input buffer.
+GzipOutputStream::~GzipOutputStream() {
+  Close();
+  if (input_buffer_ != NULL) {
+    operator delete(input_buffer_);
+  }
+}
+
+// private
+// Feeds pending input to deflate(), fetching output buffers from
+// sub_stream_ as needed.  Returns the zlib status, or Z_BUF_ERROR when
+// sub_stream_ cannot supply another buffer.
+int GzipOutputStream::Deflate(int flush) {
+  int error = Z_OK;
+  do {
+    if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) {
+      bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_);
+      if (!ok) {
+        sub_data_ = NULL;
+        sub_data_size_ = 0;
+        return Z_BUF_ERROR;
+      }
+      GOOGLE_CHECK_GT(sub_data_size_, 0);
+      zcontext_.next_out = static_cast<Bytef*>(sub_data_);
+      zcontext_.avail_out = sub_data_size_;
+    }
+    error = deflate(&zcontext_, flush);
+  } while (error == Z_OK && zcontext_.avail_out == 0);
+  if (((flush == Z_FULL_FLUSH) || (flush == Z_FINISH))
+      && (zcontext_.avail_out != sub_data_size_)) {
+    // Notify lower layer of data.
+    sub_stream_->BackUp(zcontext_.avail_out);
+    // We don't own the buffer anymore.
+    sub_data_ = NULL;
+    sub_data_size_ = 0;
+  }
+  return error;
+}
+
+// implements ZeroCopyOutputStream ---------------------------------
+
+// Compresses whatever the caller wrote into the buffer returned by the
+// previous call, then hands the whole internal buffer out again.
+bool GzipOutputStream::Next(void** data, int* size) {
+  if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+    return false;
+  }
+  if (zcontext_.avail_in != 0) {
+    zerror_ = Deflate(Z_NO_FLUSH);
+    if (zerror_ != Z_OK) {
+      return false;
+    }
+  }
+  if (zcontext_.avail_in == 0) {
+    // all input was consumed. reset the buffer.
+    // avail_in is set to the full length up front; BackUp() trims it to
+    // what the caller really wrote.
+    zcontext_.next_in = static_cast<Bytef*>(input_buffer_);
+    zcontext_.avail_in = input_buffer_length_;
+    *data = input_buffer_;
+    *size = input_buffer_length_;
+  } else {
+    // The loop in Deflate should consume all avail_in
+    GOOGLE_LOG(DFATAL) << "Deflate left bytes unconsumed";
+  }
+  return true;
+}
+
+// Declares the last `count` bytes of the buffer from Next() unused.
+void GzipOutputStream::BackUp(int count) {
+  GOOGLE_CHECK_GE(zcontext_.avail_in, count);
+  zcontext_.avail_in -= count;
+}
+
+// Uncompressed bytes accepted so far: already deflated plus still pending.
+int64 GzipOutputStream::ByteCount() const {
+  return zcontext_.total_in + zcontext_.avail_in;
+}
+
+// Pushes everything written so far through to sub_stream_.
+// Returns true if the flush succeeded or there was nothing to flush.
+bool GzipOutputStream::Flush() {
+  // One Deflate() call is enough: it already loops until zlib stops
+  // filling the output buffer.  Calling it again after Z_OK would only
+  // produce Z_BUF_ERROR and make a successful flush look like a failure.
+  zerror_ = Deflate(Z_FULL_FLUSH);
+  // zlib reports Z_BUF_ERROR when no progress is possible; with no input
+  // pending and output space available that just means "nothing to do".
+  return (zerror_ == Z_OK) ||
+      (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
+       zcontext_.avail_out != 0);
+}
+
+// Writes out all remaining data plus the stream trailer and releases the
+// deflate state.  Returns true if no error.  Later calls return false
+// without doing anything.
+bool GzipOutputStream::Close() {
+  if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+    return false;
+  }
+  do {
+    zerror_ = Deflate(Z_FINISH);
+  } while (zerror_ == Z_OK);
+  zerror_ = deflateEnd(&zcontext_);
+  bool ok = zerror_ == Z_OK;
+  // Marks the stream as closed so Next()/Close() refuse further work.
+  zerror_ = Z_STREAM_END;
+  return ok;
+}
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // HAVE_ZLIB
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
new file mode 100644
index 00000000..4f29499a
--- /dev/null
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -0,0 +1,178 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc.
nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the definition for classes GzipInputStream and
+// GzipOutputStream.
+//
+// GzipInputStream decompresses data from an underlying
+// ZeroCopyInputStream and provides the decompressed data as a
+// ZeroCopyInputStream.
+//
+// GzipOutputStream is a ZeroCopyOutputStream that compresses data to
+// an underlying ZeroCopyOutputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+
+#include <zlib.h>
+
+#include <google/protobuf/io/zero_copy_stream.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A ZeroCopyInputStream that reads compressed data through zlib
+class LIBPROTOBUF_EXPORT GzipInputStream : public ZeroCopyInputStream {
+ public:
+  // Format key for constructor
+  enum Format {
+    // zlib will autodetect whether the stream has a gzip or a zlib header
+    AUTO = 0,
+
+    // GZIP streams have some extra header data for file attributes.
+    GZIP = 1,
+
+    // Simpler zlib stream format.
+    ZLIB = 2,
+  };
+
+  // Reads compressed data from sub_stream.
+  // format defaults to AUTO (header autodetection).
+  // buffer_size is the size in bytes of the internal decompression buffer;
+  // pass -1 for the default of 64kB.
+  explicit GzipInputStream(
+      ZeroCopyInputStream* sub_stream,
+      Format format = AUTO,
+      int buffer_size = -1);
+  virtual ~GzipInputStream();
+
+  // Return last error message or NULL if no error.
+  inline const char* ZlibErrorMessage() const {
+    return zcontext_.msg;
+  }
+  // Return the most recent zlib status code (Z_OK, Z_STREAM_END, ...).
+  inline int ZlibErrorCode() const {
+    return zerror_;
+  }
+
+  // implements ZeroCopyInputStream ----------------------------------
+  bool Next(const void** data, int* size);
+  void BackUp(int count);
+  bool Skip(int count);
+  int64 ByteCount() const;
+
+ private:
+  Format format_;
+
+  ZeroCopyInputStream* sub_stream_;
+
+  z_stream zcontext_;
+  int zerror_;
+
+  // Buffer that receives decompressed data, the position up to which it
+  // has been handed to the caller, and its total size in bytes.
+  void* output_buffer_;
+  void* output_position_;
+  size_t output_buffer_length_;
+
+  int Inflate(int flush);
+  void DoNextOutput(const void** data, int* size);
+
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipInputStream);
+};
+
+
+// A ZeroCopyOutputStream that writes compressed data through zlib
+class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
+ public:
+  // Format key for constructor
+  enum Format {
+    // GZIP streams have some extra header data for file attributes.
+    GZIP = 1,
+
+    // Simpler zlib stream format.
+    ZLIB = 2,
+  };
+
+  // Writes compressed data to sub_stream.
+  // format defaults to GZIP.
+  // buffer_size is the size in bytes of the internal buffer handed out by
+  // Next(); pass -1 for the default of 64kB.
+  explicit GzipOutputStream(
+      ZeroCopyOutputStream* sub_stream,
+      Format format = GZIP,
+      int buffer_size = -1);
+  virtual ~GzipOutputStream();
+
+  // Return last error message or NULL if no error.
+  inline const char* ZlibErrorMessage() const {
+    return zcontext_.msg;
+  }
+  // Return the most recent zlib status code (Z_OK, Z_STREAM_END, ...).
+  inline int ZlibErrorCode() const {
+    return zerror_;
+  }
+
+  // Flushes data written so far to zipped data in the underlying stream.
+  // It is the caller's responsibility to flush the underlying stream if
+  // necessary.
+  // Compression may be less efficient stopping and starting around flushes.
+  // Returns true if no error.
+  bool Flush();
+
+  // Writes out all data and closes the gzip stream.
+  // It is the caller's responsibility to close the underlying stream if
+  // necessary.
+  // Returns true if no error.
+  bool Close();
+
+  // implements ZeroCopyOutputStream ---------------------------------
+  bool Next(void** data, int* size);
+  void BackUp(int count);
+  int64 ByteCount() const;
+
+ private:
+  ZeroCopyOutputStream* sub_stream_;
+  // Result from calling Next() on sub_stream_
+  void* sub_data_;
+  int sub_data_size_;
+
+  z_stream zcontext_;
+  int zerror_;
+  // Buffer the caller writes uncompressed data into, and its size in bytes.
+  void* input_buffer_;
+  size_t input_buffer_length_;
+
+  // Do some compression.
+  // Takes zlib flush mode.
+  // Returns zlib error code.
+  int Deflate(int flush);
+
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipOutputStream);
+};
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
diff --git a/src/google/protobuf/io/gzip_stream_unittest.sh b/src/google/protobuf/io/gzip_stream_unittest.sh
new file mode 100755
index 00000000..6e8a0943
--- /dev/null
+++ b/src/google/protobuf/io/gzip_stream_unittest.sh
@@ -0,0 +1,44 @@
+#!/bin/sh -x
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2009 Google Inc. All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Author: brianolson@google.com (Brian Olson) +# +# Test compatibility between command line gzip/gunzip binaries and +# ZeroCopyStream versions. + +TESTFILE=Makefile + +(./zcgzip < ${TESTFILE} | gunzip | cmp - ${TESTFILE}) && \ +(gzip < ${TESTFILE} | ./zcgunzip | cmp - ${TESTFILE}) + +# Result of "(cmd) && (cmd)" implicitly becomes result of this script +# and thus the test. 
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc index 04d573e1..730bd2f7 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.cc +++ b/src/google/protobuf/io/zero_copy_stream_impl.cc @@ -528,6 +528,10 @@ bool FileOutputStream::Close() { return copying_output_.Close() && flush_succeeded; } +bool FileOutputStream::Flush() { + return impl_.Flush(); +} + bool FileOutputStream::Next(void** data, int* size) { return impl_.Next(data, size); } diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h index e886d8f8..448aa216 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.h +++ b/src/google/protobuf/io/zero_copy_stream_impl.h @@ -431,6 +431,11 @@ class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream { // Even if an error occurs, the file descriptor is closed when this returns. bool Close(); + // Flushes FileOutputStream's buffers but does not close the + // underlying file. No special measures are taken to ensure that + // underlying operating system file object is synchronized to disk. + bool Flush(); + // By default, the file descriptor is not closed when the stream is // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: // This leaves no way for the caller to detect if close() fails. If diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc index 2ee9e6c4..9adef74d 100644 --- a/src/google/protobuf/io/zero_copy_stream_unittest.cc +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -46,6 +46,8 @@ // "parametized tests" so that one set of tests can be used on all the // implementations. 
+#include "config.h" + #ifdef _MSC_VER #include #else @@ -59,6 +61,9 @@ #include #include +#if HAVE_ZLIB +#include +#endif #include #include @@ -141,6 +146,8 @@ bool IoTest::WriteToOutput(ZeroCopyOutputStream* output, } } +#define MAX_REPEATED_ZEROS 100 + int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) { uint8* out = reinterpret_cast(data); int out_size = size; @@ -148,11 +155,19 @@ int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) { const void* in; int in_size = 0; + int repeated_zeros = 0; + while (true) { if (!input->Next(&in, &in_size)) { return size - out_size; } - EXPECT_GT(in_size, 0); + EXPECT_GT(in_size, -1); + if (in_size == 0) { + repeated_zeros++; + } else { + repeated_zeros = 0; + } + EXPECT_LT(repeated_zeros, MAX_REPEATED_ZEROS); if (out_size <= in_size) { memcpy(out, in, out_size); @@ -263,6 +278,95 @@ TEST_F(IoTest, ArrayIo) { } } +#if HAVE_ZLIB +TEST_F(IoTest, GzipIo) { + const int kBufferSize = 2*1024; + uint8* buffer = new uint8[kBufferSize]; + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + for (int z = 0; z < kBlockSizeCount; z++) { + int gzip_buffer_size = kBlockSizes[z]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + GzipOutputStream gzout( + &output, GzipOutputStream::GZIP, gzip_buffer_size); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + GzipInputStream gzin( + &input, GzipInputStream::GZIP, gzip_buffer_size); + ReadStuff(&gzin); + } + } + } + } + delete [] buffer; +} + +TEST_F(IoTest, ZlibIo) { + const int kBufferSize = 2*1024; + uint8* buffer = new uint8[kBufferSize]; + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + for (int z = 0; z < kBlockSizeCount; z++) { + int gzip_buffer_size = kBlockSizes[z]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize, 
kBlockSizes[i]); + GzipOutputStream gzout( + &output, GzipOutputStream::ZLIB, gzip_buffer_size); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + GzipInputStream gzin( + &input, GzipInputStream::ZLIB, gzip_buffer_size); + ReadStuff(&gzin); + } + } + } + } + delete [] buffer; +} + +TEST_F(IoTest, ZlibIoInputAutodetect) { + const int kBufferSize = 2*1024; + uint8* buffer = new uint8[kBufferSize]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize); + GzipOutputStream gzout(&output, GzipOutputStream::ZLIB); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size); + GzipInputStream gzin(&input, GzipInputStream::AUTO); + ReadStuff(&gzin); + } + { + ArrayOutputStream output(buffer, kBufferSize); + GzipOutputStream gzout(&output, GzipOutputStream::GZIP); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size); + GzipInputStream gzin(&input, GzipInputStream::AUTO); + ReadStuff(&gzin); + } + delete [] buffer; +} +#endif + // There is no string input, only string output. Also, it doesn't support // explicit block sizes. So, we'll only run one test and we'll use // ArrayInput to read back the results. @@ -310,6 +414,41 @@ TEST_F(IoTest, FileIo) { } } +#if HAVE_ZLIB +TEST_F(IoTest, GzipFileIo) { + string filename = TestTempDir() + "/zero_copy_stream_test_file"; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Make a temporary file. + int file = + open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777); + ASSERT_GE(file, 0); + { + FileOutputStream output(file, kBlockSizes[i]); + GzipOutputStream gzout(&output); + WriteStuffLarge(&gzout); + gzout.Close(); + output.Flush(); + EXPECT_EQ(0, output.GetErrno()); + } + + // Rewind. 
+ ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1); + + { + FileInputStream input(file, kBlockSizes[j]); + GzipInputStream gzin(&input); + ReadStuffLarge(&gzin); + EXPECT_EQ(0, input.GetErrno()); + } + + close(file); + } + } +} +#endif + // MSVC raises various debugging exceptions if we try to use a file // descriptor of -1, defeating our tests below. This class will disable // these debug assertions while in scope. diff --git a/src/google/protobuf/testing/zcgunzip.cc b/src/google/protobuf/testing/zcgunzip.cc new file mode 100644 index 00000000..a6197854 --- /dev/null +++ b/src/google/protobuf/testing/zcgunzip.cc @@ -0,0 +1,73 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2009 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Test program to verify that GzipInputStream is compatible with command line
+// gunzip or java.util.zip.GzipInputStream
+//
+// Reads gzip stream on standard input and writes decompressed data to standard
+// output.
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::GzipInputStream;
+
+// Decompresses standard input to standard output.
+// Exits with 0 on success and 1 if writing fails or zlib reports an error.
+int main(int argc, const char** argv) {
+  FileInputStream fin(STDIN_FILENO);
+  GzipInputStream in(&fin);
+
+  while (true) {
+    const void* inptr;
+    int inlen;
+    if (!in.Next(&inptr, &inlen)) {
+      break;
+    }
+    // write() may accept fewer bytes than requested (e.g. on a pipe), so
+    // keep going until the whole chunk is out.  The check must not live in
+    // an assert(): that would vanish under NDEBUG.
+    const char* cursor = static_cast<const char*>(inptr);
+    int remaining = inlen;
+    while (remaining > 0) {
+      const ssize_t written = write(STDOUT_FILENO, cursor, remaining);
+      if (written <= 0) {
+        return 1;
+      }
+      cursor += written;
+      remaining -= static_cast<int>(written);
+    }
+  }
+
+  // Next() returns false both at end of input and on a decoding error;
+  // these are the statuses GzipInputStream itself treats as non-fatal.
+  const int status = in.ZlibErrorCode();
+  if ((status != Z_OK) && (status != Z_STREAM_END) &&
+      (status != Z_BUF_ERROR)) {
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/src/google/protobuf/testing/zcgzip.cc b/src/google/protobuf/testing/zcgzip.cc
new file mode 100644
index 00000000..9133275c
--- /dev/null
+++ b/src/google/protobuf/testing/zcgzip.cc
@@ -0,0 +1,79 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: brianolson@google.com (Brian Olson) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Test program to verify that GzipOutputStream is compatible with command line +// gzip or java.util.zip.GzipOutputStream +// +// Reads data on standard input and writes compressed gzip stream to standard +// output. 
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::GzipOutputStream;
+
+// Compresses standard input to standard output as a gzip stream.
+// Exits with 0 on success and 1 if reading, compressing or writing fails.
+int main(int argc, const char** argv) {
+  FileOutputStream fout(STDOUT_FILENO);
+  GzipOutputStream out(&fout);
+  bool failed = false;
+
+  while (true) {
+    void* outptr = NULL;
+    int outlen = 0;
+    bool ok;
+    // Ask for a buffer until a non-empty one is returned.
+    do {
+      ok = out.Next(&outptr, &outlen);
+    } while (ok && (outlen <= 0));
+    if (!ok) {
+      // No buffer was handed out, so there is nothing valid to read into.
+      failed = true;
+      break;
+    }
+    const int readlen = read(STDIN_FILENO, outptr, outlen);
+    if (readlen <= 0) {
+      // End of input or read error: nothing was written into the buffer.
+      out.BackUp(outlen);
+      if (readlen < 0) {
+        failed = true;
+      }
+      break;
+    }
+    if (readlen < outlen) {
+      out.BackUp(outlen - readlen);
+    }
+  }
+
+  // Finish the gzip stream and push it to the file descriptor explicitly
+  // so that failures are visible in the exit status; the destructors would
+  // otherwise do the same work silently.
+  if (!out.Close()) {
+    failed = true;
+  }
+  if (!fout.Flush()) {
+    failed = true;
+  }
+
+  return failed ? 1 : 0;
+}