From f05cdbc13d3b553b5ce79c0bfa28317462f80f19 Mon Sep 17 00:00:00 2001 From: Lovro Puzar Date: Tue, 15 Dec 2015 08:13:53 -0800 Subject: [PATCH] Support gzip in folly/io:compression Summary: gzip files can be easier to work with operationally than raw zlib thanks to gzip+gunzip. zlib supports adding/stripping the gzip header; expose that through a new CodecType. Reviewed By: chipturner Differential Revision: D2759554 fb-gh-sync-id: 67e0f5f88e087d363db1c349cabb909fb3f00417 --- folly/io/Compression.cpp | 31 ++++++++++++++++++++++++++++--- folly/io/Compression.h | 9 ++++++++- folly/io/test/CompressionTest.cpp | 4 +++- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index 11fb8e3c..767e6760 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -469,7 +469,7 @@ std::unique_ptr ZlibCodec::create(int level, CodecType type) { } ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) { - DCHECK(type == CodecType::ZLIB); + DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP); switch (level) { case COMPRESSION_LEVEL_FASTEST: level = 1; @@ -534,7 +534,22 @@ std::unique_ptr ZlibCodec::doCompress(const IOBuf* data) { stream.zfree = nullptr; stream.opaque = nullptr; - int rc = deflateInit(&stream, level_); + // Using deflateInit2() to support gzip. "The windowBits parameter is the + // base two logarithm of the maximum window size (...) The default value is + // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer + // around the compressed data instead of a zlib wrapper. The gzip header + // will have no file name, no extra data, no comment, no modification time + // (set to zero), no header crc, and the operating system will be set to 255 + // (unknown)." + int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0); + // All other parameters (method, memLevel, strategy) get default values from + // the zlib manual. + int rc = deflateInit2(&stream, + level_, + Z_DEFLATED, + windowBits, + /* memLevel */ 8, + Z_DEFAULT_STRATEGY); if (rc != Z_OK) { throw std::runtime_error(to( "ZlibCodec: deflateInit error: ", rc, ": ", stream.msg)); @@ -614,7 +629,11 @@ std::unique_ptr ZlibCodec::doUncompress(const IOBuf* data, stream.zfree = nullptr; stream.opaque = nullptr; - int rc = inflateInit(&stream); + // "The windowBits parameter is the base two logarithm of the maximum window + // size (...) The default value is 15 (...) add 16 to decode only the gzip + // format (the zlib format will return a Z_DATA_ERROR)." + int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0); + int rc = inflateInit2(&stream, windowBits); if (rc != Z_OK) { throw std::runtime_error(to( "ZlibCodec: inflateInit error: ", rc, ": ", stream.msg)); @@ -1063,6 +1082,12 @@ std::unique_ptr getCodec(CodecType type, int level) { #else nullptr, #endif + +#if FOLLY_HAVE_LIBZ + ZlibCodec::create, +#else + nullptr, +#endif }; size_t idx = static_cast(type); diff --git a/folly/io/Compression.h b/folly/io/Compression.h index 5bcb5177..05e3ed83 100644 --- a/folly/io/Compression.h +++ b/folly/io/Compression.h @@ -80,7 +80,14 @@ enum class CodecType { */ ZSTD_BETA = 8, - NUM_CODEC_TYPES = 9, + /** + * Use gzip compression. This is the same compression algorithm as ZLIB but + * gzip-compressed files tend to be easier to work with from the command line. + * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6 + */ + GZIP = 9, + + NUM_CODEC_TYPES = 10, }; class Codec { diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp index 1a177f25..2c1a0522 100644 --- a/folly/io/test/CompressionTest.cpp +++ b/folly/io/test/CompressionTest.cpp @@ -129,6 +129,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) { EXPECT_FALSE(getCodec(CodecType::LZMA2_VARINT_SIZE) ->needsUncompressedLength()); EXPECT_TRUE(getCodec(CodecType::ZSTD_BETA)->needsUncompressedLength()); + EXPECT_FALSE(getCodec(CodecType::GZIP)->needsUncompressedLength()); } class CompressionTest @@ -182,7 +183,8 @@ INSTANTIATE_TEST_CASE_P( CodecType::LZ4_VARINT_SIZE, CodecType::LZMA2, CodecType::LZMA2_VARINT_SIZE, - CodecType::ZSTD_BETA))); + CodecType::ZSTD_BETA, + CodecType::GZIP))); class CompressionVarintTest : public testing::TestWithParam> { -- 2.34.1