From: Stella Lau Date: Tue, 22 Aug 2017 01:15:27 +0000 (-0700) Subject: Add zlib-specific codec initialization X-Git-Tag: v2017.08.28.00~19 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2ca5653ce3e8bc8b4af80651b26288326e898969;p=folly.git Add zlib-specific codec initialization Summary: - Create interface to initialize zlib codec using specific parameters - This enables the raw inflate/deflate and auto inflate options - Add tests for option initialization Reviewed By: terrelln, yfeldblum Differential Revision: D5649980 fbshipit-source-id: fd36e8edc0e8c528cd6c9d8f39e8ef839b6acfef --- diff --git a/folly/Makefile.am b/folly/Makefile.am index f1872fea..ca03d5a5 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -298,6 +298,8 @@ nobase_follyinclude_HEADERS = \ io/async/test/TimeUtil.h \ io/async/test/UndelayedDestruction.h \ io/async/test/Util.h \ + io/compression/Utils.h \ + io/compression/Zlib.h \ Iterator.h \ json.h \ Launder.h \ @@ -533,6 +535,7 @@ libfolly_la_SOURCES = \ io/async/test/TimeUtil.cpp \ io/async/ssl/OpenSSLUtils.cpp \ io/async/ssl/SSLErrors.cpp \ + io/compression/Zlib.cpp \ json.cpp \ detail/MemoryIdler.cpp \ detail/SocketFastOpen.cpp \ diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index cfa53181..de065f10 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -32,7 +32,7 @@ #endif #if FOLLY_HAVE_LIBZ -#include +#include #endif #if FOLLY_HAVE_LIBLZMA @@ -55,9 +55,15 @@ #include #include #include +#include #include #include +using folly::io::compression::detail::dataStartsWithLE; +using folly::io::compression::detail::prefixToStringLE; + +namespace zlib = folly::io::zlib; + namespace folly { namespace io { @@ -517,39 +523,6 @@ inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) { #endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA -namespace { -/** - * Reads sizeof(T) bytes, and returns false if not enough bytes are available. - * Returns true if the first n bytes are equal to prefix when interpreted as - * a little endian T. - */ -template -typename std::enable_if::value, bool>::type -dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) { - DCHECK_GT(n, 0); - DCHECK_LE(n, sizeof(T)); - T value; - Cursor cursor{data}; - if (!cursor.tryReadLE(value)) { - return false; - } - const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1; - return prefix == (value & mask); -} - -template -typename std::enable_if::value, std::string>::type -prefixToStringLE(T prefix, uint64_t n = sizeof(T)) { - DCHECK_GT(n, 0); - DCHECK_LE(n, sizeof(T)); - prefix = Endian::little(prefix); - std::string result; - result.resize(n); - memcpy(&result[0], &prefix, n); - return result; -} -} // namespace - #if FOLLY_HAVE_LIBLZ4 /** @@ -1005,300 +978,6 @@ std::unique_ptr SnappyCodec::doUncompress( #endif // FOLLY_HAVE_LIBSNAPPY -#if FOLLY_HAVE_LIBZ -/** - * Zlib codec - */ -class ZlibStreamCodec final : public StreamCodec { - public: - static std::unique_ptr createCodec(int level, CodecType type); - static std::unique_ptr createStream(int level, CodecType type); - explicit ZlibStreamCodec(int level, CodecType type); - ~ZlibStreamCodec() override; - - std::vector validPrefixes() const override; - bool canUncompress(const IOBuf* data, Optional uncompressedLength) - const override; - - private: - uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override; - - void doResetStream() override; - bool doCompressStream( - ByteRange& input, - MutableByteRange& output, - StreamCodec::FlushOp flush) override; - bool doUncompressStream( - ByteRange& input, - MutableByteRange& output, - StreamCodec::FlushOp flush) override; - - void resetDeflateStream(); - void resetInflateStream(); - - Optional deflateStream_{}; - Optional inflateStream_{}; - int level_; - bool needReset_{true}; -}; - -static constexpr uint16_t kGZIPMagicLE = 0x8B1F; - -std::vector ZlibStreamCodec::validPrefixes() const { - if (type() == CodecType::ZLIB) { - // Zlib streams start with a 2 byte header. - // - // 0 1 - // +---+---+ - // |CMF|FLG| - // +---+---+ - // - // We won't restrict the values of any sub-fields except as described below. - // - // The lowest 4 bits of CMF is the compression method (CM). - // CM == 0x8 is the deflate compression method, which is currently the only - // supported compression method, so any valid prefix must have CM == 0x8. - // - // The lowest 5 bits of FLG is FCHECK. - // FCHECK must be such that the two header bytes are a multiple of 31 when - // interpreted as a big endian 16-bit number. - std::vector result; - // 16 values for the first byte, 8 values for the second byte. - // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK. - result.reserve(132); - // Select all values for the CMF byte that use the deflate algorithm 0x8. - for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) { - // Select all values for the FLG, but leave FCHECK as 0 since it's fixed. - for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) { - uint16_t prefix = first | second; - // Compute FCHECK. - prefix += 31 - (prefix % 31); - result.push_back(prefixToStringLE(Endian::big(prefix))); - // zlib won't produce this, but it is a valid prefix. - if ((prefix & 0x1F) == 31) { - prefix -= 31; - result.push_back(prefixToStringLE(Endian::big(prefix))); - } - } - } - return result; - } else { - // The gzip frame starts with 2 magic bytes. - return {prefixToStringLE(kGZIPMagicLE)}; - } -} - -bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional) - const { - if (type() == CodecType::ZLIB) { - uint16_t value; - Cursor cursor{data}; - if (!cursor.tryReadBE(value)) { - return false; - } - // zlib compressed if using deflate and is a multiple of 31. - return (value & 0x0F00) == 0x0800 && value % 31 == 0; - } else { - return dataStartsWithLE(data, kGZIPMagicLE); - } -} - -uint64_t ZlibStreamCodec::doMaxCompressedLength( - uint64_t uncompressedLength) const { - return deflateBound(nullptr, uncompressedLength); -} - -std::unique_ptr ZlibStreamCodec::createCodec(int level, CodecType type) { - return std::make_unique(level, type); -} - -std::unique_ptr ZlibStreamCodec::createStream( - int level, - CodecType type) { - return std::make_unique(level, type); -} - -ZlibStreamCodec::ZlibStreamCodec(int level, CodecType type) - : StreamCodec(type) { - DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP); - switch (level) { - case COMPRESSION_LEVEL_FASTEST: - level = 1; - break; - case COMPRESSION_LEVEL_DEFAULT: - level = Z_DEFAULT_COMPRESSION; - break; - case COMPRESSION_LEVEL_BEST: - level = 9; - break; - } - if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) { - throw std::invalid_argument( - to("ZlibStreamCodec: invalid level: ", level)); - } - level_ = level; -} - -ZlibStreamCodec::~ZlibStreamCodec() { - if (deflateStream_) { - deflateEnd(deflateStream_.get_pointer()); - deflateStream_.clear(); - } - if (inflateStream_) { - inflateEnd(inflateStream_.get_pointer()); - inflateStream_.clear(); - } -} - -void ZlibStreamCodec::doResetStream() { - needReset_ = true; -} - -void ZlibStreamCodec::resetDeflateStream() { - if (deflateStream_) { - int const rc = deflateReset(deflateStream_.get_pointer()); - if (rc != Z_OK) { - deflateStream_.clear(); - throw std::runtime_error( - to("ZlibStreamCodec: deflateReset error: ", rc)); - } - return; - } - deflateStream_ = z_stream{}; - // Using deflateInit2() to support gzip. "The windowBits parameter is the - // base two logarithm of the maximum window size (...) The default value is - // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer - // around the compressed data instead of a zlib wrapper. The gzip header - // will have no file name, no extra data, no comment, no modification time - // (set to zero), no header crc, and the operating system will be set to 255 - // (unknown)." - int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0); - // All other parameters (method, memLevel, strategy) get default values from - // the zlib manual. - int const rc = deflateInit2( - deflateStream_.get_pointer(), - level_, - Z_DEFLATED, - windowBits, - /* memLevel */ 8, - Z_DEFAULT_STRATEGY); - if (rc != Z_OK) { - deflateStream_.clear(); - throw std::runtime_error( - to("ZlibStreamCodec: deflateInit error: ", rc)); - } -} - -void ZlibStreamCodec::resetInflateStream() { - if (inflateStream_) { - int const rc = inflateReset(inflateStream_.get_pointer()); - if (rc != Z_OK) { - inflateStream_.clear(); - throw std::runtime_error( - to("ZlibStreamCodec: inflateReset error: ", rc)); - } - return; - } - inflateStream_ = z_stream{}; - // "The windowBits parameter is the base two logarithm of the maximum window - // size (...) The default value is 15 (...) add 16 to decode only the gzip - // format (the zlib format will return a Z_DATA_ERROR)." - int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0); - int const rc = inflateInit2(inflateStream_.get_pointer(), windowBits); - if (rc != Z_OK) { - inflateStream_.clear(); - throw std::runtime_error( - to("ZlibStreamCodec: inflateInit error: ", rc)); - } -} - -static int zlibTranslateFlush(StreamCodec::FlushOp flush) { - switch (flush) { - case StreamCodec::FlushOp::NONE: - return Z_NO_FLUSH; - case StreamCodec::FlushOp::FLUSH: - return Z_SYNC_FLUSH; - case StreamCodec::FlushOp::END: - return Z_FINISH; - default: - throw std::invalid_argument("ZlibStreamCodec: Invalid flush"); - } -} - -static int zlibThrowOnError(int rc) { - switch (rc) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - return rc; - default: - throw std::runtime_error(to("ZlibStreamCodec: error: ", rc)); - } -} - -bool ZlibStreamCodec::doCompressStream( - ByteRange& input, - MutableByteRange& output, - StreamCodec::FlushOp flush) { - if (needReset_) { - resetDeflateStream(); - needReset_ = false; - } - DCHECK(deflateStream_.hasValue()); - // zlib will return Z_STREAM_ERROR if output.data() is null. - if (output.data() == nullptr) { - return false; - } - deflateStream_->next_in = const_cast(input.data()); - deflateStream_->avail_in = input.size(); - deflateStream_->next_out = output.data(); - deflateStream_->avail_out = output.size(); - SCOPE_EXIT { - input.uncheckedAdvance(input.size() - deflateStream_->avail_in); - output.uncheckedAdvance(output.size() - deflateStream_->avail_out); - }; - int const rc = zlibThrowOnError( - deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush))); - switch (flush) { - case StreamCodec::FlushOp::NONE: - return false; - case StreamCodec::FlushOp::FLUSH: - return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0; - case StreamCodec::FlushOp::END: - return rc == Z_STREAM_END; - default: - throw std::invalid_argument("ZlibStreamCodec: Invalid flush"); - } -} - -bool ZlibStreamCodec::doUncompressStream( - ByteRange& input, - MutableByteRange& output, - StreamCodec::FlushOp flush) { - if (needReset_) { - resetInflateStream(); - needReset_ = false; - } - DCHECK(inflateStream_.hasValue()); - // zlib will return Z_STREAM_ERROR if output.data() is null. - if (output.data() == nullptr) { - return false; - } - inflateStream_->next_in = const_cast(input.data()); - inflateStream_->avail_in = input.size(); - inflateStream_->next_out = output.data(); - inflateStream_->avail_out = output.size(); - SCOPE_EXIT { - input.advance(input.size() - inflateStream_->avail_in); - output.advance(output.size() - inflateStream_->avail_out); - }; - int const rc = zlibThrowOnError( - inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush))); - return rc == Z_STREAM_END; -} - -#endif // FOLLY_HAVE_LIBZ - #if FOLLY_HAVE_LIBLZMA /** @@ -2047,6 +1726,24 @@ std::unique_ptr Bzip2Codec::doUncompress( #endif // FOLLY_HAVE_LIBBZ2 +#if FOLLY_HAVE_LIBZ + +zlib::Options getZlibOptions(CodecType type) { + DCHECK(type == CodecType::GZIP || type == CodecType::ZLIB); + return type == CodecType::GZIP ? zlib::defaultGzipOptions() + : zlib::defaultZlibOptions(); +} + +std::unique_ptr getZlibCodec(int level, CodecType type) { + return zlib::getCodec(getZlibOptions(type), level); +} + +std::unique_ptr getZlibStreamCodec(int level, CodecType type) { + return zlib::getStreamCodec(getZlibOptions(type), level); +} + +#endif // FOLLY_HAVE_LIBZ + /** * Automatic decompression */ @@ -2236,7 +1933,7 @@ constexpr Factory #endif #if FOLLY_HAVE_LIBZ - {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream}, + {getZlibCodec, getZlibStreamCodec}, #else {}, #endif @@ -2262,7 +1959,7 @@ constexpr Factory #endif #if FOLLY_HAVE_LIBZ - {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream}, + {getZlibCodec, getZlibStreamCodec}, #else {}, #endif diff --git a/folly/io/compression/Utils.h b/folly/io/compression/Utils.h new file mode 100644 index 00000000..8d23723f --- /dev/null +++ b/folly/io/compression/Utils.h @@ -0,0 +1,67 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +/** + * Helper functions for compression codecs. + */ +namespace folly { +namespace io { +namespace compression { +namespace detail { + +/** + * Reads sizeof(T) bytes, and returns false if not enough bytes are available. + * Returns true if the first n bytes are equal to prefix when interpreted as + * a little endian T. + */ +template +typename std::enable_if::value, bool>::type +dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) { + DCHECK_GT(n, 0); + DCHECK_LE(n, sizeof(T)); + T value; + Cursor cursor{data}; + if (!cursor.tryReadLE(value)) { + return false; + } + const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1; + return prefix == (value & mask); +} + +template +typename std::enable_if::value, std::string>::type +prefixToStringLE(T prefix, uint64_t n = sizeof(T)) { + DCHECK_GT(n, 0); + DCHECK_LE(n, sizeof(T)); + prefix = Endian::little(prefix); + std::string result; + result.resize(n); + memcpy(&result[0], &prefix, n); + return result; +} + +} // namespace detail +} // namespace compression +} // namespace io +} // namespace folly diff --git a/folly/io/compression/Zlib.cpp b/folly/io/compression/Zlib.cpp new file mode 100644 index 00000000..5cb5dcbf --- /dev/null +++ b/folly/io/compression/Zlib.cpp @@ -0,0 +1,413 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#if FOLLY_HAVE_LIBZ + +#include +#include +#include +#include +#include +#include +#include + +using folly::io::compression::detail::dataStartsWithLE; +using folly::io::compression::detail::prefixToStringLE; + +namespace folly { +namespace io { +namespace zlib { + +namespace { + +bool isValidStrategy(int strategy) { + std::array strategies{{ + Z_DEFAULT_STRATEGY, + Z_FILTERED, + Z_HUFFMAN_ONLY, + Z_RLE, + Z_FIXED + }}; + return std::any_of(strategies.begin(), strategies.end(), [&](int i) { + return i == strategy; + }); +} + +int getWindowBits(Options::Format format, int windowSize) { + switch (format) { + case Options::Format::ZLIB: + return windowSize; + case Options::Format::GZIP: + return windowSize + 16; + case Options::Format::RAW: + return -windowSize; + case Options::Format::AUTO: + return windowSize + 32; + default: + return windowSize; + } +} + +CodecType getCodecType(Options options) { + if (options.windowSize == 15 && options.format == Options::Format::ZLIB) { + return CodecType::ZLIB; + } else if ( + options.windowSize == 15 && options.format == Options::Format::GZIP) { + return CodecType::GZIP; + } else { + return CodecType::USER_DEFINED; + } +} + +class ZlibStreamCodec final : public StreamCodec { + public: + static std::unique_ptr createCodec(Options options, int level); + static std::unique_ptr createStream(Options options, int level); + + explicit ZlibStreamCodec(Options options, int level); + ~ZlibStreamCodec() override; + + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, Optional uncompressedLength) + const override; + + private: + uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override; + + void doResetStream() override; + bool doCompressStream( + ByteRange& input, + MutableByteRange& output, + StreamCodec::FlushOp flush) override; + bool doUncompressStream( + ByteRange& input, + MutableByteRange& output, + StreamCodec::FlushOp flush) override; + + void resetDeflateStream(); + void resetInflateStream(); + + Options options_; + + Optional deflateStream_{}; + Optional inflateStream_{}; + int level_; + bool needReset_{true}; +}; +static constexpr uint16_t kGZIPMagicLE = 0x8B1F; + +std::vector ZlibStreamCodec::validPrefixes() const { + if (type() == CodecType::ZLIB) { + // Zlib streams start with a 2 byte header. + // + // 0 1 + // +---+---+ + // |CMF|FLG| + // +---+---+ + // + // We won't restrict the values of any sub-fields except as described below. + // + // The lowest 4 bits of CMF is the compression method (CM). + // CM == 0x8 is the deflate compression method, which is currently the only + // supported compression method, so any valid prefix must have CM == 0x8. + // + // The lowest 5 bits of FLG is FCHECK. + // FCHECK must be such that the two header bytes are a multiple of 31 when + // interpreted as a big endian 16-bit number. + std::vector result; + // 16 values for the first byte, 8 values for the second byte. + // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK. + result.reserve(132); + // Select all values for the CMF byte that use the deflate algorithm 0x8. + for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) { + // Select all values for the FLG, but leave FCHECK as 0 since it's fixed. + for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) { + uint16_t prefix = first | second; + // Compute FCHECK. + prefix += 31 - (prefix % 31); + result.push_back(prefixToStringLE(Endian::big(prefix))); + // zlib won't produce this, but it is a valid prefix. + if ((prefix & 0x1F) == 31) { + prefix -= 31; + result.push_back(prefixToStringLE(Endian::big(prefix))); + } + } + } + return result; + } else if (type() == CodecType::GZIP) { + // The gzip frame starts with 2 magic bytes. + return {prefixToStringLE(kGZIPMagicLE)}; + } else { + return {}; + } +} + +bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional) + const { + if (type() == CodecType::ZLIB) { + uint16_t value; + Cursor cursor{data}; + if (!cursor.tryReadBE(value)) { + return false; + } + // zlib compressed if using deflate and is a multiple of 31. + return (value & 0x0F00) == 0x0800 && value % 31 == 0; + } else if (type() == CodecType::GZIP) { + return dataStartsWithLE(data, kGZIPMagicLE); + } else { + return false; + } +} + +uint64_t ZlibStreamCodec::doMaxCompressedLength( + uint64_t uncompressedLength) const { + return deflateBound(nullptr, uncompressedLength); +} + +std::unique_ptr ZlibStreamCodec::createCodec( + Options options, + int level) { + return std::make_unique(options, level); +} + +std::unique_ptr ZlibStreamCodec::createStream( + Options options, + int level) { + return std::make_unique(options, level); +} + +ZlibStreamCodec::ZlibStreamCodec(Options options, int level) + : StreamCodec(getCodecType(options)) { + switch (level) { + case COMPRESSION_LEVEL_FASTEST: + level = 1; + break; + case COMPRESSION_LEVEL_DEFAULT: + level = Z_DEFAULT_COMPRESSION; + break; + case COMPRESSION_LEVEL_BEST: + level = 9; + break; + } + auto inBounds = [](int value, int low, int high) { + return (value >= low) && (value <= high); + }; + + if (level != Z_DEFAULT_COMPRESSION && !inBounds(level, 0, 9)) { + throw std::invalid_argument( + to("ZlibStreamCodec: invalid level: ", level)); + } + level_ = level; + options_ = options; + + // Although zlib allows a windowSize of 8..15, a value of 8 is not + // properly supported and is treated as a value of 9. This means data deflated + // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8 + // is also not supported for gzip and raw deflation. + // Hence, the codec supports only 9..15. + if (!inBounds(options_.windowSize, 9, 15)) { + throw std::invalid_argument(to( + "ZlibStreamCodec: invalid windowSize option: ", options.windowSize)); + } + if (!inBounds(options_.memLevel, 1, 9)) { + throw std::invalid_argument(to( + "ZlibStreamCodec: invalid memLevel option: ", options.memLevel)); + } + if (!isValidStrategy(options_.strategy)) { + throw std::invalid_argument(to( + "ZlibStreamCodec: invalid strategy: ", options.strategy)); + } +} + +ZlibStreamCodec::~ZlibStreamCodec() { + if (deflateStream_) { + deflateEnd(deflateStream_.get_pointer()); + deflateStream_.clear(); + } + if (inflateStream_) { + inflateEnd(inflateStream_.get_pointer()); + inflateStream_.clear(); + } +} + +void ZlibStreamCodec::doResetStream() { + needReset_ = true; +} + +void ZlibStreamCodec::resetDeflateStream() { + if (deflateStream_) { + int const rc = deflateReset(deflateStream_.get_pointer()); + if (rc != Z_OK) { + deflateStream_.clear(); + throw std::runtime_error( + to("ZlibStreamCodec: deflateReset error: ", rc)); + } + return; + } + deflateStream_ = z_stream{}; + + // The automatic header detection format is only for inflation. + // Use zlib for deflation if the format is auto. + int const windowBits = getWindowBits( + options_.format == Options::Format::AUTO ? Options::Format::ZLIB + : options_.format, + options_.windowSize); + + int const rc = deflateInit2( + deflateStream_.get_pointer(), + level_, + Z_DEFLATED, + windowBits, + options_.memLevel, + options_.strategy); + if (rc != Z_OK) { + deflateStream_.clear(); + throw std::runtime_error( + to("ZlibStreamCodec: deflateInit error: ", rc)); + } +} + +void ZlibStreamCodec::resetInflateStream() { + if (inflateStream_) { + int const rc = inflateReset(inflateStream_.get_pointer()); + if (rc != Z_OK) { + inflateStream_.clear(); + throw std::runtime_error( + to("ZlibStreamCodec: inflateReset error: ", rc)); + } + return; + } + inflateStream_ = z_stream{}; + int const rc = inflateInit2( + inflateStream_.get_pointer(), + getWindowBits(options_.format, options_.windowSize)); + if (rc != Z_OK) { + inflateStream_.clear(); + throw std::runtime_error( + to("ZlibStreamCodec: inflateInit error: ", rc)); + } +} + +static int zlibTranslateFlush(StreamCodec::FlushOp flush) { + switch (flush) { + case StreamCodec::FlushOp::NONE: + return Z_NO_FLUSH; + case StreamCodec::FlushOp::FLUSH: + return Z_SYNC_FLUSH; + case StreamCodec::FlushOp::END: + return Z_FINISH; + default: + throw std::invalid_argument("ZlibStreamCodec: Invalid flush"); + } +} + +static int zlibThrowOnError(int rc) { + switch (rc) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + return rc; + default: + throw std::runtime_error(to("ZlibStreamCodec: error: ", rc)); + } +} + +bool ZlibStreamCodec::doCompressStream( + ByteRange& input, + MutableByteRange& output, + StreamCodec::FlushOp flush) { + if (needReset_) { + resetDeflateStream(); + needReset_ = false; + } + DCHECK(deflateStream_.hasValue()); + // zlib will return Z_STREAM_ERROR if output.data() is null. + if (output.data() == nullptr) { + return false; + } + deflateStream_->next_in = const_cast(input.data()); + deflateStream_->avail_in = input.size(); + deflateStream_->next_out = output.data(); + deflateStream_->avail_out = output.size(); + SCOPE_EXIT { + input.uncheckedAdvance(input.size() - deflateStream_->avail_in); + output.uncheckedAdvance(output.size() - deflateStream_->avail_out); + }; + int const rc = zlibThrowOnError( + deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush))); + switch (flush) { + case StreamCodec::FlushOp::NONE: + return false; + case StreamCodec::FlushOp::FLUSH: + return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0; + case StreamCodec::FlushOp::END: + return rc == Z_STREAM_END; + default: + throw std::invalid_argument("ZlibStreamCodec: Invalid flush"); + } +} + +bool ZlibStreamCodec::doUncompressStream( + ByteRange& input, + MutableByteRange& output, + StreamCodec::FlushOp flush) { + if (needReset_) { + resetInflateStream(); + needReset_ = false; + } + DCHECK(inflateStream_.hasValue()); + // zlib will return Z_STREAM_ERROR if output.data() is null. + if (output.data() == nullptr) { + return false; + } + inflateStream_->next_in = const_cast(input.data()); + inflateStream_->avail_in = input.size(); + inflateStream_->next_out = output.data(); + inflateStream_->avail_out = output.size(); + SCOPE_EXIT { + input.advance(input.size() - inflateStream_->avail_in); + output.advance(output.size() - inflateStream_->avail_out); + }; + int const rc = zlibThrowOnError( + inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush))); + return rc == Z_STREAM_END; +} + +} // namespace + +Options defaultGzipOptions() { + return Options(Options::Format::GZIP); +} + +Options defaultZlibOptions() { + return Options(Options::Format::ZLIB); +} + +std::unique_ptr getCodec(Options options, int level) { + return ZlibStreamCodec::createCodec(options, level); +} + +std::unique_ptr getStreamCodec(Options options, int level) { + return ZlibStreamCodec::createStream(options, level); +} + +} // namespace zlib +} // namespace io +} // namespace folly + +#endif // FOLLY_HAVE_LIBZ diff --git a/folly/io/compression/Zlib.h b/folly/io/compression/Zlib.h new file mode 100644 index 00000000..74dfe032 --- /dev/null +++ b/folly/io/compression/Zlib.h @@ -0,0 +1,129 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#if FOLLY_HAVE_LIBZ + +#include + +/** + * Interface for Zlib-specific codec initialization. + */ +namespace folly { +namespace io { +namespace zlib { + +struct Options { + /** + * ZLIB: default option -- write a zlib wrapper as documented in RFC 1950. + * + * GZIP: write a simple gzip header and trailer around the compressed data + * instead of a zlib wrapper. + * + * RAW: deflate will generate raw deflate data with no zlib header or + * trailer, and will not compute a check value. + * + * AUTO: enable automatic header detection for decoding gzip or zlib data. + * For deflation, ZLIB will be used. + */ + enum class Format { ZLIB, GZIP, RAW, AUTO }; + + explicit Options( + Format format = Format::ZLIB, + int windowSize = 15, + int memLevel = 8, + int strategy = Z_DEFAULT_STRATEGY) + : format(format), + windowSize(windowSize), + memLevel(memLevel), + strategy(strategy) {} + + Format format; + + /** + * windowSize is the base two logarithm of the window size (the size of the + * history buffer). It should be in the range 9..15. Larger values of this + * parameter result in better compression at the expense of memory usage. + * + * The default value is 15. + * + * NB: when inflating/uncompressing data, the windowSize must be greater than + * or equal to the size used when deflating/compressing. + */ + int windowSize; + + /** + * "The memLevel parameter specifies how much memory should be allocated for + * the internal compression state. memLevel=1 uses minimum memory but is slow + * and reduces compression ratio; memLevel=9 uses maximum memory for optimal + * speed. The default value is 8." + */ + int memLevel; + + /** + * The strategy parameter is used to tune the compression algorithm. + * Supported values: + * - Z_DEFAULT_STRATEGY: normal data + * - Z_FILTERED: data produced by a filter (or predictor) + * - Z_HUFFMAN_ONLY: force Huffman encoding only (no string match) + * - Z_RLE: limit match distances to one + * - Z_FIXED: prevents the use of dynamic Huffman codes + * + * The strategy parameter only affects the compression ratio but not the + * correctness of the compressed output. + */ + int strategy; +}; + +/** + * Get the default options for gzip compression. + * A codec created with these options will have type CodecType::GZIP. + */ +Options defaultGzipOptions(); + +/** + * Get the default options for zlib compression. + * A codec created with these options will have type CodecType::ZLIB. + */ +Options defaultZlibOptions(); + +/** + * Get a codec with the given options and compression level. + * + * If the windowSize is 15 and the format is Format::ZLIB or Format::GZIP, then + * the type of the codec will be CodecType::ZLIB or CodecType::GZIP + * respectively. Otherwise, the type will be CodecType::USER_DEFINED. + * + * Automatic uncompression is not supported with USER_DEFINED codecs. + * + * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6 + */ +std::unique_ptr getCodec( + Options options = Options(), + int level = COMPRESSION_LEVEL_DEFAULT); +std::unique_ptr getStreamCodec( + Options options = Options(), + int level = COMPRESSION_LEVEL_DEFAULT); + +} // namespace zlib +} // namespace io +} // namespace folly + +#endif // FOLLY_HAVE_LIBZ diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp index 55514e86..6beccbdd 100644 --- a/folly/io/test/CompressionTest.cpp +++ b/folly/io/test/CompressionTest.cpp @@ -38,6 +38,12 @@ #include #endif +#if FOLLY_HAVE_LIBZ +#include +#endif + +namespace zlib = folly::io::zlib; + namespace folly { namespace io { namespace test { @@ -1129,6 +1135,118 @@ TEST(ZstdTest, BackwardCompatible) { } #endif + +#if FOLLY_HAVE_LIBZ + +using ZlibFormat = zlib::Options::Format; + +TEST(ZlibTest, Auto) { + size_t const uncompressedLength_ = (size_t)1 << 15; + auto const original = std::string( + reinterpret_cast( + randomDataHolder.data(uncompressedLength_).data()), + uncompressedLength_); + auto optionCodec = zlib::getCodec(zlib::Options(ZlibFormat::AUTO)); + + // Test the codec can uncompress zlib data. + { + auto codec = getCodec(CodecType::ZLIB); + auto const compressed = codec->compress(original); + auto const uncompressed = optionCodec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + } + + // Test the codec can uncompress gzip data. + { + auto codec = getCodec(CodecType::GZIP); + auto const compressed = codec->compress(original); + auto const uncompressed = optionCodec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + } +} + +TEST(ZlibTest, DefaultOptions) { + size_t const uncompressedLength_ = (size_t)1 << 20; + auto const original = std::string( + reinterpret_cast( + randomDataHolder.data(uncompressedLength_).data()), + uncompressedLength_); + { + auto codec = getCodec(CodecType::ZLIB); + auto optionCodec = zlib::getCodec(zlib::defaultZlibOptions()); + auto const compressed = optionCodec->compress(original); + auto uncompressed = codec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + uncompressed = optionCodec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + } + + { + auto codec = getCodec(CodecType::GZIP); + auto optionCodec = zlib::getCodec(zlib::defaultGzipOptions()); + auto const compressed = optionCodec->compress(original); + auto uncompressed = codec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + uncompressed = optionCodec->uncompress(compressed); + EXPECT_EQ(original, uncompressed); + } +} + +class ZlibOptionsTest : public testing::TestWithParam< + std::tr1::tuple> { + protected: + void SetUp() override { + auto tup = GetParam(); + options_.format = std::tr1::get<0>(tup); + options_.windowSize = std::tr1::get<1>(tup); + options_.memLevel = std::tr1::get<2>(tup); + options_.strategy = std::tr1::get<3>(tup); + codec_ = zlib::getStreamCodec(options_); + } + + void runSimpleRoundTripTest(const DataHolder& dh); + + private: + zlib::Options options_; + std::unique_ptr codec_; +}; + +void ZlibOptionsTest::runSimpleRoundTripTest(const DataHolder& dh) { + size_t const uncompressedLength = (size_t)1 << 16; + auto const original = std::string( + reinterpret_cast(dh.data(uncompressedLength).data()), + uncompressedLength); + + auto const compressed = codec_->compress(original); + auto const uncompressed = codec_->uncompress(compressed); + EXPECT_EQ(uncompressed, original); +} + +TEST_P(ZlibOptionsTest, simpleRoundTripTest) { + runSimpleRoundTripTest(constantDataHolder); + runSimpleRoundTripTest(randomDataHolder); +} + +INSTANTIATE_TEST_CASE_P( + ZlibOptionsTest, + ZlibOptionsTest, + testing::Combine( + testing::Values( + ZlibFormat::ZLIB, + ZlibFormat::GZIP, + ZlibFormat::RAW, + ZlibFormat::AUTO), + testing::Values(9, 12, 15), + testing::Values(1, 8, 9), + testing::Values( + Z_DEFAULT_STRATEGY, + Z_FILTERED, + Z_HUFFMAN_ONLY, + Z_RLE, + Z_FIXED))); + +#endif // FOLLY_HAVE_LIBZ + } // namespace test } // namespace io } // namespace folly