From 6fac16c64da068d31789b2dec3f00ec17d6ba9d6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 24 Mar 2017 14:30:45 -0700 Subject: [PATCH] Allow decompression without uncompressedSize Summary: The `LZMA2` codec doesn't allow decompression without passing the uncompressed size. Every other codec that supports streaming decompression can decompress without the uncompressed size by streaming into an IOBuf chain. Benchmarks show that reducing the `defaultBufferLength` to 256 KiB doesn't slow down decompression of large files, but it speeds up decompression of small files (< 1000 bytes). Reviewed By: yfeldblum Differential Revision: D4751571 fbshipit-source-id: 39dbe6754a1ecdc2b7ba3107e9face926d4c98ca --- folly/io/Compression.cpp | 27 +++++++++++++-------------- folly/io/test/CompressionTest.cpp | 3 ++- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index 45a4d971..07749125 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -973,7 +973,7 @@ LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) { } bool LZMA2Codec::doNeedsUncompressedLength() const { - return !encodeSize(); + return false; } uint64_t LZMA2Codec::doMaxUncompressedLength() const { @@ -1104,27 +1104,25 @@ std::unique_ptr LZMA2Codec::doUncompress(const IOBuf* data, SCOPE_EXIT { lzma_end(&stream); }; // Max 64MiB in one go - constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB - constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB + constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB + constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB folly::io::Cursor cursor(data); - uint64_t actualUncompressedLength; if (encodeSize()) { - actualUncompressedLength = decodeVarintFromCursor(cursor); + const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor); if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && uncompressedLength != 
actualUncompressedLength) { throw std::runtime_error("LZMA2Codec: invalid uncompressed length"); } - } else { - actualUncompressedLength = uncompressedLength; - DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH); + uncompressedLength = actualUncompressedLength; } auto out = addOutputBuffer( &stream, - (actualUncompressedLength <= maxSingleStepLength ? - actualUncompressedLength : - defaultBufferLength)); + ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && + uncompressedLength <= maxSingleStepLength) + ? uncompressedLength + : defaultBufferLength)); bool streamEnd = false; auto buf = cursor.peekBytes(); @@ -1151,9 +1149,10 @@ std::unique_ptr LZMA2Codec::doUncompress(const IOBuf* data, out->prev()->trimEnd(stream.avail_out); - if (actualUncompressedLength != stream.total_out) { - throw std::runtime_error(to( - "LZMA2Codec: invalid uncompressed length")); + if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && + uncompressedLength != stream.total_out) { + throw std::runtime_error( + to("LZMA2Codec: invalid uncompressed length")); } return out; diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp index a97b5bcf..5cac8887 100644 --- a/folly/io/test/CompressionTest.cpp +++ b/folly/io/test/CompressionTest.cpp @@ -155,7 +155,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) { { CodecType::SNAPPY, false }, { CodecType::ZLIB, false }, { CodecType::LZ4_VARINT_SIZE, false }, - { CodecType::LZMA2, true }, + { CodecType::LZMA2, false }, { CodecType::LZMA2_VARINT_SIZE, false }, { CodecType::ZSTD, false }, { CodecType::GZIP, false }, @@ -392,6 +392,7 @@ INSTANTIATE_TEST_CASE_P( supportedCodecs({ CodecType::SNAPPY, CodecType::ZLIB, + CodecType::LZMA2, CodecType::ZSTD, CodecType::LZ4_FRAME, }))); -- 2.34.1