Allow decompression without uncompressedSize
author Nick Terrell <terrelln@fb.com>
Fri, 24 Mar 2017 21:30:45 +0000 (14:30 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 24 Mar 2017 21:37:06 +0000 (14:37 -0700)
Summary:
The `LZMA2` codec doesn't allow decompression without passing the uncompressed size.
Every other codec that supports streaming allows decompression without the uncompressed size by streaming into an IOBuf chain.
Benchmarks show that reducing the `defaultBufferLength` to 256 KiB doesn't slow down decompression of large files, but it speeds up decompression of small files (< 1000 bytes).

Reviewed By: yfeldblum

Differential Revision: D4751571

fbshipit-source-id: 39dbe6754a1ecdc2b7ba3107e9face926d4c98ca

folly/io/Compression.cpp
folly/io/test/CompressionTest.cpp

index 45a4d97149d3609d677fc678ec86371f7dd73642..0774912579411ae0d16a085fba1b5431ef41b417 100644 (file)
@@ -973,7 +973,7 @@ LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
 }
 
 bool LZMA2Codec::doNeedsUncompressedLength() const {
-  return !encodeSize();
+  return false;
 }
 
 uint64_t LZMA2Codec::doMaxUncompressedLength() const {
@@ -1104,27 +1104,25 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
   SCOPE_EXIT { lzma_end(&stream); };
 
   // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
+  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
+  constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB
 
   folly::io::Cursor cursor(data);
-  uint64_t actualUncompressedLength;
   if (encodeSize()) {
-    actualUncompressedLength = decodeVarintFromCursor(cursor);
+    const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
     if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
         uncompressedLength != actualUncompressedLength) {
       throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
     }
-  } else {
-    actualUncompressedLength = uncompressedLength;
-    DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
+    uncompressedLength = actualUncompressedLength;
   }
 
   auto out = addOutputBuffer(
       &stream,
-      (actualUncompressedLength <= maxSingleStepLength ?
-       actualUncompressedLength :
-       defaultBufferLength));
+      ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+        uncompressedLength <= maxSingleStepLength)
+           ? uncompressedLength
+           : defaultBufferLength));
 
   bool streamEnd = false;
   auto buf = cursor.peekBytes();
@@ -1151,9 +1149,10 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
 
   out->prev()->trimEnd(stream.avail_out);
 
-  if (actualUncompressedLength != stream.total_out) {
-    throw std::runtime_error(to<std::string>(
-        "LZMA2Codec: invalid uncompressed length"));
+  if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+      uncompressedLength != stream.total_out) {
+    throw std::runtime_error(
+        to<std::string>("LZMA2Codec: invalid uncompressed length"));
   }
 
   return out;
index a97b5bcf56ec0b0a548478845cccf889f33f11eb..5cac88875ee1003a4256d5c062c5f51ff3feb3b2 100644 (file)
@@ -155,7 +155,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) {
       { CodecType::SNAPPY, false },
       { CodecType::ZLIB, false },
       { CodecType::LZ4_VARINT_SIZE, false },
-      { CodecType::LZMA2, true },
+      { CodecType::LZMA2, false },
       { CodecType::LZMA2_VARINT_SIZE, false },
       { CodecType::ZSTD, false },
       { CodecType::GZIP, false },
@@ -392,6 +392,7 @@ INSTANTIATE_TEST_CASE_P(
         supportedCodecs({
             CodecType::SNAPPY,
             CodecType::ZLIB,
+            CodecType::LZMA2,
             CodecType::ZSTD,
             CodecType::LZ4_FRAME,
         })));