Fix build without zlib compression enabled
[folly.git] / folly / io / Compression.cpp
index d83a7b5cabca70c0162e7bd1b5b264af7d4ae023..d02a6b3e6f1bc741f972ce5c55af8f70c28c269b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2016 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
 #if FOLLY_HAVE_LIBLZ4
 #include <lz4.h>
 #include <lz4hc.h>
+#if LZ4_VERSION_NUMBER >= 10301
+#include <lz4frame.h>
+#endif
 #endif
 
 #include <glog/logging.h>
 
 #if FOLLY_HAVE_LIBSNAPPY
-#include <snappy.h>
 #include <snappy-sinksource.h>
+#include <snappy.h>
 #endif
 
 #if FOLLY_HAVE_LIBZ
-#include <zlib.h>
+#include <folly/io/compression/Zlib.h>
 #endif
 
 #if FOLLY_HAVE_LIBLZMA
 #endif
 
 #if FOLLY_HAVE_LIBZSTD
+#define ZSTD_STATIC_LINKING_ONLY
 #include <zstd.h>
 #endif
 
+#if FOLLY_HAVE_LIBBZ2
+#include <bzlib.h>
+#endif
+
+#include <folly/Bits.h>
 #include <folly/Conv.h>
 #include <folly/Memory.h>
 #include <folly/Portability.h>
 #include <folly/ScopeGuard.h>
 #include <folly/Varint.h>
 #include <folly/io/Cursor.h>
+#include <folly/io/compression/Utils.h>
+#include <algorithm>
+#include <unordered_set>
+
+using folly::io::compression::detail::dataStartsWithLE;
+using folly::io::compression::detail::prefixToStringLE;
 
-namespace folly { namespace io {
+namespace folly {
+namespace io {
 
+// Stores the codec type this instance was created for in type_.
 Codec::Codec(CodecType type) : type_(type) { }
 
 // Ensure consistent behavior in the nullptr case
+// Compresses an IOBuf chain by delegating to doCompress().
+// Throws std::invalid_argument when data is nullptr and std::runtime_error
+// when the chain's total length exceeds maxUncompressedLength().
 std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
+  if (data == nullptr) {
+    throw std::invalid_argument("Codec: data must not be nullptr");
+  }
   uint64_t len = data->computeChainDataLength();
-  if (len == 0) {
-    return IOBuf::create(0);
-  } else if (len > maxUncompressedLength()) {
+  if (len > maxUncompressedLength()) {
     throw std::runtime_error("Codec: uncompressed length too large");
   }
 
   return doCompress(data);
 }
 
-std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
-                                         uint64_t uncompressedLength) {
-  if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
+// String overload of compress(): enforces the codec's size limit and then
+// hands the data to doCompressString().
+// Throws std::runtime_error when data is larger than maxUncompressedLength().
+std::string Codec::compress(const StringPiece data) {
+  const bool tooLarge = data.size() > maxUncompressedLength();
+  if (tooLarge) {
+    throw std::runtime_error("Codec: uncompressed length too large");
+  }
+  return doCompressString(data);
+}
+
+std::unique_ptr<IOBuf> Codec::uncompress(
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) {
+  if (data == nullptr) {
+    throw std::invalid_argument("Codec: data must not be nullptr");
+  }
+  if (!uncompressedLength) {
     if (needsUncompressedLength()) {
       throw std::invalid_argument("Codec: uncompressed length required");
     }
-  } else if (uncompressedLength > maxUncompressedLength()) {
+  } else if (*uncompressedLength > maxUncompressedLength()) {
     throw std::runtime_error("Codec: uncompressed length too large");
   }
 
   if (data->empty()) {
-    if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-        uncompressedLength != 0) {
+    if (uncompressedLength.value_or(0) != 0) {
       throw std::runtime_error("Codec: invalid uncompressed length");
     }
     return IOBuf::create(0);
@@ -84,6 +113,27 @@ std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
   return doUncompress(data, uncompressedLength);
 }
 
+// String overload of uncompress().
+// Throws std::invalid_argument when no length is supplied but the codec
+// requires one, and std::runtime_error when the supplied length exceeds
+// maxUncompressedLength() or is non-zero for empty input.
+std::string Codec::uncompress(
+    const StringPiece data,
+    Optional<uint64_t> uncompressedLength) {
+  if (uncompressedLength) {
+    if (*uncompressedLength > maxUncompressedLength()) {
+      throw std::runtime_error("Codec: uncompressed length too large");
+    }
+  } else if (needsUncompressedLength()) {
+    throw std::invalid_argument("Codec: uncompressed length required");
+  }
+
+  // Non-empty input goes straight to the codec implementation.
+  if (!data.empty()) {
+    return doUncompressString(data, uncompressedLength);
+  }
+
+  // Empty input can only stand for empty output.
+  if (uncompressedLength.value_or(0) != 0) {
+    throw std::runtime_error("Codec: invalid uncompressed length");
+  }
+  return "";
+}
+
+// Public accessor that forwards to the doNeedsUncompressedLength() hook.
 bool Codec::needsUncompressedLength() const {
   return doNeedsUncompressedLength();
 }
@@ -100,6 +150,291 @@ uint64_t Codec::doMaxUncompressedLength() const {
   return UNLIMITED_UNCOMPRESSED_LENGTH;
 }
 
+// Base implementation: reports an empty list of prefixes.
+std::vector<std::string> Codec::validPrefixes() const {
+  return std::vector<std::string>{};
+}
+
+// Base implementation: ignores both arguments and always answers false.
+bool Codec::canUncompress(const IOBuf*, Optional<uint64_t>) const {
+  return false;
+}
+
+// Default string compression: wraps the input in an IOBuf without copying,
+// runs doCompress() on it, and flattens the resulting chain into one string.
+std::string Codec::doCompressString(const StringPiece data) {
+  const IOBuf wrapped{IOBuf::WRAP_BUFFER, data};
+  const auto compressed = doCompress(&wrapped);
+
+  std::string result;
+  // Size the string once so appending each chain element does not reallocate.
+  result.reserve(compressed->computeChainDataLength());
+  for (const auto& chunk : *compressed) {
+    const auto* bytes = reinterpret_cast<const char*>(chunk.data());
+    result.append(bytes, chunk.size());
+  }
+  return result;
+}
+
+// Default string decompression: wraps the input in an IOBuf without copying,
+// runs doUncompress() on it, and flattens the resulting chain into one string.
+std::string Codec::doUncompressString(
+    const StringPiece data,
+    Optional<uint64_t> uncompressedLength) {
+  const IOBuf wrapped{IOBuf::WRAP_BUFFER, data};
+  const auto uncompressed = doUncompress(&wrapped, uncompressedLength);
+
+  std::string result;
+  // Size the string once so appending each chain element does not reallocate.
+  result.reserve(uncompressed->computeChainDataLength());
+  for (const auto& chunk : *uncompressed) {
+    const auto* bytes = reinterpret_cast<const char*>(chunk.data());
+    result.append(bytes, chunk.size());
+  }
+  return result;
+}
+
+// Public accessor that forwards to the doMaxCompressedLength() hook.
+uint64_t Codec::maxCompressedLength(uint64_t uncompressedLength) const {
+  return doMaxCompressedLength(uncompressedLength);
+}
+
+// Returns the uncompressed length of data as reported by
+// doGetUncompressedLength(), or 0 when the chain holds no bytes.
+//
+// data: compressed input chain; must not be nullptr.
+// uncompressedLength: caller-supplied expected length, if known.
+//
+// Throws std::invalid_argument when data is nullptr (the same contract as
+// compress() and uncompress()) and std::runtime_error when the input is
+// empty but a non-zero length was supplied.
+Optional<uint64_t> Codec::getUncompressedLength(
+    const folly::IOBuf* data,
+    Optional<uint64_t> uncompressedLength) const {
+  // Reject nullptr up front instead of dereferencing it below.
+  if (data == nullptr) {
+    throw std::invalid_argument("Codec: data must not be nullptr");
+  }
+  auto const compressedLength = data->computeChainDataLength();
+  if (compressedLength == 0) {
+    // Empty input can only stand for empty output.
+    if (uncompressedLength.value_or(0) != 0) {
+      throw std::runtime_error("Invalid uncompressed length");
+    }
+    return 0;
+  }
+  return doGetUncompressedLength(data, uncompressedLength);
+}
+
+// Base implementation: does not inspect the data and simply echoes back the
+// length supplied by the caller (which may be empty).
+Optional<uint64_t> Codec::doGetUncompressedLength(
+    const folly::IOBuf*,
+    Optional<uint64_t> uncompressedLength) const {
+  return uncompressedLength;
+}
+
+// Public accessor that forwards to the doNeedsDataLength() hook.
+bool StreamCodec::needsDataLength() const {
+  return doNeedsDataLength();
+}
+
+// Base implementation: the data length is not required.
+bool StreamCodec::doNeedsDataLength() const {
+  return false;
+}
+
+// Verifies that the stream is in the expected state.
+// Throws std::logic_error naming both the current and the expected state
+// when they differ.
+void StreamCodec::assertStateIs(State expected) const {
+  if (state_ == expected) {
+    return;
+  }
+  throw std::logic_error(folly::to<std::string>(
+      "Codec: state is ", state_, "; expected state ", expected));
+}
+
+// Puts the stream back into the RESET state, records the (optional)
+// uncompressed length for the next operation, and then lets the concrete
+// codec reset itself through doResetStream().
+void StreamCodec::resetStream(Optional<uint64_t> uncompressedLength) {
+  // Start with progress credited, so a single stalled call right after a
+  // reset only clears the flag rather than throwing.
+  progressMade_ = true;
+  uncompressedLength_ = uncompressedLength;
+  state_ = State::RESET;
+  doResetStream();
+}
+
+// Feeds input to the codec-specific doCompressStream() while validating the
+// caller-supplied uncompressed length and driving the stream state machine.
+//
+// input/output: ranges handed to doCompressStream() by reference; this
+//   function compares their sizes before and after the call to detect
+//   progress.
+// flushOp: NONE, FLUSH or END; selects which state the stream moves into.
+//
+// Returns the value returned by doCompressStream().
+// Throws std::runtime_error for an invalid or missing uncompressed length
+// and for two consecutive calls without progress; assertStateIs() throws
+// std::logic_error when flushOp is not allowed in the current state.
+bool StreamCodec::compressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  // Ending a fresh stream with no input contradicts a non-zero declared length
+  if (state_ == State::RESET && input.empty() &&
+      flushOp == StreamCodec::FlushOp::END &&
+      uncompressedLength().value_or(0) != 0) {
+    throw std::runtime_error("Codec: invalid uncompressed length");
+  }
+
+  if (!uncompressedLength() && needsDataLength()) {
+    throw std::runtime_error("Codec: uncompressed length required");
+  }
+  // Supplying input to a fresh stream contradicts a declared length of zero
+  if (state_ == State::RESET && !input.empty() &&
+      uncompressedLength() == uint64_t(0)) {
+    throw std::runtime_error("Codec: invalid uncompressed length");
+  }
+  // Handle input state transitions
+  switch (flushOp) {
+    case StreamCodec::FlushOp::NONE:
+      if (state_ == State::RESET) {
+        state_ = State::COMPRESS;
+      }
+      assertStateIs(State::COMPRESS);
+      break;
+    case StreamCodec::FlushOp::FLUSH:
+      if (state_ == State::RESET || state_ == State::COMPRESS) {
+        state_ = State::COMPRESS_FLUSH;
+      }
+      assertStateIs(State::COMPRESS_FLUSH);
+      break;
+    case StreamCodec::FlushOp::END:
+      if (state_ == State::RESET || state_ == State::COMPRESS) {
+        state_ = State::COMPRESS_END;
+      }
+      assertStateIs(State::COMPRESS_END);
+      break;
+  }
+  // Snapshot the sizes so we can tell whether the codec consumed or produced
+  // anything during this call
+  size_t const inputSize = input.size();
+  size_t const outputSize = output.size();
+  bool const done = doCompressStream(input, output, flushOp);
+  if (!done && inputSize == input.size() && outputSize == output.size()) {
+    if (!progressMade_) {
+      throw std::runtime_error("Codec: No forward progress made");
+    }
+    // Throw an exception if there is no progress again next time
+    progressMade_ = false;
+  } else {
+    progressMade_ = true;
+  }
+  // Handle output state transitions
+  if (done) {
+    if (state_ == State::COMPRESS_FLUSH) {
+      state_ = State::COMPRESS;
+    } else if (state_ == State::COMPRESS_END) {
+      state_ = State::END;
+    }
+    // Check internal invariants
+    DCHECK(input.empty());
+    DCHECK(flushOp != StreamCodec::FlushOp::NONE);
+  }
+  return done;
+}
+
+// Feeds input to the codec-specific doUncompressStream() and tracks the
+// stream state.
+//
+// Returns true once the stream is finished. A fresh stream given no input is
+// finished only when the declared uncompressed length is zero or unknown.
+// Throws std::runtime_error after two consecutive calls without progress;
+// assertStateIs() throws std::logic_error when the stream is not in a state
+// that allows decompression.
+bool StreamCodec::uncompressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  const bool freshStream = (state_ == State::RESET);
+  if (freshStream && input.empty()) {
+    return uncompressedLength().value_or(0) == 0;
+  }
+  // Handle input state transitions
+  if (freshStream) {
+    state_ = State::UNCOMPRESS;
+  }
+  assertStateIs(State::UNCOMPRESS);
+
+  // Remember the sizes so a call that neither consumes nor produces anything
+  // can be detected.
+  const size_t inputBefore = input.size();
+  const size_t outputBefore = output.size();
+  const bool finished = doUncompressStream(input, output, flushOp);
+  const bool stalled = !finished && input.size() == inputBefore &&
+      output.size() == outputBefore;
+  if (stalled && !progressMade_) {
+    throw std::runtime_error("Codec: no forward progress made");
+  }
+  // One stalled call is tolerated; a second one in a row throws above.
+  progressMade_ = !stalled;
+
+  // Handle output state transitions
+  if (finished) {
+    state_ = State::END;
+  }
+  return finished;
+}
+
+// Allocates a new IOBuf with room for at least `size` bytes, marks its whole
+// capacity as data, and points `output` at that region. `output` must be
+// empty on entry. Returns the new buffer.
+static std::unique_ptr<IOBuf> addOutputBuffer(
+    MutableByteRange& output,
+    uint64_t size) {
+  DCHECK(output.empty());
+  std::unique_ptr<IOBuf> fresh = IOBuf::create(size);
+  // Use everything that was allocated, which may be more than requested.
+  fresh->append(fresh->capacity());
+  output = MutableByteRange(fresh->writableData(), fresh->length());
+  return fresh;
+}
+
+// One-shot compression implemented on top of the streaming interface.
+// Resets the stream with the chain's total length, then repeatedly calls
+// compressStream() over each element of the input chain, appending output
+// buffers as they fill up. Returns the head of the output chain, trimmed to
+// the bytes actually written.
+std::unique_ptr<IOBuf> StreamCodec::doCompress(IOBuf const* data) {
+  uint64_t const uncompressedLength = data->computeChainDataLength();
+  resetStream(uncompressedLength);
+  uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
+
+  auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
+  auto constexpr kDefaultBufferLength = uint64_t(4) << 20; // 4 MB
+
+  // Allocate the worst-case size up front when it is small enough; otherwise
+  // start with the default size and grow the chain inside the loop.
+  MutableByteRange output;
+  auto buffer = addOutputBuffer(
+      output,
+      maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen
+                                               : kDefaultBufferLength);
+
+  // Compress the entire IOBuf chain into the IOBuf chain pointed to by buffer
+  IOBuf const* current = data;
+  ByteRange input{current->data(), current->length()};
+  StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
+  bool done = false;
+  while (!done) {
+    // Advance to the next chain element that still has data; the chain is
+    // circular, so next() == data marks the last element.
+    while (input.empty() && current->next() != data) {
+      current = current->next();
+      input = {current->data(), current->length()};
+    }
+    if (current->next() == data) {
+      // This is the last input buffer so end the stream
+      flushOp = StreamCodec::FlushOp::END;
+    }
+    if (output.empty()) {
+      // prependChain() on the head inserts the new buffer at the chain's tail
+      buffer->prependChain(addOutputBuffer(output, kDefaultBufferLength));
+    }
+    done = compressStream(input, output, flushOp);
+    if (done) {
+      DCHECK(input.empty());
+      DCHECK(flushOp == StreamCodec::FlushOp::END);
+      DCHECK_EQ(current->next(), data);
+    }
+  }
+  // Drop the unused remainder of the last output buffer
+  buffer->prev()->trimEnd(output.size());
+  return buffer;
+}
+
+// Picks an output buffer size for streaming decompression: four times the
+// larger of blockSize and compressedLength, capped at 4 MiB.
+//
+// The cap is applied before multiplying, so the computation cannot wrap
+// around in uint64_t for very large inputs. The result is identical to
+// min(4 * max(blockSize, compressedLength), 4 MiB) whenever that expression
+// does not overflow.
+static uint64_t computeBufferLength(
+    uint64_t const compressedLength,
+    uint64_t const blockSize) {
+  uint64_t constexpr kMaxBufferLength = uint64_t(4) << 20; // 4 MiB
+  uint64_t constexpr kGrowthFactor = 4;
+  // kMaxBufferLength is a multiple of kGrowthFactor, so clamping the operand
+  // to kMaxBufferLength / kGrowthFactor yields exactly the capped product.
+  uint64_t const clamped = std::min(
+      std::max(blockSize, compressedLength), kMaxBufferLength / kGrowthFactor);
+  return kGrowthFactor * clamped;
+}
+
+// One-shot decompression implemented on top of the streaming interface.
+// Resets the stream, then repeatedly calls uncompressStream() over each
+// element of the input chain, appending output buffers as they fill up.
+// Returns the head of the output chain, trimmed to the bytes produced.
+// Throws std::runtime_error when input remains after the stream has ended or
+// when the produced size differs from the known uncompressed length.
+std::unique_ptr<IOBuf> StreamCodec::doUncompress(
+    IOBuf const* data,
+    Optional<uint64_t> uncompressedLength) {
+  auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
+  auto constexpr kBlockSize = uint64_t(128) << 10;
+  auto const defaultBufferLength =
+      computeBufferLength(data->computeChainDataLength(), kBlockSize);
+
+  // Give the codec a chance to determine the length from the data itself
+  uncompressedLength = getUncompressedLength(data, uncompressedLength);
+  resetStream(uncompressedLength);
+
+  // Allocate the full output at once when its size is known and small enough
+  MutableByteRange output;
+  auto buffer = addOutputBuffer(
+      output,
+      (uncompressedLength && *uncompressedLength <= kMaxSingleStepLength
+           ? *uncompressedLength
+           : defaultBufferLength));
+
+  // Uncompress the entire IOBuf chain into the IOBuf chain pointed to by buffer
+  IOBuf const* current = data;
+  ByteRange input{current->data(), current->length()};
+  StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
+  bool done = false;
+  while (!done) {
+    // Advance to the next chain element that still has data; the chain is
+    // circular, so next() == data marks the last element.
+    while (input.empty() && current->next() != data) {
+      current = current->next();
+      input = {current->data(), current->length()};
+    }
+    if (current->next() == data) {
+      // Tell the uncompressor there is no more input (it may optimize)
+      flushOp = StreamCodec::FlushOp::END;
+    }
+    if (output.empty()) {
+      buffer->prependChain(addOutputBuffer(output, defaultBufferLength));
+    }
+    done = uncompressStream(input, output, flushOp);
+  }
+  if (!input.empty()) {
+    throw std::runtime_error("Codec: Junk after end of data");
+  }
+
+  // Drop the unused remainder of the last output buffer
+  buffer->prev()->trimEnd(output.size());
+  if (uncompressedLength &&
+      *uncompressedLength != buffer->computeChainDataLength()) {
+    throw std::runtime_error("Codec: invalid uncompressed length");
+  }
+
+  return buffer;
+}
+
 namespace {
 
 /**
@@ -111,24 +446,25 @@ class NoCompressionCodec final : public Codec {
   explicit NoCompressionCodec(int level, CodecType type);
 
  private:
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
   std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
   std::unique_ptr<IOBuf> doUncompress(
       const IOBuf* data,
-      uint64_t uncompressedLength) override;
+      Optional<uint64_t> uncompressedLength) override;
 };
 
+// Factory: builds a heap-allocated NoCompressionCodec for the given level
+// and codec type.
 std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
-  return make_unique<NoCompressionCodec>(level, type);
+  return std::make_unique<NoCompressionCodec>(level, type);
 }
 
 NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
-  : Codec(type) {
+    : Codec(type) {
   DCHECK(type == CodecType::NO_COMPRESSION);
   switch (level) {
-  case COMPRESSION_LEVEL_DEFAULT:
-  case COMPRESSION_LEVEL_FASTEST:
-  case COMPRESSION_LEVEL_BEST:
-    level = 0;
+    case COMPRESSION_LEVEL_DEFAULT:
+    case COMPRESSION_LEVEL_FASTEST:
+    case COMPRESSION_LEVEL_BEST:
+      level = 0;
   }
   if (level != 0) {
     throw std::invalid_argument(to<std::string>(
@@ -136,6 +472,11 @@ NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
   }
 }
 
+// The bound equals the input length: this codec reports no size overhead.
+uint64_t NoCompressionCodec::doMaxCompressedLength(
+    uint64_t uncompressedLength) const {
+  return uncompressedLength;
+}
+
 std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
     const IOBuf* data) {
   return data->clone();
@@ -143,11 +484,11 @@ std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
 
+// Returns a clone of the input chain. When the caller supplied an expected
+// length, it must match the chain's total length; otherwise
+// std::runtime_error is thrown.
 std::unique_ptr<IOBuf> NoCompressionCodec::doUncompress(
     const IOBuf* data,
-    uint64_t uncompressedLength) {
-  if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-      data->computeChainDataLength() != uncompressedLength) {
-    throw std::runtime_error(to<std::string>(
-        "NoCompressionCodec: invalid uncompressed length"));
+    Optional<uint64_t> uncompressedLength) {
+  if (uncompressedLength &&
+      data->computeChainDataLength() != *uncompressedLength) {
+    throw std::runtime_error(
+        to<std::string>("NoCompressionCodec: invalid uncompressed length"));
   }
   return data->clone();
 }
@@ -177,7 +518,7 @@ inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
   return val;
 }
 
-}  // namespace
+} // namespace
 
 #endif  // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
 
@@ -194,32 +535,33 @@ class LZ4Codec final : public Codec {
  private:
   bool doNeedsUncompressedLength() const override;
   uint64_t doMaxUncompressedLength() const override;
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
 
   bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
 
   std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
   std::unique_ptr<IOBuf> doUncompress(
       const IOBuf* data,
-      uint64_t uncompressedLength) override;
+      Optional<uint64_t> uncompressedLength) override;
 
   bool highCompression_;
 };
 
+// Factory: builds a heap-allocated LZ4Codec for the given level and codec
+// type.
 std::unique_ptr<Codec> LZ4Codec::create(int level, CodecType type) {
-  return make_unique<LZ4Codec>(level, type);
+  return std::make_unique<LZ4Codec>(level, type);
 }
 
 LZ4Codec::LZ4Codec(int level, CodecType type) : Codec(type) {
   DCHECK(type == CodecType::LZ4 || type == CodecType::LZ4_VARINT_SIZE);
 
   switch (level) {
-  case COMPRESSION_LEVEL_FASTEST:
-  case COMPRESSION_LEVEL_DEFAULT:
-    level = 1;
-    break;
-  case COMPRESSION_LEVEL_BEST:
-    level = 2;
-    break;
+    case COMPRESSION_LEVEL_FASTEST:
+    case COMPRESSION_LEVEL_DEFAULT:
+      level = 1;
+      break;
+    case COMPRESSION_LEVEL_BEST:
+      level = 2;
+      break;
   }
   if (level < 1 || level > 2) {
     throw std::invalid_argument(to<std::string>(
@@ -243,31 +585,41 @@ uint64_t LZ4Codec::doMaxUncompressedLength() const {
   return LZ4_MAX_INPUT_SIZE;
 }
 
+// Worst-case compressed size: LZ4's own bound for the payload, plus room for
+// the varint length prefix when this codec variant encodes the size.
+uint64_t LZ4Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+  uint64_t bound = LZ4_compressBound(uncompressedLength);
+  if (encodeSize()) {
+    bound += kMaxVarintLength64;
+  }
+  return bound;
+}
+
 std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
-  std::unique_ptr<IOBuf> clone;
+  IOBuf clone;
   if (data->isChained()) {
     // LZ4 doesn't support streaming, so we have to coalesce
-    clone = data->clone();
-    clone->coalesce();
-    data = clone.get();
+    clone = data->cloneCoalescedAsValue();
+    data = &clone;
   }
 
-  uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0;
-  auto out = IOBuf::create(extraSize + LZ4_compressBound(data->length()));
+  auto out = IOBuf::create(maxCompressedLength(data->length()));
   if (encodeSize()) {
     encodeVarintToIOBuf(data->length(), out.get());
   }
 
   int n;
+  auto input = reinterpret_cast<const char*>(data->data());
+  auto output = reinterpret_cast<char*>(out->writableTail());
+  const auto inputLength = data->length();
+#if LZ4_VERSION_NUMBER >= 10700
+  if (highCompression_) {
+    n = LZ4_compress_HC(input, output, inputLength, out->tailroom(), 0);
+  } else {
+    n = LZ4_compress_default(input, output, inputLength, out->tailroom());
+  }
+#else
   if (highCompression_) {
-    n = LZ4_compressHC(reinterpret_cast<const char*>(data->data()),
-                       reinterpret_cast<char*>(out->writableTail()),
-                       data->length());
+    n = LZ4_compressHC(input, output, inputLength);
   } else {
-    n = LZ4_compress(reinterpret_cast<const char*>(data->data()),
-                     reinterpret_cast<char*>(out->writableTail()),
-                     data->length());
+    n = LZ4_compress(input, output, inputLength);
   }
+#endif
 
   CHECK_GE(n, 0);
   CHECK_LE(n, out->capacity());
@@ -278,29 +630,26 @@ std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
 
 std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
     const IOBuf* data,
-    uint64_t uncompressedLength) {
-  std::unique_ptr<IOBuf> clone;
+    Optional<uint64_t> uncompressedLength) {
+  IOBuf clone;
   if (data->isChained()) {
     // LZ4 doesn't support streaming, so we have to coalesce
-    clone = data->clone();
-    clone->coalesce();
-    data = clone.get();
+    clone = data->cloneCoalescedAsValue();
+    data = &clone;
   }
 
   folly::io::Cursor cursor(data);
   uint64_t actualUncompressedLength;
   if (encodeSize()) {
     actualUncompressedLength = decodeVarintFromCursor(cursor);
-    if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-        uncompressedLength != actualUncompressedLength) {
+    if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
       throw std::runtime_error("LZ4Codec: invalid uncompressed length");
     }
   } else {
-    actualUncompressedLength = uncompressedLength;
-    if (actualUncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH ||
-        actualUncompressedLength > maxUncompressedLength()) {
-      throw std::runtime_error("LZ4Codec: invalid uncompressed length");
-    }
+    // Invariants
+    DCHECK(uncompressedLength.hasValue());
+    DCHECK(*uncompressedLength <= maxUncompressedLength());
+    actualUncompressedLength = *uncompressedLength;
   }
 
   auto sp = StringPiece{cursor.peekBytes()};
@@ -319,7 +668,187 @@ std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
   return out;
 }
 
-#endif  // FOLLY_HAVE_LIBLZ4
+#if LZ4_VERSION_NUMBER >= 10301
+
+// Codec for the LZ4 frame format, built on the LZ4F_* API from lz4frame.h.
+// Owns a decompression context that is created on demand and released in
+// the destructor.
+class LZ4FrameCodec final : public Codec {
+ public:
+  static std::unique_ptr<Codec> create(int level, CodecType type);
+  explicit LZ4FrameCodec(int level, CodecType type);
+  ~LZ4FrameCodec() override;
+
+  // Prefix-based detection of LZ4 frame data.
+  std::vector<std::string> validPrefixes() const override;
+  bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
+      const override;
+
+ private:
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+
+  std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
+  std::unique_ptr<IOBuf> doUncompress(
+      const IOBuf* data,
+      Optional<uint64_t> uncompressedLength) override;
+
+  // Reset the dctx_ if it is dirty or null.
+  void resetDCtx();
+
+  // Compression level passed to LZ4F through LZ4F_preferences_t.
+  int level_;
+  // Decompression context; nullptr until the first call to resetDCtx().
+  LZ4F_decompressionContext_t dctx_{nullptr};
+  // True while dctx_ may hold state from an unfinished frame.
+  bool dirty_{false};
+};
+
+// Factory: builds a heap-allocated LZ4FrameCodec for the given level and
+// codec type.
+/* static */ std::unique_ptr<Codec> LZ4FrameCodec::create(
+    int level,
+    CodecType type) {
+  return std::make_unique<LZ4FrameCodec>(level, type);
+}
+
+// Magic number used to recognise LZ4 frame data; it is compared against the
+// start of the input as a little-endian 32-bit value.
+static constexpr uint32_t kLZ4FrameMagicLE = 0x184D2204;
+
+// The only valid prefix is the LZ4 frame magic number rendered as a
+// little-endian byte string.
+std::vector<std::string> LZ4FrameCodec::validPrefixes() const {
+  return {prefixToStringLE(kLZ4FrameMagicLE)};
+}
+
+// Data is considered decodable when it starts with the LZ4 frame magic
+// number; the uncompressed length argument is ignored.
+bool LZ4FrameCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
+  return dataStartsWithLE(data, kLZ4FrameMagicLE);
+}
+
+// Asks LZ4F for the worst-case frame size, using the same preferences
+// (compression level and content size) that doCompress() sets.
+uint64_t LZ4FrameCodec::doMaxCompressedLength(
+    uint64_t uncompressedLength) const {
+  LZ4F_preferences_t preferences{};
+  preferences.frameInfo.contentSize = uncompressedLength;
+  preferences.compressionLevel = level_;
+  const auto bound = LZ4F_compressFrameBound(uncompressedLength, &preferences);
+  return bound;
+}
+
+// Passes a successful LZ4F return code through unchanged; converts an error
+// code into a std::runtime_error carrying LZ4F's error name.
+static size_t lz4FrameThrowOnError(size_t code) {
+  if (!LZ4F_isError(code)) {
+    return code;
+  }
+  throw std::runtime_error(
+      to<std::string>("LZ4Frame error: ", LZ4F_getErrorName(code)));
+}
+
+// Ensures dctx_ refers to a clean decompression context.
+// A context that exists and is not dirty is kept as is; otherwise any
+// existing context is freed and a new one is created.
+// Throws std::runtime_error (via lz4FrameThrowOnError) when creation fails.
+void LZ4FrameCodec::resetDCtx() {
+  if (dctx_ && !dirty_) {
+    return;
+  }
+  if (dctx_) {
+    LZ4F_freeDecompressionContext(dctx_);
+    // Clear the handle right away: if creation below throws, the destructor
+    // and later calls must not free the same context a second time.
+    dctx_ = nullptr;
+  }
+  // NOTE(review): 100 presumably matches lz4frame.h's LZ4F_VERSION - confirm.
+  lz4FrameThrowOnError(LZ4F_createDecompressionContext(&dctx_, 100));
+  dirty_ = false;
+}
+
+// Maps folly's symbolic compression levels onto LZ4F levels: FASTEST and
+// DEFAULT become 0, BEST becomes 16, and any other value is used verbatim.
+LZ4FrameCodec::LZ4FrameCodec(int level, CodecType type) : Codec(type) {
+  DCHECK(type == CodecType::LZ4_FRAME);
+  const bool wantsFast = level == COMPRESSION_LEVEL_FASTEST ||
+      level == COMPRESSION_LEVEL_DEFAULT;
+  if (wantsFast) {
+    level_ = 0;
+  } else if (level == COMPRESSION_LEVEL_BEST) {
+    level_ = 16;
+  } else {
+    level_ = level;
+  }
+}
+
+// Releases the decompression context if one was ever created.
+LZ4FrameCodec::~LZ4FrameCodec() {
+  if (dctx_) {
+    LZ4F_freeDecompressionContext(dctx_);
+  }
+}
+
+// Compresses the whole input into a single LZ4 frame. The frame header
+// records the content size, and the output buffer is sized by
+// maxCompressedLength(). Throws std::runtime_error (via
+// lz4FrameThrowOnError) when LZ4F reports an error.
+std::unique_ptr<IOBuf> LZ4FrameCodec::doCompress(const IOBuf* data) {
+  // LZ4 Frame compression doesn't support streaming so we have to coalesce
+  IOBuf flattened;
+  if (data->isChained()) {
+    flattened = data->cloneCoalescedAsValue();
+    data = &flattened;
+  }
+
+  // Describe the frame we want
+  const auto srcLength = data->length();
+  LZ4F_preferences_t preferences{};
+  preferences.frameInfo.contentSize = srcLength;
+  preferences.compressionLevel = level_;
+
+  // Compress straight into the tail of a buffer sized for the worst case
+  auto compressed = IOBuf::create(maxCompressedLength(srcLength));
+  const size_t frameSize = lz4FrameThrowOnError(LZ4F_compressFrame(
+      compressed->writableTail(),
+      compressed->tailroom(),
+      data->data(),
+      srcLength,
+      &preferences));
+  compressed->append(frameSize);
+  return compressed;
+}
+
+// Decompresses one LZ4 frame into an IOBuf chain.
+//
+// data: compressed input; a chained IOBuf is coalesced into one buffer.
+// uncompressedLength: expected output size, if known. It is used to size
+//   the output buffers and is checked against the actual size at the end.
+//
+// Throws std::runtime_error when LZ4F reports an error, when the frame is
+// incomplete, or when the output size does not match uncompressedLength.
+std::unique_ptr<IOBuf> LZ4FrameCodec::doUncompress(
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) {
+  // Reset the dctx if any errors have occurred
+  resetDCtx();
+  // Coalesce the data
+  ByteRange in = *data->begin();
+  IOBuf clone;
+  if (data->isChained()) {
+    clone = data->cloneCoalescedAsValue();
+    in = clone.coalesce();
+  }
+  // From here on only `in` is used; clear `data` to prevent accidental use
+  data = nullptr;
+  // Select decompression options. Value-initialize the struct so that every
+  // field other than stableDst is zero instead of indeterminate before the
+  // struct is handed to LZ4F_decompress().
+  LZ4F_decompressOptions_t options{};
+  options.stableDst = 1;
+  // Select blockSize and growthSize for the IOBufQueue
+  IOBufQueue queue(IOBufQueue::cacheChainLength());
+  auto blockSize = uint64_t{64} << 10;
+  auto growthSize = uint64_t{4} << 20;
+  if (uncompressedLength) {
+    // Allocate uncompressedLength in one chunk (up to 64 MB)
+    const auto allocateSize = std::min(*uncompressedLength, uint64_t{64} << 20);
+    queue.preallocate(allocateSize, allocateSize);
+    blockSize = std::min(*uncompressedLength, blockSize);
+    growthSize = std::min(*uncompressedLength, growthSize);
+  } else {
+    // Reduce growthSize for small data
+    const auto guessUncompressedLen =
+        4 * std::max<uint64_t>(blockSize, in.size());
+    growthSize = std::min(guessUncompressedLen, growthSize);
+  }
+  // Once LZ4_decompress() is called, the dctx_ cannot be reused until it
+  // returns 0
+  dirty_ = true;
+  // Decompress until the frame is over
+  size_t code = 0;
+  do {
+    // Allocate enough space to decompress at least a block
+    void* out;
+    size_t outSize;
+    std::tie(out, outSize) = queue.preallocate(blockSize, growthSize);
+    // Decompress; inSize and outSize are in/out parameters that come back
+    // holding the number of bytes consumed and produced
+    size_t inSize = in.size();
+    code = lz4FrameThrowOnError(
+        LZ4F_decompress(dctx_, out, &outSize, in.data(), &inSize, &options));
+    if (in.empty() && outSize == 0 && code != 0) {
+      // We passed no input, no output was produced, and the frame isn't over
+      // No more forward progress is possible
+      throw std::runtime_error("LZ4Frame error: Incomplete frame");
+    }
+    in.uncheckedAdvance(inSize);
+    queue.postallocate(outSize);
+  } while (code != 0);
+  // At this point the decompression context can be reused
+  dirty_ = false;
+  if (uncompressedLength && queue.chainLength() != *uncompressedLength) {
+    throw std::runtime_error("LZ4Frame error: Invalid uncompressedLength");
+  }
+  return queue.move();
+}
+
+#endif // LZ4_VERSION_NUMBER >= 10301
+#endif // FOLLY_HAVE_LIBLZ4
 
 #if FOLLY_HAVE_LIBSNAPPY
 
@@ -369,23 +898,24 @@ class SnappyCodec final : public Codec {
 
  private:
   uint64_t doMaxUncompressedLength() const override;
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
   std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
   std::unique_ptr<IOBuf> doUncompress(
       const IOBuf* data,
-      uint64_t uncompressedLength) override;
+      Optional<uint64_t> uncompressedLength) override;
 };
 
+// Factory: builds a heap-allocated SnappyCodec for the given level and codec
+// type.
 std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
-  return make_unique<SnappyCodec>(level, type);
+  return std::make_unique<SnappyCodec>(level, type);
 }
 
 SnappyCodec::SnappyCodec(int level, CodecType type) : Codec(type) {
   DCHECK(type == CodecType::SNAPPY);
   switch (level) {
-  case COMPRESSION_LEVEL_FASTEST:
-  case COMPRESSION_LEVEL_DEFAULT:
-  case COMPRESSION_LEVEL_BEST:
-    level = 1;
+    case COMPRESSION_LEVEL_FASTEST:
+    case COMPRESSION_LEVEL_DEFAULT:
+    case COMPRESSION_LEVEL_BEST:
+      level = 1;
   }
   if (level != 1) {
     throw std::invalid_argument(to<std::string>(
@@ -398,10 +928,13 @@ uint64_t SnappyCodec::doMaxUncompressedLength() const {
   return std::numeric_limits<uint32_t>::max();
 }
 
+// Delegates the worst-case size computation to snappy itself.
+uint64_t SnappyCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+  return snappy::MaxCompressedLength(uncompressedLength);
+}
+
 std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
   IOBufSnappySource source(data);
-  auto out =
-    IOBuf::create(snappy::MaxCompressedLength(source.Available()));
+  auto out = IOBuf::create(maxCompressedLength(source.Available()));
 
   snappy::UncheckedByteArraySink sink(reinterpret_cast<char*>(
       out->writableTail()));
@@ -413,8 +946,9 @@ std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
   return out;
 }
 
-std::unique_ptr<IOBuf> SnappyCodec::doUncompress(const IOBuf* data,
-                                                 uint64_t uncompressedLength) {
+std::unique_ptr<IOBuf> SnappyCodec::doUncompress(
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) {
   uint32_t actualUncompressedLength = 0;
 
   {
@@ -422,8 +956,7 @@ std::unique_ptr<IOBuf> SnappyCodec::doUncompress(const IOBuf* data,
     if (!snappy::GetUncompressedLength(&source, &actualUncompressedLength)) {
       throw std::runtime_error("snappy::GetUncompressedLength failed");
     }
-    if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-        uncompressedLength != actualUncompressedLength) {
+    if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
       throw std::runtime_error("snappy: invalid uncompressed length");
     }
   }
@@ -444,666 +977,1117 @@ std::unique_ptr<IOBuf> SnappyCodec::doUncompress(const IOBuf* data,
 
 #endif  // FOLLY_HAVE_LIBSNAPPY
 
-#if FOLLY_HAVE_LIBZ
+#if FOLLY_HAVE_LIBLZMA
+
 /**
- * Zlib codec
+ * LZMA2 compression
+ *
+ * Streaming codec for CodecType::LZMA2 and CodecType::LZMA2_VARINT_SIZE.
+ * The LZMA2_VARINT_SIZE variant writes the uncompressed length as a varint
+ * ahead of the LZMA stream and decodes it again before uncompressing.
  */
-class ZlibCodec final : public Codec {
+class LZMA2StreamCodec final : public StreamCodec {
  public:
-  static std::unique_ptr<Codec> create(int level, CodecType type);
-  explicit ZlibCodec(int level, CodecType type);
+  static std::unique_ptr<Codec> createCodec(int level, CodecType type);
+  static std::unique_ptr<StreamCodec> createStream(int level, CodecType type);
+  explicit LZMA2StreamCodec(int level, CodecType type);
+  ~LZMA2StreamCodec() override;
+
+  std::vector<std::string> validPrefixes() const override;
+  bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
+      const override;
 
  private:
-  std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
-  std::unique_ptr<IOBuf> doUncompress(
-      const IOBuf* data,
-      uint64_t uncompressedLength) override;
+  bool doNeedsDataLength() const override;
+  uint64_t doMaxUncompressedLength() const override;
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+
+  // True when the uncompressed length is stored as a varint prefix.
+  bool encodeSize() const {
+    return type() == CodecType::LZMA2_VARINT_SIZE;
+  }
+
+  void doResetStream() override;
+  bool doCompressStream(
+      ByteRange& input,
+      MutableByteRange& output,
+      StreamCodec::FlushOp flushOp) override;
+  bool doUncompressStream(
+      ByteRange& input,
+      MutableByteRange& output,
+      StreamCodec::FlushOp flushOp) override;
+
+  // (Re)initialize the encoder / decoder stream, creating it on first use.
+  void resetCStream();
+  void resetDStream();
+
+  bool decodeAndCheckVarint(ByteRange& input);
+  bool flushVarintBuffer(MutableByteRange& output);
+  // NOTE(review): no definition of resetVarintBuffer() is visible in this
+  // part of the file -- confirm it is defined or drop the declaration.
+  void resetVarintBuffer();
 
-  std::unique_ptr<IOBuf> addOutputBuffer(z_stream* stream, uint32_t length);
-  bool doInflate(z_stream* stream, IOBuf* head, uint32_t bufferLength);
+  // liblzma encoder / decoder state; empty until the matching reset*Stream().
+  Optional<lzma_stream> cstream_{};
+  Optional<lzma_stream> dstream_{};
+
+  // Scratch space for the varint length prefix, both while it is being
+  // written (compression) and while it is collected (uncompression).
+  std::array<uint8_t, kMaxVarintLength64> varintBuffer_;
+  // Part of the encoded varint that has not been copied to the output yet.
+  ByteRange varintToEncode_;
+  // Number of varint bytes buffered so far while uncompressing.
+  size_t varintBufferPos_{0};
 
   int level_;
+  // Set by doResetStream(); cleared once the stream has been initialized.
+  bool needReset_{true};
+  // True while the varint prefix still has to be read when uncompressing.
+  bool needDecodeSize_{false};
 };
 
-std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
-  return make_unique<ZlibCodec>(level, type);
+// Leading six bytes of an LZMA2 stream (FD 37 7A 58 5A 00), packed as a
+// little-endian integer for prefixToStringLE() / dataStartsWithLE().
+static constexpr uint64_t kLZMA2MagicLE = 0x005A587A37FD;
+static constexpr unsigned kLZMA2MagicBytes = 6;
+
+// Prefixes that identify data produced by this codec. The varint-prefixed
+// variant reports none; plain LZMA2 reports the stream magic.
+std::vector<std::string> LZMA2StreamCodec::validPrefixes() const {
+  if (!encodeSize()) {
+    return {prefixToStringLE(kLZMA2MagicLE, kLZMA2MagicBytes)};
+  }
+  return {};
+}
+
+// True exactly for CodecType::LZMA2_VARINT_SIZE, the variant that encodes the
+// uncompressed length as a varint ahead of the stream.
+bool LZMA2StreamCodec::doNeedsDataLength() const {
+  return type() == CodecType::LZMA2_VARINT_SIZE;
+}
+
+// Reports whether `data` starts with the LZMA2 magic. Always false for the
+// varint-prefixed variant, whose output begins with the encoded length.
+bool LZMA2StreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
+    const {
+  // Returns false for all inputs less than 8 bytes.
+  // This is okay, because no valid LZMA2 streams are less than 8 bytes.
+  return !encodeSize() &&
+      dataStartsWithLE(data, kLZMA2MagicLE, kLZMA2MagicBytes);
+}
+
+// Factory handing the codec out through the generic Codec interface.
+std::unique_ptr<Codec> LZMA2StreamCodec::createCodec(
+    int level,
+    CodecType type) {
+  return std::unique_ptr<Codec>(make_unique<LZMA2StreamCodec>(level, type));
+}
+
+// Factory handing the codec out through the StreamCodec interface.
+std::unique_ptr<StreamCodec> LZMA2StreamCodec::createStream(
+    int level,
+    CodecType type) {
+  return std::unique_ptr<StreamCodec>(
+      make_unique<LZMA2StreamCodec>(level, type));
 }
 
-ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
-  DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
+// Translates the symbolic COMPRESSION_LEVEL_* values into LZMA presets
+// (FASTEST -> 0, DEFAULT -> LZMA_PRESET_DEFAULT, BEST -> 9); any other value
+// is taken as given. Throws std::invalid_argument unless the resulting level
+// lies in [0, 9].
+LZMA2StreamCodec::LZMA2StreamCodec(int level, CodecType type)
+    : StreamCodec(type) {
+  DCHECK(type == CodecType::LZMA2 || type == CodecType::LZMA2_VARINT_SIZE);
   switch (level) {
-  case COMPRESSION_LEVEL_FASTEST:
-    level = 1;
-    break;
-  case COMPRESSION_LEVEL_DEFAULT:
-    level = Z_DEFAULT_COMPRESSION;
-    break;
-  case COMPRESSION_LEVEL_BEST:
-    level = 9;
-    break;
-  }
-  if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) {
-    throw std::invalid_argument(to<std::string>(
-        "ZlibCodec: invalid level: ", level));
+    case COMPRESSION_LEVEL_FASTEST:
+      level = 0;
+      break;
+    case COMPRESSION_LEVEL_DEFAULT:
+      level = LZMA_PRESET_DEFAULT;
+      break;
+    case COMPRESSION_LEVEL_BEST:
+      level = 9;
+      break;
+  }
+  if (level < 0 || level > 9) {
+    throw std::invalid_argument(
+        to<std::string>("LZMA2Codec: invalid level: ", level));
   }
   level_ = level;
 }
 
-std::unique_ptr<IOBuf> ZlibCodec::addOutputBuffer(z_stream* stream,
-                                                  uint32_t length) {
-  CHECK_EQ(stream->avail_out, 0);
+// Releases whichever liblzma streams were created (encoder first, then
+// decoder) and empties the Optionals holding them.
+LZMA2StreamCodec::~LZMA2StreamCodec() {
+  auto const release = [](Optional<lzma_stream>& stream) {
+    if (stream) {
+      lzma_end(stream.get_pointer());
+      stream.clear();
+    }
+  };
+  release(cstream_);
+  release(dstream_);
+}
 
-  auto buf = IOBuf::create(length);
-  buf->append(length);
+// Largest input this codec accepts: 2^63 bytes.
+uint64_t LZMA2StreamCodec::doMaxUncompressedLength() const {
+  // From lzma/base.h: "Stream is roughly 8 EiB (2^63 bytes)"
+  constexpr uint64_t kMaxStreamBytes = uint64_t(1) << 63;
+  return kMaxStreamBytes;
+}
 
-  stream->next_out = buf->writableData();
-  stream->avail_out = buf->length();
+// Upper bound on the compressed size: lzma_stream_buffer_bound() plus, when
+// the length prefix is enabled, room for the longest possible varint.
+uint64_t LZMA2StreamCodec::doMaxCompressedLength(
+    uint64_t uncompressedLength) const {
+  auto const varintOverhead = encodeSize() ? kMaxVarintLength64 : 0;
+  return lzma_stream_buffer_bound(uncompressedLength) + varintOverhead;
+}
 
-  return buf;
+// Only records that a reset is pending; the liblzma stream itself is
+// (re)initialized by the next doCompressStream() / doUncompressStream().
+void LZMA2StreamCodec::doResetStream() {
+  needReset_ = true;
 }
 
-bool ZlibCodec::doInflate(z_stream* stream,
-                          IOBuf* head,
-                          uint32_t bufferLength) {
-  if (stream->avail_out == 0) {
-    head->prependChain(addOutputBuffer(stream, bufferLength));
+// (Re)initializes the encoder at level_ with LZMA_CHECK_NONE, creating the
+// lzma_stream on first use.
+// Throws std::runtime_error if lzma_easy_encoder() does not return LZMA_OK.
+void LZMA2StreamCodec::resetCStream() {
+  if (!cstream_) {
+    cstream_.assign(LZMA_STREAM_INIT);
+  }
+  lzma_ret const rc =
+      lzma_easy_encoder(cstream_.get_pointer(), level_, LZMA_CHECK_NONE);
+  if (rc != LZMA_OK) {
+    throw std::runtime_error(folly::to<std::string>(
+        "LZMA2StreamCodec: lzma_easy_encoder error: ", rc));
   }
+}
 
-  int rc = inflate(stream, Z_NO_FLUSH);
+// (Re)initializes the decoder, creating the lzma_stream on first use. The
+// memory limit passed to liblzma is the largest uint64_t value and no decoder
+// flags are set.
+// Throws std::runtime_error if lzma_auto_decoder() does not return LZMA_OK.
+void LZMA2StreamCodec::resetDStream() {
+  if (!dstream_.hasValue()) {
+    dstream_.assign(LZMA_STREAM_INIT);
+  }
+  constexpr uint64_t kMemoryLimit = std::numeric_limits<uint64_t>::max();
+  lzma_ret const status =
+      lzma_auto_decoder(dstream_.get_pointer(), kMemoryLimit, 0);
+  if (status == LZMA_OK) {
+    return;
+  }
+  throw std::runtime_error(folly::to<std::string>(
+      "LZMA2StreamCodec: lzma_auto_decoder error: ", status));
+}
 
+// Passes through the liblzma codes the callers handle themselves (LZMA_OK,
+// LZMA_STREAM_END, LZMA_BUF_ERROR) and throws std::runtime_error for every
+// other return code.
+static lzma_ret lzmaThrowOnError(lzma_ret const rc) {
   switch (rc) {
-  case Z_OK:
-    break;
-  case Z_STREAM_END:
-    return true;
-  case Z_BUF_ERROR:
-  case Z_NEED_DICT:
-  case Z_DATA_ERROR:
-  case Z_MEM_ERROR:
-    throw std::runtime_error(to<std::string>(
-        "ZlibCodec: inflate error: ", rc, ": ", stream->msg));
-  default:
-    CHECK(false) << rc << ": " << stream->msg;
+    case LZMA_OK:
+    case LZMA_STREAM_END:
+    case LZMA_BUF_ERROR: // not fatal: returned if no progress was made twice
+      return rc;
+    default:
+      throw std::runtime_error(
+          to<std::string>("LZMA2StreamCodec: error: ", rc));
   }
-
-  return false;
 }
 
-std::unique_ptr<IOBuf> ZlibCodec::doCompress(const IOBuf* data) {
-  z_stream stream;
-  stream.zalloc = nullptr;
-  stream.zfree = nullptr;
-  stream.opaque = nullptr;
-
-  // Using deflateInit2() to support gzip.  "The windowBits parameter is the
-  // base two logarithm of the maximum window size (...) The default value is
-  // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
-  // around the compressed data instead of a zlib wrapper. The gzip header
-  // will have no file name, no extra data, no comment, no modification time
-  // (set to zero), no header crc, and the operating system will be set to 255
-  // (unknown)."
-  int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
-  // All other parameters (method, memLevel, strategy) get default values from
-  // the zlib manual.
-  int rc = deflateInit2(&stream,
-                        level_,
-                        Z_DEFLATED,
-                        windowBits,
-                        /* memLevel */ 8,
-                        Z_DEFAULT_STRATEGY);
-  if (rc != Z_OK) {
-    throw std::runtime_error(to<std::string>(
-        "ZlibCodec: deflateInit error: ", rc, ": ", stream.msg));
+// Maps a StreamCodec flush operation onto the liblzma action used when
+// compressing: NONE -> LZMA_RUN, FLUSH -> LZMA_SYNC_FLUSH, END -> LZMA_FINISH.
+// Throws std::invalid_argument for any other value.
+static lzma_action lzmaTranslateFlush(StreamCodec::FlushOp flush) {
+  switch (flush) {
+    case StreamCodec::FlushOp::NONE:
+      return LZMA_RUN;
+    case StreamCodec::FlushOp::FLUSH:
+      return LZMA_SYNC_FLUSH;
+    case StreamCodec::FlushOp::END:
+      return LZMA_FINISH;
+    default:
+      throw std::invalid_argument("LZMA2StreamCodec: Invalid flush");
   }
+}
 
-  stream.next_in = stream.next_out = nullptr;
-  stream.avail_in = stream.avail_out = 0;
-  stream.total_in = stream.total_out = 0;
-
-  bool success = false;
-
+/**
+ * Copies as much of the pending varint as fits into output.
+ * output is advanced past the bytes that were written.
+ * Returns true once nothing is left to write (also when nothing was pending
+ * to begin with) and false if output ran out of space first.
+ */
+bool LZMA2StreamCodec::flushVarintBuffer(MutableByteRange& output) {
+  if (!varintToEncode_.empty()) {
+    const size_t chunk = std::min(varintToEncode_.size(), output.size());
+    if (chunk != 0) {
+      memcpy(output.data(), varintToEncode_.data(), chunk);
+    }
+    varintToEncode_.advance(chunk);
+    output.advance(chunk);
+  }
+  return varintToEncode_.empty();
+}
+
+// Compresses from input into output, advancing both ranges by the amounts
+// liblzma consumed and produced.
+// Return value by flushOp:
+//   NONE  - always false
+//   FLUSH - true once all input is consumed and output space is left over
+//   END   - true once lzma_code() reports LZMA_STREAM_END
+bool LZMA2StreamCodec::doCompressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  if (needReset_) {
+    resetCStream();
+    if (encodeSize()) {
+      // Stage the varint-encoded uncompressed length; it is emitted ahead of
+      // any compressed bytes by flushVarintBuffer() below.
+      varintBufferPos_ = 0;
+      size_t const varintSize =
+          encodeVarint(*uncompressedLength(), varintBuffer_.data());
+      varintToEncode_ = {varintBuffer_.data(), varintSize};
+    }
+    needReset_ = false;
+  }
+
+  // No compressed data is produced until the whole prefix has been written.
+  if (!flushVarintBuffer(output)) {
+    return false;
+  }
+
+  cstream_->next_in = const_cast<uint8_t*>(input.data());
+  cstream_->avail_in = input.size();
+  cstream_->next_out = output.data();
+  cstream_->avail_out = output.size();
   SCOPE_EXIT {
-    int rc = deflateEnd(&stream);
-    // If we're here because of an exception, it's okay if some data
-    // got dropped.
-    CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
-      << rc << ": " << stream.msg;
+    input.uncheckedAdvance(input.size() - cstream_->avail_in);
+    output.uncheckedAdvance(output.size() - cstream_->avail_out);
   };
+  lzma_ret const rc = lzmaThrowOnError(
+      lzma_code(cstream_.get_pointer(), lzmaTranslateFlush(flushOp)));
+  switch (flushOp) {
+    case StreamCodec::FlushOp::NONE:
+      return false;
+    case StreamCodec::FlushOp::FLUSH:
+      return cstream_->avail_in == 0 && cstream_->avail_out != 0;
+    case StreamCodec::FlushOp::END:
+      return rc == LZMA_STREAM_END;
+    default:
+      throw std::invalid_argument("LZMA2StreamCodec: invalid FlushOp");
+  }
+}
 
-  uint64_t uncompressedLength = data->computeChainDataLength();
-  uint64_t maxCompressedLength = deflateBound(&stream, uncompressedLength);
-
-  // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
+/**
+ * Tries to decode the varint length prefix from input, buffering partial
+ * varints in varintBuffer_ across calls.
+ * input is advanced by the number of bytes consumed.
+ *
+ * Throws std::runtime_error if the buffered bytes are too many to form a
+ * valid varint, or if an uncompressed length was provided and the decoded
+ * value differs from it.
+ *
+ * Returns true once the varint has been decoded (and matches the provided
+ * uncompressed length, if any); returns false if more bytes are needed.
+ */
+bool LZMA2StreamCodec::decodeAndCheckVarint(ByteRange& input) {
+  if (input.empty()) {
+    return false;
+  }
+  // Append as much input as the varint buffer can still hold.
+  size_t const copied =
+      std::min(kMaxVarintLength64 - varintBufferPos_, input.size());
+  memcpy(varintBuffer_.data() + varintBufferPos_, input.data(), copied);
+
+  size_t const buffered = varintBufferPos_ + copied;
+  ByteRange pending{varintBuffer_.data(), buffered};
+  auto const decoded = tryDecodeVarint(pending);
+
+  if (!decoded.hasValue()) {
+    if (decoded.error() == DecodeVarintError::TooManyBytes) {
+      throw std::runtime_error("LZMA2StreamCodec: invalid uncompressed length");
+    }
+    // Too few bytes
+    input.advance(copied);
+    varintBufferPos_ += copied;
+    return false;
+  }
+
+  // `pending` now starts after the varint, so the difference is its length.
+  // Only the bytes taken from this call's input are consumed from input.
+  size_t const varintSize = buffered - pending.size();
+  input.advance(varintSize - varintBufferPos_);
+  if (uncompressedLength() && *uncompressedLength() != decoded.value()) {
+    throw std::runtime_error("LZMA2StreamCodec: invalid uncompressed length");
+  }
+  return true;
+}
 
-  auto out = addOutputBuffer(
-      &stream,
-      (maxCompressedLength <= maxSingleStepLength ?
-       maxCompressedLength :
-       defaultBufferLength));
-
-  for (auto& range : *data) {
-    uint64_t remaining = range.size();
-    uint64_t written = 0;
-    while (remaining) {
-      uint32_t step = (remaining > maxSingleStepLength ?
-                       maxSingleStepLength : remaining);
-      stream.next_in = const_cast<uint8_t*>(range.data() + written);
-      stream.avail_in = step;
-      remaining -= step;
-      written += step;
-
-      while (stream.avail_in != 0) {
-        if (stream.avail_out == 0) {
-          out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
-        }
-
-        rc = deflate(&stream, Z_NO_FLUSH);
-
-        CHECK_EQ(rc, Z_OK) << stream.msg;
-      }
+// Uncompresses from input into output, advancing both ranges by the amounts
+// consumed and produced. For the varint-prefixed variant the length prefix
+// is decoded first; until it is complete, false is returned without
+// touching output.
+// Returns true once lzma_code() reports LZMA_STREAM_END.
+bool LZMA2StreamCodec::doUncompressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  if (needReset_) {
+    resetDStream();
+    needReset_ = false;
+    needDecodeSize_ = encodeSize();
+    if (encodeSize()) {
+      // Reset buffer
+      varintBufferPos_ = 0;
     }
   }
 
-  do {
-    if (stream.avail_out == 0) {
-      out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
+  if (needDecodeSize_) {
+    // Try decoding the varint. If the input does not contain the entire varint,
+    // buffer the input. If the varint can not be decoded, fail.
+    if (!decodeAndCheckVarint(input)) {
+      return false;
     }
+    needDecodeSize_ = false;
+  }
 
-    rc = deflate(&stream, Z_FINISH);
-  } while (rc == Z_OK);
+  dstream_->next_in = const_cast<uint8_t*>(input.data());
+  dstream_->avail_in = input.size();
+  dstream_->next_out = output.data();
+  dstream_->avail_out = output.size();
+  SCOPE_EXIT {
+    input.advance(input.size() - dstream_->avail_in);
+    output.advance(output.size() - dstream_->avail_out);
+  };
 
-  CHECK_EQ(rc, Z_STREAM_END) << stream.msg;
+  lzma_ret rc;
+  switch (flushOp) {
+    // FLUSH is handled exactly like NONE when decoding: both use LZMA_RUN.
+    case StreamCodec::FlushOp::NONE:
+    case StreamCodec::FlushOp::FLUSH:
+      rc = lzmaThrowOnError(lzma_code(dstream_.get_pointer(), LZMA_RUN));
+      break;
+    case StreamCodec::FlushOp::END:
+      rc = lzmaThrowOnError(lzma_code(dstream_.get_pointer(), LZMA_FINISH));
+      break;
+    default:
+      throw std::invalid_argument("LZMA2StreamCodec: invalid flush");
+  }
+  return rc == LZMA_STREAM_END;
+}
+#endif // FOLLY_HAVE_LIBLZMA
 
-  out->prev()->trimEnd(stream.avail_out);
+#if FOLLY_HAVE_LIBZSTD
 
-  success = true;  // we survived
+namespace {
+// Deleter shims for the ZSTD stream smart pointers in ZSTDStreamCodec; they
+// are passed to static_function_deleter and discard zstd's return value.
+void zstdFreeCStream(ZSTD_CStream* zcs) {
+  ZSTD_freeCStream(zcs);
+}
 
-  return out;
+void zstdFreeDStream(ZSTD_DStream* zds) {
+  ZSTD_freeDStream(zds);
+}
 }
 
-std::unique_ptr<IOBuf> ZlibCodec::doUncompress(const IOBuf* data,
-                                               uint64_t uncompressedLength) {
-  z_stream stream;
-  stream.zalloc = nullptr;
-  stream.zfree = nullptr;
-  stream.opaque = nullptr;
+/**
+ * ZSTD compression
+ *
+ * Streaming codec for CodecType::ZSTD. When the whole input arrives in a
+ * single END call and the output is large enough, the one-shot zstd API is
+ * used instead of the streaming one (see tryBlockCompress() and
+ * tryBlockUncompress()).
+ */
+class ZSTDStreamCodec final : public StreamCodec {
+ public:
+  static std::unique_ptr<Codec> createCodec(int level, CodecType);
+  static std::unique_ptr<StreamCodec> createStream(int level, CodecType);
+  explicit ZSTDStreamCodec(int level, CodecType type);
 
-  // "The windowBits parameter is the base two logarithm of the maximum window
-  // size (...) The default value is 15 (...) add 16 to decode only the gzip
-  // format (the zlib format will return a Z_DATA_ERROR)."
-  int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
-  int rc = inflateInit2(&stream, windowBits);
-  if (rc != Z_OK) {
-    throw std::runtime_error(to<std::string>(
-        "ZlibCodec: inflateInit error: ", rc, ": ", stream.msg));
-  }
+  std::vector<std::string> validPrefixes() const override;
+  bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
+      const override;
 
-  stream.next_in = stream.next_out = nullptr;
-  stream.avail_in = stream.avail_out = 0;
-  stream.total_in = stream.total_out = 0;
+ private:
+  bool doNeedsUncompressedLength() const override;
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+  Optional<uint64_t> doGetUncompressedLength(
+      IOBuf const* data,
+      Optional<uint64_t> uncompressedLength) const override;
+
+  void doResetStream() override;
+  bool doCompressStream(
+      ByteRange& input,
+      MutableByteRange& output,
+      StreamCodec::FlushOp flushOp) override;
+  bool doUncompressStream(
+      ByteRange& input,
+      MutableByteRange& output,
+      StreamCodec::FlushOp flushOp) override;
+
+  // (Re)initialize the streaming context, allocating it on first use.
+  void resetCStream();
+  void resetDStream();
+
+  // One-shot fast paths; each returns false when it cannot be applied.
+  bool tryBlockCompress(ByteRange& input, MutableByteRange& output) const;
+  bool tryBlockUncompress(ByteRange& input, MutableByteRange& output) const;
 
-  bool success = false;
+  int level_;
+  // Set by doResetStream(); cleared once a streaming context is initialized.
+  bool needReset_{true};
+  // Streaming contexts, freed through zstdFreeCStream / zstdFreeDStream.
+  std::unique_ptr<
+      ZSTD_CStream,
+      folly::static_function_deleter<ZSTD_CStream, &zstdFreeCStream>>
+      cstream_{nullptr};
+  std::unique_ptr<
+      ZSTD_DStream,
+      folly::static_function_deleter<ZSTD_DStream, &zstdFreeDStream>>
+      dstream_{nullptr};
+};
 
-  SCOPE_EXIT {
-    int rc = inflateEnd(&stream);
-    // If we're here because of an exception, it's okay if some data
-    // got dropped.
-    CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
-      << rc << ": " << stream.msg;
-  };
+// ZSTD frame magic, stored so that its little-endian byte order is
+// 28 B5 2F FD; used by validPrefixes() and canUncompress().
+static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528;
 
-  // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
+// ZSTD data is identified by a single prefix: the frame magic.
+std::vector<std::string> ZSTDStreamCodec::validPrefixes() const {
+  std::vector<std::string> prefixes;
+  prefixes.push_back(prefixToStringLE(kZSTDMagicLE));
+  return prefixes;
+}
 
-  auto out = addOutputBuffer(
-      &stream,
-      ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-        uncompressedLength <= maxSingleStepLength) ?
-       uncompressedLength :
-       defaultBufferLength));
-
-  bool streamEnd = false;
-  for (auto& range : *data) {
-    if (range.empty()) {
-      continue;
-    }
+// Reports whether `data` begins with the ZSTD frame magic; the length hint
+// is ignored.
+bool ZSTDStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
+    const {
+  bool const hasMagic = dataStartsWithLE(data, kZSTDMagicLE);
+  return hasMagic;
+}
 
-    stream.next_in = const_cast<uint8_t*>(range.data());
-    stream.avail_in = range.size();
+// Factory handing the codec out through the generic Codec interface.
+std::unique_ptr<Codec> ZSTDStreamCodec::createCodec(int level, CodecType type) {
+  return std::unique_ptr<Codec>(make_unique<ZSTDStreamCodec>(level, type));
+}
 
-    while (stream.avail_in != 0) {
-      if (streamEnd) {
-        throw std::runtime_error(to<std::string>(
-            "ZlibCodec: junk after end of data"));
-      }
+// Factory handing the codec out through the StreamCodec interface.
+std::unique_ptr<StreamCodec> ZSTDStreamCodec::createStream(
+    int level,
+    CodecType type) {
+  return std::unique_ptr<StreamCodec>(
+      make_unique<ZSTDStreamCodec>(level, type));
+}
 
-      streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
+// Translates the symbolic COMPRESSION_LEVEL_* values into ZSTD levels
+// (FASTEST and DEFAULT -> 1, BEST -> 19); any other value is taken as given.
+// Throws std::invalid_argument unless the resulting level lies in
+// [1, ZSTD_maxCLevel()].
+ZSTDStreamCodec::ZSTDStreamCodec(int level, CodecType type)
+    : StreamCodec(type) {
+  DCHECK(type == CodecType::ZSTD);
+  switch (level) {
+    case COMPRESSION_LEVEL_FASTEST:
+    case COMPRESSION_LEVEL_DEFAULT:
+      level = 1;
+      break;
+    case COMPRESSION_LEVEL_BEST:
+      level = 19;
+      break;
+  }
+  bool const levelIsValid = level >= 1 && level <= ZSTD_maxCLevel();
+  if (!levelIsValid) {
+    throw std::invalid_argument(
+        to<std::string>("ZSTD: invalid level: ", level));
+  }
+  level_ = level;
+}
+
+// This codec never requires the caller to supply the uncompressed length.
+bool ZSTDStreamCodec::doNeedsUncompressedLength() const {
+  return false;
+}
+
+// Worst-case compressed size for `uncompressedLength` bytes of input, as
+// reported by ZSTD_compressBound().
+uint64_t ZSTDStreamCodec::doMaxCompressedLength(
+    uint64_t uncompressedLength) const {
+  auto const bound = ZSTD_compressBound(uncompressedLength);
+  return bound;
+}
+
+// Throws std::runtime_error carrying zstd's error name when `rc` is a zstd
+// error code; returns normally otherwise.
+// Internal linkage: this is a helper private to this translation unit, like
+// lzmaThrowOnError() and bzCheck().
+static void zstdThrowIfError(size_t rc) {
+  if (!ZSTD_isError(rc)) {
+    return;
+  }
+  throw std::runtime_error(
+      to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
+}
+
+// Returns the uncompressed length recorded in the frame when
+// ZSTD_getDecompressedSize() yields a non-zero value for the first IOBuf;
+// otherwise returns the caller-provided value unchanged.
+// Throws std::runtime_error if both are available and they disagree.
+Optional<uint64_t> ZSTDStreamCodec::doGetUncompressedLength(
+    IOBuf const* data,
+    Optional<uint64_t> uncompressedLength) const {
+  // Read decompressed size from frame if available in first IOBuf.
+  auto const decompressedSize =
+      ZSTD_getDecompressedSize(data->data(), data->length());
+  // A result of 0 is treated as "no size available".
+  if (decompressedSize != 0) {
+    if (uncompressedLength && *uncompressedLength != decompressedSize) {
+      throw std::runtime_error("ZSTD: invalid uncompressed length");
     }
+    uncompressedLength = decompressedSize;
   }
+  return uncompressedLength;
+}
 
-  while (!streamEnd) {
-    streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
+// Only records that a reset is pending; the zstd context is (re)initialized
+// by the next doCompressStream() / doUncompressStream() call.
+void ZSTDStreamCodec::doResetStream() {
+  needReset_ = true;
+}
+
+// One-shot compression of the whole input with ZSTD_compress().
+// Returns false without touching input or output when output is smaller
+// than ZSTD_compressBound(input.size()). On success all of input is consumed,
+// output is advanced by the compressed size, and true is returned.
+// Throws std::runtime_error if ZSTD_compress() reports an error.
+bool ZSTDStreamCodec::tryBlockCompress(
+    ByteRange& input,
+    MutableByteRange& output) const {
+  DCHECK(needReset_);
+  // We need to know that we have enough output space to use block compression
+  if (output.size() < ZSTD_compressBound(input.size())) {
+    return false;
   }
+  size_t const length = ZSTD_compress(
+      output.data(), output.size(), input.data(), input.size(), level_);
+  zstdThrowIfError(length);
+  input.uncheckedAdvance(input.size());
+  output.uncheckedAdvance(length);
+  return true;
+}
 
-  out->prev()->trimEnd(stream.avail_out);
+// (Re)initializes the compression context, allocating it on first use.
+// The content-size flag is set exactly when the uncompressed length is known.
+// Throws std::bad_alloc if the context cannot be created and
+// std::runtime_error if zstd rejects the initialization.
+void ZSTDStreamCodec::resetCStream() {
+  if (cstream_ == nullptr) {
+    cstream_.reset(ZSTD_createCStream());
+    if (cstream_ == nullptr) {
+      throw std::bad_alloc{};
+    }
+  }
+  // Advanced API usage works for all supported versions of zstd.
+  // Required to set contentSizeFlag.
+  auto const knownLength = uncompressedLength();
+  auto const pledgedSize = knownLength.value_or(0);
+  auto params = ZSTD_getParams(level_, pledgedSize, 0);
+  params.fParams.contentSizeFlag = knownLength.hasValue();
+  auto const status = ZSTD_initCStream_advanced(
+      cstream_.get(), nullptr, 0, params, pledgedSize);
+  zstdThrowIfError(status);
+}
 
-  if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-      uncompressedLength != stream.total_out) {
-    throw std::runtime_error(to<std::string>(
-        "ZlibCodec: invalid uncompressed length"));
+// Compresses from input into output, advancing both ranges by the amounts
+// zstd consumed and produced.
+// Returns true only when a FLUSH or END was requested, all input has been
+// consumed, and the corresponding zstd flush/end call returned 0.
+bool ZSTDStreamCodec::doCompressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  bool const noFlush = flushOp == StreamCodec::FlushOp::NONE;
+  if (needReset_) {
+    // If we are given all the input in one chunk try to use block compression
+    bool const wholeInput = flushOp == StreamCodec::FlushOp::END;
+    if (wholeInput && tryBlockCompress(input, output)) {
+      return true;
+    }
+    resetCStream();
+    needReset_ = false;
+  }
+  ZSTD_inBuffer in = {input.data(), input.size(), 0};
+  ZSTD_outBuffer out = {output.data(), output.size(), 0};
+  SCOPE_EXIT {
+    input.uncheckedAdvance(in.pos);
+    output.uncheckedAdvance(out.pos);
+  };
+  if (noFlush || !input.empty()) {
+    zstdThrowIfError(ZSTD_compressStream(cstream_.get(), &out, &in));
+  }
+  // Flushing or ending only starts once every input byte has been consumed.
+  if (noFlush || in.pos != in.size) {
+    return false;
+  }
+  size_t rc;
+  switch (flushOp) {
+    case StreamCodec::FlushOp::FLUSH:
+      rc = ZSTD_flushStream(cstream_.get(), &out);
+      break;
+    case StreamCodec::FlushOp::END:
+      rc = ZSTD_endStream(cstream_.get(), &out);
+      break;
+    default:
+      throw std::invalid_argument("ZSTD: invalid FlushOp");
+  }
+  zstdThrowIfError(rc);
+  return rc == 0;
+}
+
+// One-shot decompression of a single frame with ZSTD_decompress().
+// Returns false (nothing consumed) when zstd is older than 1.1.4, when the
+// uncompressed length is unknown, or when output is smaller than that length.
+// On success input is advanced past the frame, output by the decompressed
+// size, and true is returned.
+// Throws std::runtime_error on a zstd error or if the decompressed size
+// differs from the expected uncompressed length.
+bool ZSTDStreamCodec::tryBlockUncompress(
+    ByteRange& input,
+    MutableByteRange& output) const {
+  DCHECK(needReset_);
+#if ZSTD_VERSION_NUMBER < 10104
+  // We require ZSTD_findFrameCompressedSize() to perform this optimization.
+  return false;
+#else
+  // We need to know the uncompressed length and have enough output space.
+  if (!uncompressedLength() || output.size() < *uncompressedLength()) {
+    return false;
   }
+  size_t const compressedLength =
+      ZSTD_findFrameCompressedSize(input.data(), input.size());
+  zstdThrowIfError(compressedLength);
+  size_t const length = ZSTD_decompress(
+      output.data(), *uncompressedLength(), input.data(), compressedLength);
+  zstdThrowIfError(length);
+  if (length != *uncompressedLength()) {
+    throw std::runtime_error("ZSTDStreamCodec: Incorrect uncompressed length");
+  }
+  input.uncheckedAdvance(compressedLength);
+  output.uncheckedAdvance(length);
+  return true;
+#endif
+}
 
-  success = true;  // we survived
+// (Re)initializes the decompression context, allocating it on first use.
+// Throws std::bad_alloc if the context cannot be created and
+// std::runtime_error if ZSTD_initDStream() reports an error.
+void ZSTDStreamCodec::resetDStream() {
+  if (dstream_ == nullptr) {
+    dstream_.reset(ZSTD_createDStream());
+    if (dstream_ == nullptr) {
+      throw std::bad_alloc{};
+    }
+  }
+  auto const status = ZSTD_initDStream(dstream_.get());
+  zstdThrowIfError(status);
+}
 
-  return out;
+// Uncompresses from input into output, advancing both ranges by the amounts
+// zstd consumed and produced.
+// Returns true when the one-shot path succeeded or when
+// ZSTD_decompressStream() returns 0.
+bool ZSTDStreamCodec::doUncompressStream(
+    ByteRange& input,
+    MutableByteRange& output,
+    StreamCodec::FlushOp flushOp) {
+  if (needReset_) {
+    // If we are given all the input in one chunk try to use block uncompression
+    bool const wholeInput = flushOp == StreamCodec::FlushOp::END;
+    if (wholeInput && tryBlockUncompress(input, output)) {
+      return true;
+    }
+    resetDStream();
+    needReset_ = false;
+  }
+  ZSTD_inBuffer in = {input.data(), input.size(), 0};
+  ZSTD_outBuffer out = {output.data(), output.size(), 0};
+  SCOPE_EXIT {
+    input.uncheckedAdvance(in.pos);
+    output.uncheckedAdvance(out.pos);
+  };
+  size_t const status = ZSTD_decompressStream(dstream_.get(), &out, &in);
+  zstdThrowIfError(status);
+  return status == 0;
 }
 
-#endif  // FOLLY_HAVE_LIBZ
+#endif // FOLLY_HAVE_LIBZSTD
 
-#if FOLLY_HAVE_LIBLZMA
+#if FOLLY_HAVE_LIBBZ2
 
-/**
- * LZMA2 compression
- */
-class LZMA2Codec final : public Codec {
+/**
+ * Bzip2 compression (CodecType::BZIP2); a one-shot Codec, not a StreamCodec.
+ */
+class Bzip2Codec final : public Codec {
  public:
   static std::unique_ptr<Codec> create(int level, CodecType type);
-  explicit LZMA2Codec(int level, CodecType type);
+  explicit Bzip2Codec(int level, CodecType type);
 
- private:
-  bool doNeedsUncompressedLength() const override;
-  uint64_t doMaxUncompressedLength() const override;
+  std::vector<std::string> validPrefixes() const override;
+  bool canUncompress(IOBuf const* data, Optional<uint64_t> uncompressedLength)
+      const override;
 
-  bool encodeSize() const { return type() == CodecType::LZMA2_VARINT_SIZE; }
-
-  std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
+ private:
+  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+  std::unique_ptr<IOBuf> doCompress(IOBuf const* data) override;
   std::unique_ptr<IOBuf> doUncompress(
-      const IOBuf* data,
-      uint64_t uncompressedLength) override;
-
-  std::unique_ptr<IOBuf> addOutputBuffer(lzma_stream* stream, size_t length);
-  bool doInflate(lzma_stream* stream, IOBuf* head, size_t bufferLength);
+      IOBuf const* data,
+      Optional<uint64_t> uncompressedLength) override;
 
   int level_;
 };
 
-std::unique_ptr<Codec> LZMA2Codec::create(int level, CodecType type) {
-  return make_unique<LZMA2Codec>(level, type);
+// Factory for the bzip2 codec. Uses the unqualified make_unique shim, like
+// every other codec factory in this file, instead of the C++14-only
+// std::make_unique.
+/* static */ std::unique_ptr<Codec> Bzip2Codec::create(
+    int level,
+    CodecType type) {
+  return make_unique<Bzip2Codec>(level, type);
 }
 
-LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
-  DCHECK(type == CodecType::LZMA2 || type == CodecType::LZMA2_VARINT_SIZE);
+// Translates the symbolic COMPRESSION_LEVEL_* values into bzip2 levels
+// (FASTEST -> 1, DEFAULT and BEST -> 9); any other value is taken as given.
+// Throws std::invalid_argument unless the resulting level lies in [1, 9].
+Bzip2Codec::Bzip2Codec(int level, CodecType type) : Codec(type) {
+  DCHECK(type == CodecType::BZIP2);
   switch (level) {
-  case COMPRESSION_LEVEL_FASTEST:
-    level = 0;
-    break;
-  case COMPRESSION_LEVEL_DEFAULT:
-    level = LZMA_PRESET_DEFAULT;
-    break;
-  case COMPRESSION_LEVEL_BEST:
-    level = 9;
-    break;
+    case COMPRESSION_LEVEL_FASTEST:
+      level = 1;
+      break;
+    case COMPRESSION_LEVEL_DEFAULT:
+      level = 9;
+      break;
+    case COMPRESSION_LEVEL_BEST:
+      level = 9;
+      break;
   }
-  if (level < 0 || level > 9) {
-    throw std::invalid_argument(to<std::string>(
-        "LZMA2Codec: invalid level: ", level));
+  if (level < 1 || level > 9) {
+    throw std::invalid_argument(
+        to<std::string>("Bzip2: invalid level: ", level));
   }
   level_ = level;
 }
 
-bool LZMA2Codec::doNeedsUncompressedLength() const {
-  return !encodeSize();
+// Leading three bytes of bzip2 data (42 5A 68, ASCII "BZh"), packed as a
+// little-endian integer for prefixToStringLE() / dataStartsWithLE().
+static uint32_t constexpr kBzip2MagicLE = 0x685a42;
+static uint64_t constexpr kBzip2MagicBytes = 3;
+
+// Bzip2 data is identified by a single prefix: the three magic bytes.
+std::vector<std::string> Bzip2Codec::validPrefixes() const {
+  return {prefixToStringLE(kBzip2MagicLE, kBzip2MagicBytes)};
 }
 
-uint64_t LZMA2Codec::doMaxUncompressedLength() const {
-  // From lzma/base.h: "Stream is roughly 8 EiB (2^63 bytes)"
-  return uint64_t(1) << 63;
+// Reports whether `data` begins with the bzip2 magic; the length hint is
+// ignored.
+bool Bzip2Codec::canUncompress(IOBuf const* data, Optional<uint64_t>) const {
+  return dataStartsWithLE(data, kBzip2MagicLE, kBzip2MagicBytes);
 }
 
-std::unique_ptr<IOBuf> LZMA2Codec::addOutputBuffer(
-    lzma_stream* stream,
-    size_t length) {
+// Upper bound on the compressed size, following the bzip2 manual's rule.
+uint64_t Bzip2Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+  // http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzbufftobuffcompress
+  //   To guarantee that the compressed data will fit in its buffer, allocate an
+  //   output buffer of size 1% larger than the uncompressed data, plus six
+  //   hundred extra bytes.
+  uint64_t const onePercent = uncompressedLength / 100;
+  uint64_t constexpr kExtraBytes = 600;
+  return uncompressedLength + onePercent + kExtraBytes;
+}
+
+// Returns a bz_stream ready to be handed to the BZ2_bz*Init() functions.
+// The struct is value-initialized so that every field -- including the
+// total_* counters and `state`, which the previous field-by-field setup left
+// indeterminate -- is zero/null before the object is returned by value.
+// Null bzalloc/bzfree/opaque and empty in/out buffers are thereby preserved.
+static bz_stream createBzStream() {
+  bz_stream stream{};
+  return stream;
+}
+
+// Returns `rc` unchanged when it is one of bzip2's success codes; throws
+// std::runtime_error for every other code.
+static int bzCheck(int const rc) {
+  bool const isSuccess = rc == BZ_OK || rc == BZ_RUN_OK ||
+      rc == BZ_FLUSH_OK || rc == BZ_FINISH_OK || rc == BZ_STREAM_END;
+  if (!isSuccess) {
+    throw std::runtime_error(to<std::string>("Bzip2 error: ", rc));
+  }
+  return rc;
+}
 
-  CHECK_EQ(stream->avail_out, 0);
+// Allocates a fresh output IOBuf of at least `bufferLength` bytes, appends
+// its full capacity, and points the stream's output at it. The caller keeps
+// ownership of the returned buffer.
+// Debug-checks that `bufferLength` fits in `unsigned` and that the stream
+// has no output space left.
+static std::unique_ptr<IOBuf> addOutputBuffer(
+    bz_stream* stream,
+    uint64_t const bufferLength) {
+  DCHECK_LE(bufferLength, std::numeric_limits<unsigned>::max());
+  DCHECK_EQ(stream->avail_out, 0);
 
-  auto buf = IOBuf::create(length);
-  buf->append(length);
+  auto buf = IOBuf::create(bufferLength);
+  buf->append(buf->capacity());
 
-  stream->next_out = buf->writableData();
+  stream->next_out = reinterpret_cast<char*>(buf->writableData());
   stream->avail_out = buf->length();
 
   return buf;
 }
 
-std::unique_ptr<IOBuf> LZMA2Codec::doCompress(const IOBuf* data) {
-  lzma_ret rc;
-  lzma_stream stream = LZMA_STREAM_INIT;
-
-  rc = lzma_easy_encoder(&stream, level_, LZMA_CHECK_NONE);
-  if (rc != LZMA_OK) {
-    throw std::runtime_error(folly::to<std::string>(
-      "LZMA2Codec: lzma_easy_encoder error: ", rc));
-  }
-
-  SCOPE_EXIT { lzma_end(&stream); };
-
-  uint64_t uncompressedLength = data->computeChainDataLength();
-  uint64_t maxCompressedLength = lzma_stream_buffer_bound(uncompressedLength);
+std::unique_ptr<IOBuf> Bzip2Codec::doCompress(IOBuf const* data) { // Compresses every buffer of the chain `data` with bzip2 and returns the output chain.
+  bz_stream stream = createBzStream();
+  bzCheck(BZ2_bzCompressInit(&stream, level_, 0, 0));
+  SCOPE_EXIT {
+    bzCheck(BZ2_bzCompressEnd(&stream)); // NOTE(review): bzCheck can throw while the scope guard runs — confirm that is acceptable
+  };
 
-  // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
+  uint64_t const uncompressedLength = data->computeChainDataLength();
+  uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
+  uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
+  uint64_t constexpr kDefaultBufferLength = uint64_t(4) << 20; // 4 MiB
 
   auto out = addOutputBuffer(
-    &stream,
-    (maxCompressedLength <= maxSingleStepLength ?
-     maxCompressedLength :
-     defaultBufferLength));
-
-  if (encodeSize()) {
-    auto size = IOBuf::createCombined(kMaxVarintLength64);
-    encodeVarintToIOBuf(uncompressedLength, size.get());
-    size->appendChain(std::move(out));
-    out = std::move(size);
-  }
-
-  for (auto& range : *data) {
-    if (range.empty()) {
-      continue;
-    }
+      &stream,
+      maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen // small enough: one buffer sized to the worst case
+                                               : kDefaultBufferLength); // otherwise start with the default size and grow
 
-    stream.next_in = const_cast<uint8_t*>(range.data());
-    stream.avail_in = range.size();
+  for (auto range : *data) { // `range` is a copy, so advancing it below leaves `data` untouched
+    while (!range.empty()) {
+      auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength); // hand bzip2 at most 64 MiB per call
+      stream.next_in =
+          const_cast<char*>(reinterpret_cast<char const*>(range.data()));
+      stream.avail_in = inSize;
 
-    while (stream.avail_in != 0) {
       if (stream.avail_out == 0) {
-        out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
+        out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
       }
 
-      rc = lzma_code(&stream, LZMA_RUN);
-
-      if (rc != LZMA_OK) {
-        throw std::runtime_error(folly::to<std::string>(
-          "LZMA2Codec: lzma_code error: ", rc));
-      }
+      bzCheck(BZ2_bzCompress(&stream, BZ_RUN));
+      range.uncheckedAdvance(inSize - stream.avail_in); // drop exactly the bytes bzip2 consumed in this call
     }
   }
-
   do {
     if (stream.avail_out == 0) {
-      out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
+      out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
     }
-
-    rc = lzma_code(&stream, LZMA_FINISH);
-  } while (rc == LZMA_OK);
-
-  if (rc != LZMA_STREAM_END) {
-    throw std::runtime_error(folly::to<std::string>(
-      "LZMA2Codec: lzma_code ended with error: ", rc));
-  }
+  } while (bzCheck(BZ2_bzCompress(&stream, BZ_FINISH)) != BZ_STREAM_END); // keep finishing until bzip2 reports the end of the stream
 
   out->prev()->trimEnd(stream.avail_out);
 
   return out;
 }
 
-bool LZMA2Codec::doInflate(lzma_stream* stream,
-                          IOBuf* head,
-                          size_t bufferLength) {
-  if (stream->avail_out == 0) {
-    head->prependChain(addOutputBuffer(stream, bufferLength));
-  }
-
-  lzma_ret rc = lzma_code(stream, LZMA_RUN);
-
-  switch (rc) {
-  case LZMA_OK:
-    break;
-  case LZMA_STREAM_END:
-    return true;
-  default:
-    throw std::runtime_error(to<std::string>(
-        "LZMA2Codec: lzma_code error: ", rc));
-  }
-
-  return false;
-}
-
-std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
-                                               uint64_t uncompressedLength) {
-  lzma_ret rc;
-  lzma_stream stream = LZMA_STREAM_INIT;
-
-  rc = lzma_auto_decoder(&stream, std::numeric_limits<uint64_t>::max(), 0);
-  if (rc != LZMA_OK) {
-    throw std::runtime_error(folly::to<std::string>(
-      "LZMA2Codec: lzma_auto_decoder error: ", rc));
-  }
-
-  SCOPE_EXIT { lzma_end(&stream); };
+std::unique_ptr<IOBuf> Bzip2Codec::doUncompress( // Decompresses the bzip2 data held in the chain `data` and returns the output chain.
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) {
+  bz_stream stream = createBzStream();
+  bzCheck(BZ2_bzDecompressInit(&stream, 0, 0));
+  SCOPE_EXIT {
+    bzCheck(BZ2_bzDecompressEnd(&stream)); // NOTE(review): bzCheck can throw while the scope guard runs — confirm that is acceptable
+  };
 
-  // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
-
-  folly::io::Cursor cursor(data);
-  uint64_t actualUncompressedLength;
-  if (encodeSize()) {
-    actualUncompressedLength = decodeVarintFromCursor(cursor);
-    if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
-        uncompressedLength != actualUncompressedLength) {
-      throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
-    }
-  } else {
-    actualUncompressedLength = uncompressedLength;
-    DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
-  }
+  uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
+  uint64_t const kBlockSize = uint64_t(100) << 10; // 100 KiB
+  uint64_t const kDefaultBufferLength =
+      computeBufferLength(data->computeChainDataLength(), kBlockSize); // derived from the compressed size; see computeBufferLength for the exact rule
 
   auto out = addOutputBuffer(
       &stream,
-      (actualUncompressedLength <= maxSingleStepLength ?
-       actualUncompressedLength :
-       defaultBufferLength));
-
-  bool streamEnd = false;
-  auto buf = cursor.peekBytes();
-  while (!buf.empty()) {
-    stream.next_in = const_cast<uint8_t*>(buf.data());
-    stream.avail_in = buf.size();
-
-    while (stream.avail_in != 0) {
-      if (streamEnd) {
-        throw std::runtime_error(to<std::string>(
-            "LZMA2Codec: junk after end of data"));
+      ((uncompressedLength && *uncompressedLength <= kMaxSingleStepLength) // use the caller's length only when given and at most 64 MiB
+           ? *uncompressedLength
+           : kDefaultBufferLength));
+
+  int rc = BZ_OK;
+  for (auto range : *data) { // `range` is a copy, so advancing it below leaves `data` untouched
+    while (!range.empty()) {
+      auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength); // hand bzip2 at most 64 MiB per call
+      stream.next_in =
+          const_cast<char*>(reinterpret_cast<char const*>(range.data()));
+      stream.avail_in = inSize;
+
+      if (stream.avail_out == 0) {
+        out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
       }
 
-      streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
+      rc = bzCheck(BZ2_bzDecompress(&stream));
+      range.uncheckedAdvance(inSize - stream.avail_in); // drop exactly the bytes bzip2 consumed in this call
     }
-
-    cursor.skip(buf.size());
-    buf = cursor.peekBytes();
   }
-
-  while (!streamEnd) {
-    streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
+  while (rc != BZ_STREAM_END) { // all input has been supplied; keep decoding until the stream ends
+    if (stream.avail_out == 0) {
+      out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
+    }
+    size_t const outputSize = stream.avail_out;
+    rc = bzCheck(BZ2_bzDecompress(&stream));
+    if (outputSize == stream.avail_out) { // nothing was written in this call: treated as truncated input
+      throw std::runtime_error("Bzip2Codec: Truncated input");
+    }
   }
 
   out->prev()->trimEnd(stream.avail_out);
 
-  if (actualUncompressedLength != stream.total_out) {
-    throw std::runtime_error(to<std::string>(
-        "LZMA2Codec: invalid uncompressed length"));
+  uint64_t const totalOut =
+      (uint64_t(stream.total_out_hi32) << 32) + stream.total_out_lo32; // combine the two 32-bit halves of the output byte count
+  if (uncompressedLength && uncompressedLength != totalOut) { // verified only when the caller supplied a length
+    throw std::runtime_error("Bzip2 error: Invalid uncompressed length");
   }
 
   return out;
 }
 
-#endif  // FOLLY_HAVE_LIBLZMA
+#endif // FOLLY_HAVE_LIBBZ2
 
-#ifdef FOLLY_HAVE_LIBZSTD
+#if FOLLY_HAVE_LIBZ
+
+// Selects the default zlib::Options for a zlib-family codec type: gzip
+// options for CodecType::GZIP, zlib options otherwise. Only GZIP and ZLIB
+// are valid inputs (checked in debug builds).
+zlib::Options getZlibOptions(CodecType type) {
+  DCHECK(type == CodecType::GZIP || type == CodecType::ZLIB);
+  if (type == CodecType::GZIP) {
+    return zlib::defaultGzipOptions();
+  }
+  return zlib::defaultZlibOptions();
+}
+
+// Factory-table entry point: builds a one-shot zlib or gzip codec at the
+// requested compression level.
+std::unique_ptr<Codec> getZlibCodec(int level, CodecType type) {
+  auto const options = getZlibOptions(type);
+  return zlib::getCodec(options, level);
+}
+
+// Factory-table entry point: builds a streaming zlib or gzip codec at the
+// requested compression level.
+std::unique_ptr<StreamCodec> getZlibStreamCodec(int level, CodecType type) {
+  auto const options = getZlibOptions(type);
+  return zlib::getStreamCodec(options, level);
+}
+
+#endif // FOLLY_HAVE_LIBZ
 
 /**
- * ZSTD compression
+ * Automatic decompression
+ *
+ * Decompress-only codec that holds a list of other codecs and forwards each
+ * uncompress() call to the first one whose canUncompress() accepts the data.
+ * compress() and maxCompressedLength() are not supported and always throw.
  */
-class ZSTDCodec final : public Codec {
+class AutomaticCodec final : public Codec {
  public:
-  static std::unique_ptr<Codec> create(int level, CodecType);
-  explicit ZSTDCodec(int level, CodecType type);
+  static std::unique_ptr<Codec> create(
+      std::vector<std::unique_ptr<Codec>> customCodecs);
+  explicit AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs);
+
+  std::vector<std::string> validPrefixes() const override; // union of the prefixes of all held codecs
+  bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
+      const override; // true if any held codec accepts the data
 
  private:
   bool doNeedsUncompressedLength() const override;
-  std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
+  uint64_t doMaxUncompressedLength() const override;
+
+  uint64_t doMaxCompressedLength(uint64_t) const override { // unsupported: always throws
+    throw std::runtime_error(
+        "AutomaticCodec error: maxCompressedLength() not supported.");
+  }
+  std::unique_ptr<IOBuf> doCompress(const IOBuf*) override { // unsupported: always throws
+    throw std::runtime_error("AutomaticCodec error: compress() not supported.");
+  }
   std::unique_ptr<IOBuf> doUncompress(
       const IOBuf* data,
-      uint64_t uncompressedLength) override;
+      Optional<uint64_t> uncompressedLength) override;
 
-  int level_{1};
-};
+  void addCodecIfSupported(CodecType type); // appends the built-in codec for `type` if available and not already held
 
-std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
-  return make_unique<ZSTDCodec>(level, type);
-}
+  // Throws iff the codecs aren't compatible (very slow)
+  void checkCompatibleCodecs() const;
 
-ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
-  DCHECK(type == CodecType::ZSTD);
-  switch (level) {
-    case COMPRESSION_LEVEL_FASTEST:
-      level_ = 1;
-      break;
-    case COMPRESSION_LEVEL_DEFAULT:
-      level_ = 1;
-      break;
-    case COMPRESSION_LEVEL_BEST:
-      level_ = 19;
-      break;
+  std::vector<std::unique_ptr<Codec>> codecs_; // caller-supplied codecs first, then the built-in ones added by the constructor
+  bool needsUncompressedLength_; // set in the constructor: true if any held codec needs the length
+  uint64_t maxUncompressedLength_; // set in the constructor: largest limit among the held codecs
+};
+
+// Collects the valid prefixes of every held codec into one list with
+// duplicates removed. The set used for deduplication is unordered, so the
+// order of the returned prefixes is not meaningful.
+std::vector<std::string> AutomaticCodec::validPrefixes() const {
+  std::unordered_set<std::string> unique;
+  for (auto const& codec : codecs_) {
+    auto const fromCodec = codec->validPrefixes();
+    unique.insert(fromCodec.begin(), fromCodec.end());
   }
+  std::vector<std::string> result(unique.begin(), unique.end());
+  return result;
 }
 
-bool ZSTDCodec::doNeedsUncompressedLength() const {
-  return true;
+// Reports whether at least one held codec says it can uncompress `data`
+// given the (optional) expected uncompressed length. Codecs are asked in
+// list order and the search stops at the first one that accepts.
+bool AutomaticCodec::canUncompress(
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) const {
+  for (auto const& candidate : codecs_) {
+    if (candidate->canUncompress(data, uncompressedLength)) {
+      return true;
+    }
+  }
+  return false;
 }
 
-std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
-  size_t rc;
-  size_t maxCompressedLength = ZSTD_compressBound(data->length());
-  auto out = IOBuf::createCombined(maxCompressedLength);
-
-  CHECK_EQ(out->length(), 0);
-
-  rc = ZSTD_compress(out->writableTail(),
-                     out->capacity(),
-                     data->data(),
-                     data->length(),
-                     level_);
-
-  if (ZSTD_isError(rc)) {
-    throw std::runtime_error(to<std::string>(
-          "ZSTD compression returned an error: ",
-          ZSTD_getErrorName(rc)));
+void AutomaticCodec::addCodecIfSupported(CodecType type) { // Appends the built-in codec for `type` unless it is unavailable or a codec of that type is already held.
+  const bool present = std::any_of( // is a codec of this type already in the list?
+      codecs_.begin(),
+      codecs_.end(),
+      [&type](std::unique_ptr<Codec> const& codec) {
+        return codec->type() == type; // dereferences every entry, so codecs_ must not contain nullptr
+      });
+  if (hasCodec(type) && !present) { // hasCodec() is false when no factory is registered for `type`
+    codecs_.push_back(getCodec(type));
   }
-
-  out->append(rc);
-  CHECK_EQ(out->length(), rc);
-
-  return out;
 }
 
-std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(const IOBuf* data,
-                                               uint64_t uncompressedLength) {
-  size_t rc;
-  auto out = IOBuf::createCombined(uncompressedLength);
-
-  CHECK_GE(out->capacity(), uncompressedLength);
-  CHECK_EQ(out->length(), 0);
-
-  rc = ZSTD_decompress(
-      out->writableTail(), out->capacity(), data->data(), data->length());
+// Static factory: takes ownership of the custom codecs and returns a new
+// AutomaticCodec behind the Codec interface.
+/* static */ std::unique_ptr<Codec> AutomaticCodec::create(
+    std::vector<std::unique_ptr<Codec>> customCodecs) {
+  std::unique_ptr<Codec> codec =
+      std::make_unique<AutomaticCodec>(std::move(customCodecs));
+  return codec;
+}
 
-  if (ZSTD_isError(rc)) {
-    throw std::runtime_error(to<std::string>(
-          "ZSTD decompression returned an error: ",
-          ZSTD_getErrorName(rc)));
+AutomaticCodec::AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs)
+    : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) {
+  // Fastest -> slowest
+  addCodecIfSupported(CodecType::LZ4_FRAME);
+  addCodecIfSupported(CodecType::ZSTD);
+  addCodecIfSupported(CodecType::ZLIB);
+  addCodecIfSupported(CodecType::GZIP);
+  addCodecIfSupported(CodecType::LZMA2);
+  addCodecIfSupported(CodecType::BZIP2);
+  if (kIsDebug) {
+    checkCompatibleCodecs();
   }
+  // Check that none of the codes are are null
+  DCHECK(std::none_of(
+      codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
+        return codec == nullptr;
+      }));
+
+  needsUncompressedLength_ = std::any_of(
+      codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
+        return codec->needsUncompressedLength();
+      });
+
+  const auto it = std::max_element(
+      codecs_.begin(),
+      codecs_.end(),
+      [](std::unique_ptr<Codec> const& lhs, std::unique_ptr<Codec> const& rhs) {
+        return lhs->maxUncompressedLength() < rhs->maxUncompressedLength();
+      });
+  DCHECK(it != codecs_.end());
+  maxUncompressedLength_ = (*it)->maxUncompressedLength();
+}
 
-  out->append(rc);
-  CHECK_EQ(out->length(), rc);
+void AutomaticCodec::checkCompatibleCodecs() const {
+  // Keep track of all the possible headers.
+  std::unordered_set<std::string> headers;
+  // The empty header is not allowed.
+  headers.insert("");
+  // Step 1:
+  // Construct a set of headers and check that none of the headers occur twice.
+  // Eliminate edge cases.
+  for (auto&& codec : codecs_) {
+    const auto codecHeaders = codec->validPrefixes();
+    // Codecs without any valid headers are not allowed.
+    if (codecHeaders.empty()) {
+      throw std::invalid_argument{
+          "AutomaticCodec: validPrefixes() must not be empty."};
+    }
+    // Insert all the headers for the current codec.
+    const size_t beforeSize = headers.size();
+    headers.insert(codecHeaders.begin(), codecHeaders.end());
+    // Codecs are not compatible if any header occurred twice.
+    if (beforeSize + codecHeaders.size() != headers.size()) {
+      throw std::invalid_argument{
+          "AutomaticCodec: Two valid prefixes collide."};
+    }
+  }
+  // Step 2:
+  // Check if any strict non-empty prefix of any header is a header.
+  for (const auto& header : headers) {
+    for (size_t i = 1; i < header.size(); ++i) {
+      if (headers.count(header.substr(0, i))) {
+        throw std::invalid_argument{
+            "AutomaticCodec: One valid prefix is a prefix of another valid "
+            "prefix."};
+      }
+    }
+  }
+}
 
-  return out;
+bool AutomaticCodec::doNeedsUncompressedLength() const { // Returns the flag cached by the constructor.
+  return needsUncompressedLength_; // true if any held codec needs the uncompressed length
 }
 
-#endif  // FOLLY_HAVE_LIBZSTD
+uint64_t AutomaticCodec::doMaxUncompressedLength() const { // Returns the limit cached by the constructor.
+  return maxUncompressedLength_; // largest maxUncompressedLength() among the held codecs
+}
 
-}  // namespace
+// Forwards to the first held codec (in list order) whose canUncompress()
+// accepts the data. Throws std::runtime_error if no codec accepts it.
+std::unique_ptr<IOBuf> AutomaticCodec::doUncompress(
+    const IOBuf* data,
+    Optional<uint64_t> uncompressedLength) {
+  auto const match = std::find_if(
+      codecs_.begin(),
+      codecs_.end(),
+      [&](std::unique_ptr<Codec> const& candidate) {
+        return candidate->canUncompress(data, uncompressedLength);
+      });
+  if (match == codecs_.end()) {
+    throw std::runtime_error("AutomaticCodec error: Unknown compressed data");
+  }
+  return (*match)->uncompress(data, uncompressedLength);
+}
 
-std::unique_ptr<Codec> getCodec(CodecType type, int level) {
-  typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
+using CodecFactory = std::unique_ptr<Codec> (*)(int, CodecType); // creates a one-shot codec; called as (level, type)
+using StreamCodecFactory = std::unique_ptr<StreamCodec> (*)(int, CodecType); // creates a streaming codec; called as (level, type)
+struct Factory { // The pair of factory functions registered for one codec type.
+  CodecFactory codec; // null when the codec type is not available
+  StreamCodecFactory stream; // null when the type has no streaming codec
+};
 
-  static CodecFactory codecFactories[
-    static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
-    nullptr,  // USER_DEFINED
-    NoCompressionCodec::create,
+constexpr Factory // Factory table indexed by the numeric value of CodecType.
+    codecFactories[static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = { // NOTE(review): entry order presumably mirrors the CodecType enum — confirm before reordering
+        {}, // USER_DEFINED
+        {NoCompressionCodec::create, nullptr},
 
 #if FOLLY_HAVE_LIBLZ4
-    LZ4Codec::create,
+        {LZ4Codec::create, nullptr},
 #else
-    nullptr,
+        {}, // library not compiled in: both factories stay null
 #endif
 
 #if FOLLY_HAVE_LIBSNAPPY
-    SnappyCodec::create,
+        {SnappyCodec::create, nullptr},
 #else
-    nullptr,
+        {},
 #endif
 
 #if FOLLY_HAVE_LIBZ
-    ZlibCodec::create,
+        {getZlibCodec, getZlibStreamCodec},
 #else
-    nullptr,
+        {},
 #endif
 
 #if FOLLY_HAVE_LIBLZ4
-    LZ4Codec::create,
+        {LZ4Codec::create, nullptr},
 #else
-    nullptr,
+        {},
 #endif
 
 #if FOLLY_HAVE_LIBLZMA
-    LZMA2Codec::create,
-    LZMA2Codec::create,
+        {LZMA2StreamCodec::createCodec, LZMA2StreamCodec::createStream},
+        {LZMA2StreamCodec::createCodec, LZMA2StreamCodec::createStream},
 #else
-    nullptr,
-    nullptr,
+        {},
+        {},
 #endif
 
 #if FOLLY_HAVE_LIBZSTD
-    ZSTDCodec::create,
+        {ZSTDStreamCodec::createCodec, ZSTDStreamCodec::createStream},
 #else
-    nullptr,
+        {},
 #endif
 
 #if FOLLY_HAVE_LIBZ
-    ZlibCodec::create,
+        {getZlibCodec, getZlibStreamCodec},
 #else
-    nullptr,
+        {},
 #endif
-  };
 
-  size_t idx = static_cast<size_t>(type);
+#if (FOLLY_HAVE_LIBLZ4 && LZ4_VERSION_NUMBER >= 10301)
+        {LZ4FrameCodec::create, nullptr},
+#else
+        {},
+#endif
+
+#if FOLLY_HAVE_LIBBZ2
+        {Bzip2Codec::create, nullptr},
+#else
+        {},
+#endif
+};
+
+Factory const& getFactory(CodecType type) { // Returns the table entry for `type`; throws std::invalid_argument if `type` is outside the table.
+  size_t const idx = static_cast<size_t>(type);
   if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
-    throw std::invalid_argument(to<std::string>(
-        "Compression type ", idx, " not supported"));
+    throw std::invalid_argument(
+        to<std::string>("Compression type ", idx, " invalid")); // out-of-range value, as opposed to a valid type without a factory
   }
-  auto factory = codecFactories[idx];
+  return codecFactories[idx]; // the entry's members may still be null; callers check
+}
+} // namespace
+
+// True when a one-shot codec factory is registered for `type`. Like
+// getFactory(), throws std::invalid_argument for an out-of-range type.
+bool hasCodec(CodecType type) {
+  auto const& entry = getFactory(type);
+  return entry.codec != nullptr;
+}
+
+std::unique_ptr<Codec> getCodec(CodecType type, int level) { // Creates the one-shot codec for `type` at `level`; throws std::invalid_argument if none is registered.
+  auto const factory = getFactory(type).codec; // getFactory() itself throws for an out-of-range type
   if (!factory) {
-    throw std::invalid_argument(to<std::string>(
-        "Compression type ", idx, " not supported"));
+    throw std::invalid_argument(
+        to<std::string>("Compression type ", type, " not supported"));
   }
   auto codec = (*factory)(level, type);
-  DCHECK_EQ(static_cast<size_t>(codec->type()), idx);
+  DCHECK(codec->type() == type); // debug-only: the factory must return a codec of the requested type
   return codec;
 }
 
-}}  // namespaces
+// True when a streaming codec factory is registered for `type`. Like
+// getFactory(), throws std::invalid_argument for an out-of-range type.
+bool hasStreamCodec(CodecType type) {
+  auto const& entry = getFactory(type);
+  return entry.stream != nullptr;
+}
+
+// Creates the streaming codec registered for `type` at compression level
+// `level`. Throws std::invalid_argument when `type` is out of range or has
+// no streaming factory.
+std::unique_ptr<StreamCodec> getStreamCodec(CodecType type, int level) {
+  StreamCodecFactory const make = getFactory(type).stream;
+  if (make == nullptr) {
+    throw std::invalid_argument(
+        to<std::string>("Compression type ", type, " not supported"));
+  }
+  std::unique_ptr<StreamCodec> codec = make(level, type);
+  // Debug-only: the factory must return a codec of the requested type.
+  DCHECK(codec->type() == type);
+  return codec;
+}
+
+// Public entry point for automatic decompression: wraps the caller's custom
+// codecs, plus the built-in codecs the AutomaticCodec constructor adds, in a
+// single decompress-only codec.
+std::unique_ptr<Codec> getAutoUncompressionCodec(
+    std::vector<std::unique_ptr<Codec>> customCodecs) {
+  return std::make_unique<AutomaticCodec>(std::move(customCodecs));
+}
+} // namespace io
+} // namespace folly