2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/io/Compression.h>
22 #if LZ4_VERSION_NUMBER >= 10301
27 #include <glog/logging.h>
29 #if FOLLY_HAVE_LIBSNAPPY
31 #include <snappy-sinksource.h>
38 #if FOLLY_HAVE_LIBLZMA
42 #if FOLLY_HAVE_LIBZSTD
43 #define ZSTD_STATIC_LINKING_ONLY
51 #include <folly/Bits.h>
52 #include <folly/Conv.h>
53 #include <folly/Memory.h>
54 #include <folly/Portability.h>
55 #include <folly/ScopeGuard.h>
56 #include <folly/Varint.h>
57 #include <folly/io/Cursor.h>
59 #include <unordered_set>
61 namespace folly { namespace io {
63 Codec::Codec(CodecType type) : type_(type) { }
65 // Ensure consistent behavior in the nullptr case
66 std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
67 if (data == nullptr) {
68 throw std::invalid_argument("Codec: data must not be nullptr");
70 uint64_t len = data->computeChainDataLength();
72 return IOBuf::create(0);
74 if (len > maxUncompressedLength()) {
75 throw std::runtime_error("Codec: uncompressed length too large");
78 return doCompress(data);
// String overload of compress().
// Throws std::runtime_error when the input is longer than
// maxUncompressedLength(); accepted input is forwarded to doCompressString().
81 std::string Codec::compress(const StringPiece data) {
82 const uint64_t len = data.size();
// Enforce the codec's input size limit before doing any work.
86 if (len > maxUncompressedLength()) {
87 throw std::runtime_error("Codec: uncompressed length too large");
90 return doCompressString(data);
// Uncompresses an IOBuf chain.
// Throws std::invalid_argument when `data` is null, or when no uncompressed
// length is supplied although needsUncompressedLength() reports it is required.
// Throws std::runtime_error when the supplied length exceeds
// maxUncompressedLength() or is inconsistent with the trivial (empty) result.
// All remaining input is delegated to doUncompress().
93 std::unique_ptr<IOBuf> Codec::uncompress(
95 Optional<uint64_t> uncompressedLength) {
96 if (data == nullptr) {
97 throw std::invalid_argument("Codec: data must not be nullptr");
99 if (!uncompressedLength) {
100 if (needsUncompressedLength()) {
101 throw std::invalid_argument("Codec: uncompressed length required");
103 } else if (*uncompressedLength > maxUncompressedLength()) {
104 throw std::runtime_error("Codec: uncompressed length too large");
// On the empty-result path a caller-supplied length, if any, must be zero.
108 if (uncompressedLength.value_or(0) != 0) {
109 throw std::runtime_error("Codec: invalid uncompressed length");
111 return IOBuf::create(0);
114 return doUncompress(data, uncompressedLength);
// String overload of uncompress().
// Applies the same length validation as the IOBuf overload: a missing length
// is rejected (std::invalid_argument) when the codec requires one, an
// over-large length is rejected (std::runtime_error), and a non-zero length is
// rejected on the empty-result path. Accepted input goes to
// doUncompressString().
117 std::string Codec::uncompress(
118 const StringPiece data,
119 Optional<uint64_t> uncompressedLength) {
120 if (!uncompressedLength) {
121 if (needsUncompressedLength()) {
122 throw std::invalid_argument("Codec: uncompressed length required");
124 } else if (*uncompressedLength > maxUncompressedLength()) {
125 throw std::runtime_error("Codec: uncompressed length too large");
// A caller-supplied length, if any, must be zero on this path.
129 if (uncompressedLength.value_or(0) != 0) {
130 throw std::runtime_error("Codec: invalid uncompressed length");
135 return doUncompressString(data, uncompressedLength);
138 bool Codec::needsUncompressedLength() const {
139 return doNeedsUncompressedLength();
142 uint64_t Codec::maxUncompressedLength() const {
143 return doMaxUncompressedLength();
146 bool Codec::doNeedsUncompressedLength() const {
150 uint64_t Codec::doMaxUncompressedLength() const {
151 return UNLIMITED_UNCOMPRESSED_LENGTH;
154 std::vector<std::string> Codec::validPrefixes() const {
158 bool Codec::canUncompress(const IOBuf*, Optional<uint64_t>) const {
162 std::string Codec::doCompressString(const StringPiece data) {
163 const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
164 auto outputBuffer = doCompress(&inputBuffer);
166 output.reserve(outputBuffer->computeChainDataLength());
167 for (auto range : *outputBuffer) {
168 output.append(reinterpret_cast<const char*>(range.data()), range.size());
173 std::string Codec::doUncompressString(
174 const StringPiece data,
175 Optional<uint64_t> uncompressedLength) {
176 const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
177 auto outputBuffer = doUncompress(&inputBuffer, uncompressedLength);
179 output.reserve(outputBuffer->computeChainDataLength());
180 for (auto range : *outputBuffer) {
181 output.append(reinterpret_cast<const char*>(range.data()), range.size());
// Upper bound on the compressed size of `uncompressedLength` input bytes.
// A zero length is special-cased here; every other length is answered by the
// codec-specific doMaxCompressedLength().
186 uint64_t Codec::maxCompressedLength(uint64_t uncompressedLength) const {
187 if (uncompressedLength == 0) {
190 return doMaxCompressedLength(uncompressedLength);
// Resolves the uncompressed length of `data`, validating trivial cases first.
// When the supplied length is zero or the compressed data is empty, the two
// must agree (length zero or absent, and no compressed bytes); otherwise
// std::runtime_error is thrown. Non-trivial input is passed on to
// doGetUncompressedLength().
193 Optional<uint64_t> Codec::getUncompressedLength(
194 const folly::IOBuf* data,
195 Optional<uint64_t> uncompressedLength) const {
196 auto const compressedLength = data->computeChainDataLength();
197 if (uncompressedLength == uint64_t(0) || compressedLength == 0) {
// Either side being "empty" requires the other side to be empty as well.
198 if (uncompressedLength.value_or(0) != 0 || compressedLength != 0) {
199 throw std::runtime_error("Invalid uncompressed length");
203 return doGetUncompressedLength(data, uncompressedLength);
206 Optional<uint64_t> Codec::doGetUncompressedLength(
208 Optional<uint64_t> uncompressedLength) const {
209 return uncompressedLength;
212 bool StreamCodec::needsDataLength() const {
213 return doNeedsDataLength();
216 bool StreamCodec::doNeedsDataLength() const {
220 void StreamCodec::assertStateIs(State expected) const {
221 if (state_ != expected) {
222 throw std::logic_error(folly::to<std::string>(
223 "Codec: state is ", state_, "; expected state ", expected));
// Returns the stream to State::RESET and records the (optional) uncompressed
// length that applies to the next stream operation.
227 void StreamCodec::resetStream(Optional<uint64_t> uncompressedLength) {
228 state_ = State::RESET;
229 uncompressedLength_ = uncompressedLength;
// Public streaming-compression entry point.
// Validates the declared uncompressed length against the input, drives the
// state machine (RESET -> COMPRESS / COMPRESS_FLUSH / COMPRESS_END) according
// to `flushOp`, and delegates the real work to doCompressStream().
// Throws std::runtime_error ("Codec: invalid uncompressed length") when the
// declared length contradicts the input, and std::logic_error (through
// assertStateIs) when `flushOp` is not valid for the current state.
233 bool StreamCodec::compressStream(
235 MutableByteRange& output,
236 StreamCodec::FlushOp flushOp) {
// Fresh stream with no input: handle the trivial cases up front.
237 if (state_ == State::RESET && input.empty()) {
238 if (flushOp == StreamCodec::FlushOp::NONE) {
// Ending an empty stream is only valid if the declared length is zero/absent.
241 if (flushOp == StreamCodec::FlushOp::END &&
242 uncompressedLength().value_or(0) != 0) {
243 throw std::runtime_error("Codec: invalid uncompressed length");
// Input is present but the caller declared a zero-length stream.
247 if (state_ == State::RESET && !input.empty() &&
248 uncompressedLength() == uint64_t(0)) {
249 throw std::runtime_error("Codec: invalid uncompressed length");
251 // Handle input state transitions
253 case StreamCodec::FlushOp::NONE:
254 if (state_ == State::RESET) {
255 state_ = State::COMPRESS;
257 assertStateIs(State::COMPRESS);
259 case StreamCodec::FlushOp::FLUSH:
260 if (state_ == State::RESET || state_ == State::COMPRESS) {
261 state_ = State::COMPRESS_FLUSH;
263 assertStateIs(State::COMPRESS_FLUSH);
265 case StreamCodec::FlushOp::END:
266 if (state_ == State::RESET || state_ == State::COMPRESS) {
267 state_ = State::COMPRESS_END;
269 assertStateIs(State::COMPRESS_END);
272 bool const done = doCompressStream(input, output, flushOp);
273 // Handle output state transitions
// A finished flush returns the stream to plain COMPRESS.
275 if (state_ == State::COMPRESS_FLUSH) {
276 state_ = State::COMPRESS;
277 } else if (state_ == State::COMPRESS_END) {
280 // Check internal invariants
281 DCHECK(input.empty());
282 DCHECK(flushOp != StreamCodec::FlushOp::NONE);
// Public streaming-decompression entry point.
// Special-cases a fresh stream with empty input, moves the state machine from
// RESET to UNCOMPRESS, and delegates the real work to doUncompressStream().
// assertStateIs() throws std::logic_error if the stream is in any other state.
287 bool StreamCodec::uncompressStream(
289 MutableByteRange& output,
290 StreamCodec::FlushOp flushOp) {
// Fresh stream with no input: the outcome depends on the declared length.
291 if (state_ == State::RESET && input.empty()) {
292 if (uncompressedLength().value_or(0) == 0) {
297 // Handle input state transitions
298 if (state_ == State::RESET) {
299 state_ = State::UNCOMPRESS;
301 assertStateIs(State::UNCOMPRESS);
302 bool const done = doUncompressStream(input, output, flushOp);
303 // Handle output state transitions
// Allocates a new output IOBuf and points `output` at its whole writable area.
// Precondition (debug-checked): `output` is empty, i.e. the previous buffer has
// been used up.
310 static std::unique_ptr<IOBuf> addOutputBuffer(
311 MutableByteRange& output,
313 DCHECK(output.empty());
314 auto buffer = IOBuf::create(size);
// Claim the full capacity (which may exceed the requested size) as data so
// that the codec can write into all of it.
315 buffer->append(buffer->capacity());
316 output = {buffer->writableData(), buffer->length()};
// One-shot compression implemented on top of the streaming interface.
// Resets the stream with the total input length, then feeds every element of
// the input chain through compressStream(), growing the output chain with
// additional buffers whenever the current one fills up.
// Throws std::runtime_error ("Codec: No forward progress made") when a call
// consumes no input and produces no output.
320 std::unique_ptr<IOBuf> StreamCodec::doCompress(IOBuf const* data) {
321 uint64_t const uncompressedLength = data->computeChainDataLength();
322 resetStream(uncompressedLength);
323 uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
325 auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
326 auto constexpr kDefaultBufferLength = uint64_t(4) << 20; // 4 MB
328 MutableByteRange output;
// If the worst-case output is small enough, allocate it in a single buffer;
// otherwise start with the default size and grow as needed.
329 auto buffer = addOutputBuffer(
331 maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen
332 : kDefaultBufferLength);
334 // Compress the entire IOBuf chain into the IOBuf chain pointed to by buffer
335 IOBuf const* current = data;
336 ByteRange input{current->data(), current->length()};
337 StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
// Advance to the next chain element that still has bytes to consume.
339 while (input.empty() && current->next() != data) {
340 current = current->next();
341 input = {current->data(), current->length()};
343 if (current->next() == data) {
344 // This is the last input buffer so end the stream
345 flushOp = StreamCodec::FlushOp::END;
// The current output buffer is full: chain a fresh one.
347 if (output.empty()) {
348 buffer->prependChain(addOutputBuffer(output, kDefaultBufferLength));
// Snapshot sizes so a call that makes no progress can be detected.
350 size_t const inputSize = input.size();
351 size_t const outputSize = output.size();
352 bool const done = compressStream(input, output, flushOp);
354 DCHECK(input.empty());
355 DCHECK(flushOp == StreamCodec::FlushOp::END);
356 DCHECK_EQ(current->next(), data);
359 if (inputSize == input.size() && outputSize == output.size()) {
360 throw std::runtime_error("Codec: No forward progress made");
// Drop the unused tail of the last output buffer.
363 buffer->prev()->trimEnd(output.size());
// Picks the size of the output buffers used while stream-uncompressing.
//
// The result is four times the larger of `blockSize` and `compressedLength`,
// capped at 4 MiB.
//
// The operand is clamped *before* it is multiplied: multiplying first wraps
// around for lengths >= 2^62 (e.g. 2^62 * 4 == 0 in 64 bits), which would
// yield a uselessly small or even zero-sized buffer. For all inputs that do
// not overflow, the result is identical to min(4 * max(...), 4 MiB).
static uint64_t computeBufferLength(
    uint64_t const compressedLength,
    uint64_t const blockSize) {
  uint64_t constexpr kMaxBufferLength = uint64_t(4) << 20; // 4 MiB
  uint64_t constexpr kGrowthFactor = 4;
  uint64_t const largestInput = std::max(blockSize, compressedLength);
  uint64_t const clampedInput =
      std::min(largestInput, kMaxBufferLength / kGrowthFactor);
  return kGrowthFactor * clampedInput;
}
// One-shot decompression implemented on top of the streaming interface.
// Resolves the uncompressed length, resets the stream, then feeds every
// element of the input chain through uncompressStream(), growing the output
// chain as buffers fill up.
// Throws std::runtime_error for truncated input (a call made no progress),
// for trailing bytes after the end of the stream, and when the produced size
// differs from the expected uncompressed length.
375 std::unique_ptr<IOBuf> StreamCodec::doUncompress(
377 Optional<uint64_t> uncompressedLength) {
378 auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
379 auto constexpr kBlockSize = uint64_t(128) << 10;
380 auto const defaultBufferLength =
381 computeBufferLength(data->computeChainDataLength(), kBlockSize);
383 uncompressedLength = getUncompressedLength(data, uncompressedLength);
384 resetStream(uncompressedLength);
386 MutableByteRange output;
// With a known, reasonably small length, allocate the whole output at once.
387 auto buffer = addOutputBuffer(
389 (uncompressedLength && *uncompressedLength <= kMaxSingleStepLength
390 ? *uncompressedLength
391 : defaultBufferLength));
393 // Uncompress the entire IOBuf chain into the IOBuf chain pointed to by buffer
394 IOBuf const* current = data;
395 ByteRange input{current->data(), current->length()};
396 StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
// Advance to the next chain element that still has bytes to consume.
398 while (input.empty() && current->next() != data) {
399 current = current->next();
400 input = {current->data(), current->length()};
402 if (current->next() == data) {
403 // Tell the uncompressor there is no more input (it may optimize)
404 flushOp = StreamCodec::FlushOp::END;
// The current output buffer is full: chain a fresh one.
406 if (output.empty()) {
407 buffer->prependChain(addOutputBuffer(output, defaultBufferLength));
// Snapshot sizes so a call that makes no progress can be detected.
409 size_t const inputSize = input.size();
410 size_t const outputSize = output.size();
411 bool const done = uncompressStream(input, output, flushOp);
415 if (inputSize == input.size() && outputSize == output.size()) {
416 throw std::runtime_error("Codec: Truncated data");
419 if (!input.empty()) {
420 throw std::runtime_error("Codec: Junk after end of data");
// Drop the unused tail of the last output buffer, then verify the total size.
423 buffer->prev()->trimEnd(output.size());
424 if (uncompressedLength &&
425 *uncompressedLength != buffer->computeChainDataLength()) {
426 throw std::runtime_error("Codec: invalid uncompressed length");
// Pass-through codec: its doCompress()/doUncompress() implementations in this
// file return a clone of the input, and its maximum compressed length equals
// the uncompressed length.
437 class NoCompressionCodec final : public Codec {
// Factory with the common (level, type) creation signature.
439 static std::unique_ptr<Codec> create(int level, CodecType type);
440 explicit NoCompressionCodec(int level, CodecType type);
443 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
444 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
445 std::unique_ptr<IOBuf> doUncompress(
447 Optional<uint64_t> uncompressedLength) override;
450 std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
451 return std::make_unique<NoCompressionCodec>(level, type);
// Validates the requested level for the no-op codec.
// The symbolic COMPRESSION_LEVEL_* values are handled by the case labels below;
// an unsupported level throws std::invalid_argument.
454 NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
456 DCHECK(type == CodecType::NO_COMPRESSION);
458 case COMPRESSION_LEVEL_DEFAULT:
459 case COMPRESSION_LEVEL_FASTEST:
460 case COMPRESSION_LEVEL_BEST:
464 throw std::invalid_argument(to<std::string>(
465 "NoCompressionCodec: invalid level ", level));
469 uint64_t NoCompressionCodec::doMaxCompressedLength(
470 uint64_t uncompressedLength) const {
471 return uncompressedLength;
474 std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
476 return data->clone();
479 std::unique_ptr<IOBuf> NoCompressionCodec::doUncompress(
481 Optional<uint64_t> uncompressedLength) {
482 if (uncompressedLength &&
483 data->computeChainDataLength() != *uncompressedLength) {
484 throw std::runtime_error(
485 to<std::string>("NoCompressionCodec: invalid uncompressed length"));
487 return data->clone();
490 #if (FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA)
494 void encodeVarintToIOBuf(uint64_t val, folly::IOBuf* out) {
495 DCHECK_GE(out->tailroom(), kMaxVarintLength64);
496 out->append(encodeVarint(val, out->writableTail()));
// Decodes a varint by reading bytes from `cursor`, 7 payload bits per byte,
// least-significant group first (shift grows by 7 each iteration, up to 63).
// Throws std::invalid_argument when the encoded value is too big.
499 inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
502 for (int shift = 0; shift <= 63; shift += 7) {
// Read as a signed byte and merge its low 7 bits into the result.
503 b = cursor.read<int8_t>();
504 val |= static_cast<uint64_t>(b & 0x7f) << shift;
510 throw std::invalid_argument("Invalid varint value. Too big.");
517 #endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
521 * Reads sizeof(T) bytes, and returns false if not enough bytes are available.
522 * Returns true if the first n bytes are equal to prefix when interpreted as
// Checks whether `data` begins with `prefix`.
// Reads one little-endian T from the start of `data` (failure is detected via
// tryReadLE) and compares only its lowest `n` bytes against `prefix`.
// Only enabled for unsigned T; `n` must not exceed sizeof(T) (debug-checked).
525 template <typename T>
526 typename std::enable_if<std::is_unsigned<T>::value, bool>::type
527 dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) {
529 DCHECK_LE(n, sizeof(T));
532 if (!cursor.tryReadLE(value)) {
// Keep the low n bytes; n == sizeof(T) is special-cased because shifting by
// the full width of T would be undefined.
535 const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1;
536 return prefix == (value & mask);
// Serializes the first `n` bytes of `prefix`, in little-endian byte order,
// into a std::string. Only enabled for arithmetic T; `n` must not exceed
// sizeof(T) (debug-checked).
539 template <typename T>
540 typename std::enable_if<std::is_arithmetic<T>::value, std::string>::type
541 prefixToStringLE(T prefix, uint64_t n = sizeof(T)) {
543 DCHECK_LE(n, sizeof(T));
// Convert to little-endian so the in-memory byte order is the wire order.
544 prefix = Endian::little(prefix);
547 memcpy(&result[0], &prefix, n);
552 #if FOLLY_HAVE_LIBLZ4
// Codec for raw LZ4 blocks. Handles two codec types: CodecType::LZ4 and
// CodecType::LZ4_VARINT_SIZE; for the latter the uncompressed length is stored
// as a varint in front of the compressed data (see encodeSize()).
557 class LZ4Codec final : public Codec {
559 static std::unique_ptr<Codec> create(int level, CodecType type);
560 explicit LZ4Codec(int level, CodecType type);
563 bool doNeedsUncompressedLength() const override;
564 uint64_t doMaxUncompressedLength() const override;
565 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
// True when this instance writes/reads the varint length prefix.
567 bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
569 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
570 std::unique_ptr<IOBuf> doUncompress(
572 Optional<uint64_t> uncompressedLength) override;
// Selects the HC variant of the LZ4 compressor in doCompress().
574 bool highCompression_;
577 std::unique_ptr<Codec> LZ4Codec::create(int level, CodecType type) {
578 return std::make_unique<LZ4Codec>(level, type);
// Validates the compression level and derives the compressor variant.
// The symbolic COMPRESSION_LEVEL_* values are handled by the case labels below;
// afterwards only levels 1 and 2 are accepted (anything else throws
// std::invalid_argument) and a level above 1 selects high compression.
581 LZ4Codec::LZ4Codec(int level, CodecType type) : Codec(type) {
582 DCHECK(type == CodecType::LZ4 || type == CodecType::LZ4_VARINT_SIZE);
585 case COMPRESSION_LEVEL_FASTEST:
586 case COMPRESSION_LEVEL_DEFAULT:
589 case COMPRESSION_LEVEL_BEST:
593 if (level < 1 || level > 2) {
594 throw std::invalid_argument(to<std::string>(
595 "LZ4Codec: invalid level: ", level));
597 highCompression_ = (level > 1);
600 bool LZ4Codec::doNeedsUncompressedLength() const {
601 return !encodeSize();
604 // The value comes from lz4.h in lz4-r117, but older versions of lz4 don't
605 // define LZ4_MAX_INPUT_SIZE (even though the max size is the same), so do it
607 #ifndef LZ4_MAX_INPUT_SIZE
608 # define LZ4_MAX_INPUT_SIZE 0x7E000000
611 uint64_t LZ4Codec::doMaxUncompressedLength() const {
612 return LZ4_MAX_INPUT_SIZE;
615 uint64_t LZ4Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
616 return LZ4_compressBound(uncompressedLength) +
617 (encodeSize() ? kMaxVarintLength64 : 0);
// Compresses `data` as a single LZ4 block.
// A chained input is coalesced into one contiguous buffer first. The output
// buffer is sized from maxCompressedLength() and a varint with the input
// length may be written in front of the compressed bytes. Which LZ4 entry
// points are called depends on LZ4_VERSION_NUMBER and highCompression_.
620 std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
622 if (data->isChained()) {
623 // LZ4 doesn't support streaming, so we have to coalesce
624 clone = data->cloneCoalescedAsValue();
628 auto out = IOBuf::create(maxCompressedLength(data->length()));
630 encodeVarintToIOBuf(data->length(), out.get());
634 auto input = reinterpret_cast<const char*>(data->data());
635 auto output = reinterpret_cast<char*>(out->writableTail());
636 const auto inputLength = data->length();
// Newer LZ4 releases take the destination capacity explicitly.
637 #if LZ4_VERSION_NUMBER >= 10700
638 if (highCompression_) {
639 n = LZ4_compress_HC(input, output, inputLength, out->tailroom(), 0);
641 n = LZ4_compress_default(input, output, inputLength, out->tailroom());
// Legacy entry points (no destination capacity parameter).
644 if (highCompression_) {
645 n = LZ4_compressHC(input, output, inputLength);
647 n = LZ4_compress(input, output, inputLength);
// Sanity check: the compressor must not have written past the buffer.
652 CHECK_LE(n, out->capacity());
// Uncompresses a single LZ4 block.
// A chained input is coalesced first. The uncompressed length is either read
// from a varint at the start of the data (and cross-checked against any
// caller-supplied value) or taken from the caller.
// Throws std::runtime_error on a length mismatch or when LZ4_decompress_safe
// fails or produces an unexpected number of bytes.
658 std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
660 Optional<uint64_t> uncompressedLength) {
662 if (data->isChained()) {
663 // LZ4 doesn't support streaming, so we have to coalesce
664 clone = data->cloneCoalescedAsValue();
668 folly::io::Cursor cursor(data);
669 uint64_t actualUncompressedLength;
// Length taken from the varint prefix embedded in the data.
671 actualUncompressedLength = decodeVarintFromCursor(cursor);
672 if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
673 throw std::runtime_error("LZ4Codec: invalid uncompressed length");
// Length taken from the caller, who must have provided a valid one.
677 DCHECK(uncompressedLength.hasValue());
678 DCHECK(*uncompressedLength <= maxUncompressedLength());
679 actualUncompressedLength = *uncompressedLength;
// Remaining bytes at the cursor are the compressed payload.
682 auto sp = StringPiece{cursor.peekBytes()};
683 auto out = IOBuf::create(actualUncompressedLength);
684 int n = LZ4_decompress_safe(
686 reinterpret_cast<char*>(out->writableTail()),
688 actualUncompressedLength);
// A negative result or a size other than the expected one is an error.
690 if (n < 0 || uint64_t(n) != actualUncompressedLength) {
691 throw std::runtime_error(to<std::string>(
692 "LZ4 decompression returned invalid value ", n));
694 out->append(actualUncompressedLength);
698 #if LZ4_VERSION_NUMBER >= 10301
// Codec for the LZ4 frame format (CodecType::LZ4_FRAME). Frames are
// recognizable by their magic number, so the codec implements
// validPrefixes()/canUncompress(). Owns an LZ4F decompression context.
700 class LZ4FrameCodec final : public Codec {
702 static std::unique_ptr<Codec> create(int level, CodecType type);
703 explicit LZ4FrameCodec(int level, CodecType type);
704 ~LZ4FrameCodec() override;
706 std::vector<std::string> validPrefixes() const override;
707 bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
711 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
713 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
714 std::unique_ptr<IOBuf> doUncompress(
716 Optional<uint64_t> uncompressedLength) override;
718 // Reset the dctx_ if it is dirty or null.
// Decompression context; freed in the destructor and recreated by resetDCtx().
722 LZ4F_decompressionContext_t dctx_{nullptr};
726 /* static */ std::unique_ptr<Codec> LZ4FrameCodec::create(
729 return std::make_unique<LZ4FrameCodec>(level, type);
732 static constexpr uint32_t kLZ4FrameMagicLE = 0x184D2204;
734 std::vector<std::string> LZ4FrameCodec::validPrefixes() const {
735 return {prefixToStringLE(kLZ4FrameMagicLE)};
738 bool LZ4FrameCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
739 return dataStartsWithLE(data, kLZ4FrameMagicLE);
742 uint64_t LZ4FrameCodec::doMaxCompressedLength(
743 uint64_t uncompressedLength) const {
744 LZ4F_preferences_t prefs{};
745 prefs.compressionLevel = level_;
746 prefs.frameInfo.contentSize = uncompressedLength;
747 return LZ4F_compressFrameBound(uncompressedLength, &prefs);
750 static size_t lz4FrameThrowOnError(size_t code) {
751 if (LZ4F_isError(code)) {
752 throw std::runtime_error(
753 to<std::string>("LZ4Frame error: ", LZ4F_getErrorName(code)));
// Ensures a usable decompression context: an existing, clean context is left
// alone; otherwise the old one is freed and a new one is created.
// Throws std::runtime_error (via lz4FrameThrowOnError) if creation fails.
758 void LZ4FrameCodec::resetDCtx() {
// Fast path: the context exists and is not dirty.
759 if (dctx_ && !dirty_) {
763 LZ4F_freeDecompressionContext(dctx_);
765 lz4FrameThrowOnError(LZ4F_createDecompressionContext(&dctx_, 100));
// Constructs an LZ4 frame codec; the symbolic COMPRESSION_LEVEL_* values are
// handled by the case labels below.
769 LZ4FrameCodec::LZ4FrameCodec(int level, CodecType type) : Codec(type) {
770 DCHECK(type == CodecType::LZ4_FRAME);
772 case COMPRESSION_LEVEL_FASTEST:
773 case COMPRESSION_LEVEL_DEFAULT:
776 case COMPRESSION_LEVEL_BEST:
// Releases the LZ4F decompression context owned by this codec.
785 LZ4FrameCodec::~LZ4FrameCodec() {
787 LZ4F_freeDecompressionContext(dctx_);
// Compresses `data` into a single LZ4 frame.
// A chained input is coalesced first. The frame records the content size and
// uses the configured level. Throws std::runtime_error (via
// lz4FrameThrowOnError) if liblz4 reports an error.
791 std::unique_ptr<IOBuf> LZ4FrameCodec::doCompress(const IOBuf* data) {
792 // LZ4 Frame compression doesn't support streaming so we have to coalesce
794 if (data->isChained()) {
795 clone = data->cloneCoalescedAsValue();
799 const auto uncompressedLength = data->length();
800 LZ4F_preferences_t prefs{};
801 prefs.compressionLevel = level_;
802 prefs.frameInfo.contentSize = uncompressedLength;
// Size the output for the worst case so one call can hold the whole frame.
804 auto buf = IOBuf::create(maxCompressedLength(uncompressedLength));
805 const size_t written = lz4FrameThrowOnError(LZ4F_compressFrame(
// Publish exactly the bytes the compressor produced.
811 buf->append(written);
// Uncompresses one LZ4 frame into an IOBufQueue-backed chain.
// A chained input is coalesced first. Output is produced block by block via
// LZ4F_decompress() until the frame ends.
// Throws std::runtime_error for liblz4 errors, for an incomplete frame (no
// input left, no output produced, frame not finished), and when the produced
// size differs from a caller-supplied uncompressed length.
815 std::unique_ptr<IOBuf> LZ4FrameCodec::doUncompress(
817 Optional<uint64_t> uncompressedLength) {
818 // Reset the dctx if any errors have occurred
821 ByteRange in = *data->begin();
823 if (data->isChained()) {
824 clone = data->cloneCoalescedAsValue();
825 in = clone.coalesce();
828 // Select decompression options
829 LZ4F_decompressOptions_t options;
830 options.stableDst = 1;
831 // Select blockSize and growthSize for the IOBufQueue
832 IOBufQueue queue(IOBufQueue::cacheChainLength());
833 auto blockSize = uint64_t{64} << 10;
834 auto growthSize = uint64_t{4} << 20;
835 if (uncompressedLength) {
836 // Allocate uncompressedLength in one chunk (up to 64 MB)
837 const auto allocateSize = std::min(*uncompressedLength, uint64_t{64} << 20);
838 queue.preallocate(allocateSize, allocateSize);
// Never request more per step than the total we expect to produce.
839 blockSize = std::min(*uncompressedLength, blockSize);
840 growthSize = std::min(*uncompressedLength, growthSize);
842 // Reduce growthSize for small data
843 const auto guessUncompressedLen =
844 4 * std::max<uint64_t>(blockSize, in.size());
845 growthSize = std::min(guessUncompressedLen, growthSize);
847 // Once LZ4_decompress() is called, the dctx_ cannot be reused until it
850 // Decompress until the frame is over
853 // Allocate enough space to decompress at least a block
856 std::tie(out, outSize) = queue.preallocate(blockSize, growthSize);
// inSize/outSize are in-out: on return they hold the bytes consumed/produced.
858 size_t inSize = in.size();
859 code = lz4FrameThrowOnError(
860 LZ4F_decompress(dctx_, out, &outSize, in.data(), &inSize, &options));
861 if (in.empty() && outSize == 0 && code != 0) {
862 // We passed no input, no output was produced, and the frame isn't over
863 // No more forward progress is possible
864 throw std::runtime_error("LZ4Frame error: Incomplete frame");
866 in.uncheckedAdvance(inSize);
867 queue.postallocate(outSize);
869 // At this point the decompression context can be reused
871 if (uncompressedLength && queue.chainLength() != *uncompressedLength) {
872 throw std::runtime_error("LZ4Frame error: Invalid uncompressedLength");
877 #endif // LZ4_VERSION_NUMBER >= 10301
878 #endif // FOLLY_HAVE_LIBLZ4
880 #if FOLLY_HAVE_LIBSNAPPY
887 * Implementation of snappy::Source that reads from a IOBuf chain.
// snappy::Source adapter that lets snappy read its input from an IOBuf chain.
889 class IOBufSnappySource final : public snappy::Source {
891 explicit IOBufSnappySource(const IOBuf* data);
// snappy::Source interface.
892 size_t Available() const override;
893 const char* Peek(size_t* len) override;
894 void Skip(size_t n) override;
// Starts with the whole chain available: available_ is initialized to the
// total data length of the chain.
900 IOBufSnappySource::IOBufSnappySource(const IOBuf* data)
901 : available_(data->computeChainDataLength()),
905 size_t IOBufSnappySource::Available() const {
// Exposes the contiguous bytes at the cursor's current position without
// consuming them.
909 const char* IOBufSnappySource::Peek(size_t* len) {
910 auto sp = StringPiece{cursor_.peekBytes()};
// Consumes `n` bytes. Skipping more than is available is a fatal error
// (CHECK).
915 void IOBufSnappySource::Skip(size_t n) {
916 CHECK_LE(n, available_);
// Codec backed by the snappy library (CodecType::SNAPPY).
921 class SnappyCodec final : public Codec {
923 static std::unique_ptr<Codec> create(int level, CodecType type);
924 explicit SnappyCodec(int level, CodecType type);
927 uint64_t doMaxUncompressedLength() const override;
928 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
929 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
930 std::unique_ptr<IOBuf> doUncompress(
932 Optional<uint64_t> uncompressedLength) override;
935 std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
936 return std::make_unique<SnappyCodec>(level, type);
// Validates the requested level. The symbolic COMPRESSION_LEVEL_* values are
// handled by the case labels below; an unsupported level throws
// std::invalid_argument.
939 SnappyCodec::SnappyCodec(int level, CodecType type) : Codec(type) {
940 DCHECK(type == CodecType::SNAPPY);
942 case COMPRESSION_LEVEL_FASTEST:
943 case COMPRESSION_LEVEL_DEFAULT:
944 case COMPRESSION_LEVEL_BEST:
948 throw std::invalid_argument(to<std::string>(
949 "SnappyCodec: invalid level: ", level));
953 uint64_t SnappyCodec::doMaxUncompressedLength() const {
954 // snappy.h uses uint32_t for lengths, so there's that.
955 return std::numeric_limits<uint32_t>::max();
958 uint64_t SnappyCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
959 return snappy::MaxCompressedLength(uncompressedLength);
// Compresses the IOBuf chain with snappy, reading through IOBufSnappySource
// and writing into a single output buffer sized for the worst case.
962 std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
963 IOBufSnappySource source(data);
964 auto out = IOBuf::create(maxCompressedLength(source.Available()));
// The sink performs no bounds checking, hence the worst-case allocation above.
966 snappy::UncheckedByteArraySink sink(reinterpret_cast<char*>(
967 out->writableTail()));
969 size_t n = snappy::Compress(&source, &sink);
// Sanity check: snappy must not have written past the buffer.
971 CHECK_LE(n, out->capacity());
// Uncompresses snappy data in two passes over the input: the first reads the
// uncompressed length stored in the stream (and cross-checks it against any
// caller-supplied value), the second decompresses into a buffer of that size.
// Throws std::runtime_error if the length cannot be read, does not match, or
// decompression fails.
976 std::unique_ptr<IOBuf> SnappyCodec::doUncompress(
978 Optional<uint64_t> uncompressedLength) {
979 uint32_t actualUncompressedLength = 0;
// Pass 1: a dedicated source, since reading the length consumes input.
982 IOBufSnappySource source(data);
983 if (!snappy::GetUncompressedLength(&source, &actualUncompressedLength)) {
984 throw std::runtime_error("snappy::GetUncompressedLength failed");
986 if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
987 throw std::runtime_error("snappy: invalid uncompressed length");
991 auto out = IOBuf::create(actualUncompressedLength);
// Pass 2: a fresh source starting at the beginning of the data.
994 IOBufSnappySource source(data);
995 if (!snappy::RawUncompress(&source,
996 reinterpret_cast<char*>(out->writableTail()))) {
997 throw std::runtime_error("snappy::RawUncompress failed");
1001 out->append(actualUncompressedLength);
1005 #endif // FOLLY_HAVE_LIBSNAPPY
// Streaming codec backed by zlib; serves both CodecType::ZLIB and
// CodecType::GZIP. Keeps separate, lazily created deflate and inflate streams.
1011 class ZlibStreamCodec final : public StreamCodec {
// Factories returning the codec through either base interface.
1013 static std::unique_ptr<Codec> createCodec(int level, CodecType type);
1014 static std::unique_ptr<StreamCodec> createStream(int level, CodecType type);
1015 explicit ZlibStreamCodec(int level, CodecType type);
1016 ~ZlibStreamCodec() override;
1018 std::vector<std::string> validPrefixes() const override;
1019 bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
1023 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
1025 void doResetStream() override;
1026 bool doCompressStream(
1028 MutableByteRange& output,
1029 StreamCodec::FlushOp flush) override;
1030 bool doUncompressStream(
1032 MutableByteRange& output,
1033 StreamCodec::FlushOp flush) override;
1035 void resetDeflateStream();
1036 void resetInflateStream();
// zlib stream states; empty until the corresponding direction is first used.
1038 Optional<z_stream> deflateStream_{};
1039 Optional<z_stream> inflateStream_{};
1041 bool needReset_{true};
1044 static constexpr uint16_t kGZIPMagicLE = 0x8B1F;
// Lists every byte prefix that valid data for this codec type can start with:
// for ZLIB, all 2-byte headers that use the deflate method and satisfy the
// FCHECK rule; otherwise the 2-byte gzip magic.
1046 std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
1047 if (type() == CodecType::ZLIB) {
1048 // Zlib streams start with a 2 byte header.
1055 // We won't restrict the values of any sub-fields except as described below.
1057 // The lowest 4 bits of CMF is the compression method (CM).
1058 // CM == 0x8 is the deflate compression method, which is currently the only
1059 // supported compression method, so any valid prefix must have CM == 0x8.
1061 // The lowest 5 bits of FLG is FCHECK.
1062 // FCHECK must be such that the two header bytes are a multiple of 31 when
1063 // interpreted as a big endian 16-bit number.
1064 std::vector<std::string> result;
1065 // 16 values for the first byte, 8 values for the second byte.
1066 // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
1067 result.reserve(132);
1068 // Select all values for the CMF byte that use the deflate algorithm 0x8.
1069 for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
1070 // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
1071 for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
1072 uint16_t prefix = first | second;
// Round up to the next multiple of 31 to obtain a valid FCHECK.
1074 prefix += 31 - (prefix % 31);
1075 result.push_back(prefixToStringLE(Endian::big(prefix)));
1076 // zlib won't produce this, but it is a valid prefix.
1077 if ((prefix & 0x1F) == 31) {
1079 result.push_back(prefixToStringLE(Endian::big(prefix)));
1085 // The gzip frame starts with 2 magic bytes.
1086 return {prefixToStringLE(kGZIPMagicLE)};
// Cheap format sniffing on the first two bytes of `data`.
// ZLIB: the big-endian 16-bit header must name the deflate method and be a
// multiple of 31. Otherwise the data must start with the gzip magic.
// The uncompressed length hint is ignored.
1090 bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
1092 if (type() == CodecType::ZLIB) {
1094 Cursor cursor{data};
1095 if (!cursor.tryReadBE(value)) {
1098 // zlib compressed if using deflate and is a multiple of 31.
1099 return (value & 0x0F00) == 0x0800 && value % 31 == 0;
1101 return dataStartsWithLE(data, kGZIPMagicLE);
1105 uint64_t ZlibStreamCodec::doMaxCompressedLength(
1106 uint64_t uncompressedLength) const {
1107 return deflateBound(nullptr, uncompressedLength);
1110 std::unique_ptr<Codec> ZlibStreamCodec::createCodec(int level, CodecType type) {
1111 return std::make_unique<ZlibStreamCodec>(level, type);
1114 std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
1117 return std::make_unique<ZlibStreamCodec>(level, type);
// Validates the compression level. The symbolic COMPRESSION_LEVEL_* values are
// translated by the case labels below (DEFAULT maps to
// Z_DEFAULT_COMPRESSION); afterwards the level must be Z_DEFAULT_COMPRESSION
// or lie in [0, 9], otherwise std::invalid_argument is thrown.
1120 ZlibStreamCodec::ZlibStreamCodec(int level, CodecType type)
1121 : StreamCodec(type) {
1122 DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
1124 case COMPRESSION_LEVEL_FASTEST:
1127 case COMPRESSION_LEVEL_DEFAULT:
1128 level = Z_DEFAULT_COMPRESSION;
1130 case COMPRESSION_LEVEL_BEST:
1134 if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) {
1135 throw std::invalid_argument(
1136 to<std::string>("ZlibStreamCodec: invalid level: ", level));
1141 ZlibStreamCodec::~ZlibStreamCodec() {
1142 if (deflateStream_) {
1143 deflateEnd(deflateStream_.get_pointer());
1144 deflateStream_.clear();
1146 if (inflateStream_) {
1147 inflateEnd(inflateStream_.get_pointer());
1148 inflateStream_.clear();
1152 void ZlibStreamCodec::doResetStream() {
// Prepares the deflate stream for a new compression run: an existing stream
// is reset with deflateReset(); otherwise a new one is initialized with
// deflateInit2(). On failure the stream is cleared and std::runtime_error is
// thrown with zlib's return code.
1156 void ZlibStreamCodec::resetDeflateStream() {
1157 if (deflateStream_) {
1158 int const rc = deflateReset(deflateStream_.get_pointer());
// Drop the stream before throwing so it is not reused afterwards.
1160 deflateStream_.clear();
1161 throw std::runtime_error(
1162 to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
1166 deflateStream_ = z_stream{};
1167 // Using deflateInit2() to support gzip. "The windowBits parameter is the
1168 // base two logarithm of the maximum window size (...) The default value is
1169 // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
1170 // around the compressed data instead of a zlib wrapper. The gzip header
1171 // will have no file name, no extra data, no comment, no modification time
1172 // (set to zero), no header crc, and the operating system will be set to 255
1174 int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
1175 // All other parameters (method, memLevel, strategy) get default values from
1177 int const rc = deflateInit2(
1178 deflateStream_.get_pointer(),
1183 Z_DEFAULT_STRATEGY);
// Drop the stream before throwing so it is not reused afterwards.
1185 deflateStream_.clear();
1186 throw std::runtime_error(
1187 to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
// Prepares the inflate stream for a new decompression run: an existing stream
// is reset with inflateReset(); otherwise a new one is initialized with
// inflateInit2(). On failure the stream is cleared and std::runtime_error is
// thrown with zlib's return code.
1191 void ZlibStreamCodec::resetInflateStream() {
1192 if (inflateStream_) {
1193 int const rc = inflateReset(inflateStream_.get_pointer());
// Drop the stream before throwing so it is not reused afterwards.
1195 inflateStream_.clear();
1196 throw std::runtime_error(
1197 to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
1201 inflateStream_ = z_stream{};
1202 // "The windowBits parameter is the base two logarithm of the maximum window
1203 // size (...) The default value is 15 (...) add 16 to decode only the gzip
1204 // format (the zlib format will return a Z_DATA_ERROR)."
1205 int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
1206 int const rc = inflateInit2(inflateStream_.get_pointer(), windowBits);
// Drop the stream before throwing so it is not reused afterwards.
1208 inflateStream_.clear();
1209 throw std::runtime_error(
1210 to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
// Maps a StreamCodec::FlushOp onto the corresponding zlib flush constant
// (FLUSH becomes Z_SYNC_FLUSH). Throws std::invalid_argument for a value that
// is not one of the known flush operations.
1214 static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
1216 case StreamCodec::FlushOp::NONE:
1218 case StreamCodec::FlushOp::FLUSH:
1219 return Z_SYNC_FLUSH;
1220 case StreamCodec::FlushOp::END:
1223 throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
// Converts a failing zlib return code into std::runtime_error carrying the
// numeric code.
1227 static int zlibThrowOnError(int rc) {
1234 throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
// Runs one deflate() step from `input` into `output` using the flush mode
// derived from `flush` via zlibTranslateFlush().
//
// The z_stream's next_in/avail_in and next_out/avail_out are pointed at the
// two ranges, and both ranges are advanced by (size - avail_*), i.e. by what
// zlib reports as consumed/produced.
//
// Returns:
//   - FlushOp::FLUSH: true when all input was consumed and output space is
//     still available.
//   - FlushOp::END:   true when deflate() returned Z_STREAM_END.
//   - FlushOp::NONE:  value not visible in this listing.
// Throws std::invalid_argument("ZlibStreamCodec: Invalid flush") at the end of
// the dispatch, presumably for an unrecognized FlushOp; zlib errors surface
// through zlibThrowOnError().
//
// NOTE(review): the guard around resetDeflateStream() and the action taken
// for a null output pointer are not visible here.
1238 bool ZlibStreamCodec::doCompressStream(
1240 MutableByteRange& output,
1241 StreamCodec::FlushOp flush) {
1243 resetDeflateStream();
1246 DCHECK(deflateStream_.hasValue());
1247 // zlib will return Z_STREAM_ERROR if output.data() is null.
1248 if (output.data() == nullptr) {
1251 deflateStream_->next_in = const_cast<uint8_t*>(input.data());
1252 deflateStream_->avail_in = input.size();
1253 deflateStream_->next_out = output.data();
1254 deflateStream_->avail_out = output.size();
// NOTE(review): these advances are written before the deflate() call;
// presumably they sit inside a scope guard so that they observe the counters
// after deflate() has run -- confirm.
1256 input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
1257 output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
1259 int const rc = zlibThrowOnError(
1260 deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
1262 case StreamCodec::FlushOp::NONE:
1264 case StreamCodec::FlushOp::FLUSH:
1265 return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
1266 case StreamCodec::FlushOp::END:
1267 return rc == Z_STREAM_END;
1269 throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
// Runs one inflate() step from `input` into `output` using the flush mode
// derived from `flush` via zlibTranslateFlush().
//
// The z_stream is pointed at both ranges, and both ranges are advanced by
// (size - avail_*), i.e. by what zlib reports as consumed/produced. Unlike the
// compression path, the bounds-checked advance() is used here.
//
// Returns true exactly when inflate() reported Z_STREAM_END; zlib errors
// surface through zlibThrowOnError().
//
// NOTE(review): the guard around resetInflateStream() and the action taken
// for a null output pointer are not visible here.
1273 bool ZlibStreamCodec::doUncompressStream(
1275 MutableByteRange& output,
1276 StreamCodec::FlushOp flush) {
1278 resetInflateStream();
1281 DCHECK(inflateStream_.hasValue());
1282 // zlib will return Z_STREAM_ERROR if output.data() is null.
1283 if (output.data() == nullptr) {
1286 inflateStream_->next_in = const_cast<uint8_t*>(input.data());
1287 inflateStream_->avail_in = input.size();
1288 inflateStream_->next_out = output.data();
1289 inflateStream_->avail_out = output.size();
// NOTE(review): written before the inflate() call; presumably deferred by a
// scope guard so the post-call counters are used -- confirm.
1291 input.advance(input.size() - inflateStream_->avail_in);
1292 output.advance(output.size() - inflateStream_->avail_out);
1294 int const rc = zlibThrowOnError(
1295 inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
1296 return rc == Z_STREAM_END;
1299 #endif // FOLLY_HAVE_LIBZ
1301 #if FOLLY_HAVE_LIBLZMA
// Codec implemented on top of liblzma (lzma_easy_encoder / lzma_auto_decoder).
//
// Serves two codec types: CodecType::LZMA2 and CodecType::LZMA2_VARINT_SIZE.
// For the latter, encodeSize() is true and the compressed output is preceded
// by the uncompressed length encoded as a varint.
//
// addOutputBuffer() and doInflate() are helpers shared by the compression and
// decompression loops for growing the output IOBuf chain.
// NOTE(review): access specifiers and the data members (e.g. the stored
// compression level) are not visible in this listing.
1306 class LZMA2Codec final : public Codec {
1308 static std::unique_ptr<Codec> create(int level, CodecType type);
1309 explicit LZMA2Codec(int level, CodecType type);
1311 std::vector<std::string> validPrefixes() const override;
1312 bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
1316 bool doNeedsUncompressedLength() const override;
1317 uint64_t doMaxUncompressedLength() const override;
1318 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
1320 bool encodeSize() const { return type() == CodecType::LZMA2_VARINT_SIZE; }
1322 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
1323 std::unique_ptr<IOBuf> doUncompress(
1325 Optional<uint64_t> uncompressedLength) override;
1327 std::unique_ptr<IOBuf> addOutputBuffer(lzma_stream* stream, size_t length);
1328 bool doInflate(lzma_stream* stream, IOBuf* head, size_t bufferLength);
// Stream-header magic used to recognize LZMA2 data, stored as a little-endian
// integer: the byte sequence FD 37 7A 58 5A 00. Only the low kLZMA2MagicBytes
// (6) bytes of the 64-bit constant are significant.
1333 static constexpr uint64_t kLZMA2MagicLE = 0x005A587A37FD;
1334 static constexpr unsigned kLZMA2MagicBytes = 6;
// Returns the byte prefixes that identify data this codec can decode.
// For plain LZMA2 this is the single 6-byte magic. The LZMA2_VARINT_SIZE
// variant takes a separate branch whose result is not visible here
// (presumably an empty list, since a varint precedes the magic -- confirm).
1336 std::vector<std::string> LZMA2Codec::validPrefixes() const {
1337 if (type() == CodecType::LZMA2_VARINT_SIZE) {
1340 return {prefixToStringLE(kLZMA2MagicLE, kLZMA2MagicBytes)};
// Cheap format sniff: for plain LZMA2, reports whether `data` begins with the
// LZMA2 magic. The uncompressed-length hint is ignored (unnamed parameter).
// The LZMA2_VARINT_SIZE variant takes a separate branch whose result is not
// visible in this listing.
1343 bool LZMA2Codec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
1344 if (type() == CodecType::LZMA2_VARINT_SIZE) {
1347 // Returns false for all inputs less than 8 bytes.
1348 // This is okay, because no valid LZMA2 streams are less than 8 bytes.
1349 return dataStartsWithLE(data, kLZMA2MagicLE, kLZMA2MagicBytes);
// Factory with the (level, type) signature used by the codec factory table;
// simply forwards both arguments to the LZMA2Codec constructor.
1352 std::unique_ptr<Codec> LZMA2Codec::create(int level, CodecType type) {
1353 return std::make_unique<LZMA2Codec>(level, type);
// Constructs the codec for `type` (debug-checked to be LZMA2 or
// LZMA2_VARINT_SIZE) and resolves `level`.
//
// The symbolic COMPRESSION_LEVEL_* values are translated to liblzma presets
// (DEFAULT becomes LZMA_PRESET_DEFAULT; the FASTEST and BEST mappings are not
// visible here). The resulting level must lie in [0, 9]; otherwise
// std::invalid_argument("LZMA2Codec: invalid level: <level>") is thrown.
// NOTE(review): the statement that stores the level is not visible here.
1356 LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
1357 DCHECK(type == CodecType::LZMA2 || type == CodecType::LZMA2_VARINT_SIZE);
1359 case COMPRESSION_LEVEL_FASTEST:
1362 case COMPRESSION_LEVEL_DEFAULT:
1363 level = LZMA_PRESET_DEFAULT;
1365 case COMPRESSION_LEVEL_BEST:
1369 if (level < 0 || level > 9) {
1370 throw std::invalid_argument(to<std::string>(
1371 "LZMA2Codec: invalid level: ", level));
// Presumably tells the Codec base class whether callers must supply the
// uncompressed length to uncompress().
// NOTE(review): the returned value is not visible in this listing.
1376 bool LZMA2Codec::doNeedsUncompressedLength() const {
// Largest input size this codec accepts: 2^63 bytes, the approximate stream
// size limit quoted from lzma/base.h below.
1380 uint64_t LZMA2Codec::doMaxUncompressedLength() const {
1381 // From lzma/base.h: "Stream is roughly 8 EiB (2^63 bytes)"
1382 return uint64_t(1) << 63;
// Upper bound on the compressed size for `uncompressedLength` input bytes:
// liblzma's lzma_stream_buffer_bound(), plus room for the varint length
// prefix (kMaxVarintLength64) when this codec encodes the size.
1385 uint64_t LZMA2Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
1386 return lzma_stream_buffer_bound(uncompressedLength) +
1387 (encodeSize() ? kMaxVarintLength64 : 0);
// Allocates a fresh output IOBuf and points `stream`'s output cursor at it.
//
// Precondition (CHECKed): the stream has no output space left.
// The new buffer is created with at least `length` bytes of capacity and its
// whole capacity is marked as data, so avail_out equals the real capacity
// (which may exceed `length`); callers trim the unused tail afterwards.
// NOTE(review): the return statement is not visible here; callers chain the
// result, so it is presumably the new buffer.
1390 std::unique_ptr<IOBuf> LZMA2Codec::addOutputBuffer(
1391 lzma_stream* stream,
1394 CHECK_EQ(stream->avail_out, 0);
1396 auto buf = IOBuf::create(length);
1397 buf->append(buf->capacity());
1399 stream->next_out = buf->writableData();
1400 stream->avail_out = buf->length();
// Compresses the IOBuf chain `data` with lzma_easy_encoder (integrity check
// LZMA_CHECK_NONE) and returns the result as an IOBuf chain.
//
// Outline:
//   1. Initialise the encoder; lzma_end() is registered via SCOPE_EXIT.
//   2. Allocate the first output buffer: the full worst-case size if that is
//      at most 64 MiB, otherwise a 4 MiB buffer.
//   3. Build a small IOBuf holding the uncompressed length as a varint and
//      put it in front of the output (the enclosing condition is not visible;
//      presumably encodeSize()).
//   4. Feed every non-empty input range with LZMA_RUN, appending a new 4 MiB
//      output buffer whenever the current one is full.
//   5. Drain with LZMA_FINISH until the encoder stops returning LZMA_OK.
//   6. Trim the unused tail of the last output buffer.
//
// Throws std::runtime_error if encoder initialisation fails, if lzma_code()
// returns anything but LZMA_OK while running, or if finishing does not end
// with LZMA_STREAM_END.
1405 std::unique_ptr<IOBuf> LZMA2Codec::doCompress(const IOBuf* data) {
1407 lzma_stream stream = LZMA_STREAM_INIT;
1409 rc = lzma_easy_encoder(&stream, level_, LZMA_CHECK_NONE);
1410 if (rc != LZMA_OK) {
1411 throw std::runtime_error(folly::to<std::string>(
1412 "LZMA2Codec: lzma_easy_encoder error: ", rc));
1415 SCOPE_EXIT { lzma_end(&stream); };
1417 uint64_t uncompressedLength = data->computeChainDataLength();
1418 uint64_t maxCompressedLength = lzma_stream_buffer_bound(uncompressedLength);
1420 // Max 64MiB in one go
1421 constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
1422 constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
1424 auto out = addOutputBuffer(
1426 (maxCompressedLength <= maxSingleStepLength ?
1427 maxCompressedLength :
1428 defaultBufferLength));
// Varint size prefix: becomes the new head of the chain, with the compressed
// data appended after it.
1431 auto size = IOBuf::createCombined(kMaxVarintLength64);
1432 encodeVarintToIOBuf(uncompressedLength, size.get());
1433 size->appendChain(std::move(out));
1434 out = std::move(size);
1437 for (auto& range : *data) {
1438 if (range.empty()) {
1442 stream.next_in = const_cast<uint8_t*>(range.data());
1443 stream.avail_in = range.size();
1445 while (stream.avail_in != 0) {
1446 if (stream.avail_out == 0) {
// prependChain() on the head inserts before it in the circular chain, i.e. at
// the tail of the output.
1447 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
1450 rc = lzma_code(&stream, LZMA_RUN);
1452 if (rc != LZMA_OK) {
1453 throw std::runtime_error(folly::to<std::string>(
1454 "LZMA2Codec: lzma_code error: ", rc));
1460 if (stream.avail_out == 0) {
1461 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
1464 rc = lzma_code(&stream, LZMA_FINISH);
1465 } while (rc == LZMA_OK);
1467 if (rc != LZMA_STREAM_END) {
1468 throw std::runtime_error(folly::to<std::string>(
1469 "LZMA2Codec: lzma_code ended with error: ", rc));
// out->prev() is the last buffer in the chain; drop the bytes never written.
1472 out->prev()->trimEnd(stream.avail_out);
// Performs one lzma_code(LZMA_RUN) step for the decoder.
//
// If the stream has no output space left, a new buffer of `bufferLength`
// bytes is first appended to the chain headed by `head`.
// The bool result is used by the caller as "end of stream reached";
// LZMA_STREAM_END is handled as its own case, and the error path throws
// std::runtime_error("LZMA2Codec: lzma_code error: <rc>").
// NOTE(review): the switch header, the other case labels and the return
// statements are not visible in this listing.
1477 bool LZMA2Codec::doInflate(lzma_stream* stream,
1479 size_t bufferLength) {
1480 if (stream->avail_out == 0) {
1481 head->prependChain(addOutputBuffer(stream, bufferLength));
1484 lzma_ret rc = lzma_code(stream, LZMA_RUN);
1489 case LZMA_STREAM_END:
1492 throw std::runtime_error(to<std::string>(
1493 "LZMA2Codec: lzma_code error: ", rc));
// Decompresses the IOBuf chain `data` with lzma_auto_decoder (no memory
// limit: the limit passed is UINT64_MAX) and returns the output chain.
//
// Outline:
//   1. Initialise the decoder; lzma_end() is registered via SCOPE_EXIT.
//   2. Read the varint length prefix from the input and reconcile it with the
//      caller-supplied `uncompressedLength` (the enclosing condition is not
//      visible; presumably encodeSize()).
//   3. Allocate the first output buffer: exactly the known uncompressed
//      length if it is at most 64 MiB, otherwise 256 KiB.
//   4. Feed each contiguous input range through doInflate(), growing the
//      output in 256 KiB steps.
//   5. Keep calling doInflate() with no new input until the stream ends.
//   6. Trim the unused tail of the last buffer and verify the total output
//      size against the expected length, if one is known.
//
// Throws std::runtime_error on decoder-init failure, on a mismatch between
// expected and actual uncompressed length, on "junk after end of data"
// (presumably input remaining after the stream ended -- the guard is not
// visible), and on any error raised by doInflate().
1499 std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(
1501 Optional<uint64_t> uncompressedLength) {
1503 lzma_stream stream = LZMA_STREAM_INIT;
1505 rc = lzma_auto_decoder(&stream, std::numeric_limits<uint64_t>::max(), 0);
1506 if (rc != LZMA_OK) {
1507 throw std::runtime_error(folly::to<std::string>(
1508 "LZMA2Codec: lzma_auto_decoder error: ", rc));
1511 SCOPE_EXIT { lzma_end(&stream); };
1513 // Max 64MiB in one go
1514 constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
1515 constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB
1517 folly::io::Cursor cursor(data);
// The embedded length must agree with the caller's hint when both exist; it
// then becomes the expected length for the remaining checks.
1519 const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
1520 if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
1521 throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
1523 uncompressedLength = actualUncompressedLength;
1526 auto out = addOutputBuffer(
1528 ((uncompressedLength && *uncompressedLength <= maxSingleStepLength)
1529 ? *uncompressedLength
1530 : defaultBufferLength));
1532 bool streamEnd = false;
1533 auto buf = cursor.peekBytes();
1534 while (!buf.empty()) {
1535 stream.next_in = const_cast<uint8_t*>(buf.data());
1536 stream.avail_in = buf.size();
1538 while (stream.avail_in != 0) {
1540 throw std::runtime_error(to<std::string>(
1541 "LZMA2Codec: junk after end of data"));
1544 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
1547 cursor.skip(buf.size());
1548 buf = cursor.peekBytes();
// All input has been handed over; let the decoder flush what it still holds.
1551 while (!streamEnd) {
1552 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
1555 out->prev()->trimEnd(stream.avail_out);
1557 if (uncompressedLength && *uncompressedLength != stream.total_out) {
1558 throw std::runtime_error(
1559 to<std::string>("LZMA2Codec: invalid uncompressed length"));
1565 #endif // FOLLY_HAVE_LIBLZMA
1567 #ifdef FOLLY_HAVE_LIBZSTD
// void-returning wrapper around ZSTD_freeCStream(); its result is discarded.
// Used as the function argument of the static_function_deleter for the
// compression stream handle.
1570 void zstdFreeCStream(ZSTD_CStream* zcs) {
1571 ZSTD_freeCStream(zcs);
// void-returning wrapper around ZSTD_freeDStream(); its result is discarded.
// Used as the function argument of the static_function_deleter for the
// decompression stream handle.
1574 void zstdFreeDStream(ZSTD_DStream* zds) {
1575 ZSTD_freeDStream(zds);
// Streaming codec for CodecType::ZSTD, built on the zstd streaming API
// (ZSTD_CStream / ZSTD_DStream).
//
// createCodec() and createStream() construct the same class and differ only
// in the static type of the returned pointer (Codec vs. StreamCodec).
// tryBlockCompress()/tryBlockUncompress() are one-shot fast paths attempted
// by the stream methods when the operation is FlushOp::END.
// The stream handles are owned through static_function_deleter wrappers
// around zstdFreeCStream / zstdFreeDStream.
// NOTE(review): access specifiers, the level member and the handle member
// declarations are only partially visible in this listing.
1582 class ZSTDStreamCodec final : public StreamCodec {
1584 static std::unique_ptr<Codec> createCodec(int level, CodecType);
1585 static std::unique_ptr<StreamCodec> createStream(int level, CodecType);
1586 explicit ZSTDStreamCodec(int level, CodecType type);
1588 std::vector<std::string> validPrefixes() const override;
1589 bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
1593 bool doNeedsUncompressedLength() const override;
1594 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
1595 Optional<uint64_t> doGetUncompressedLength(
1597 Optional<uint64_t> uncompressedLength) const override;
1599 void doResetStream() override;
1600 bool doCompressStream(
1602 MutableByteRange& output,
1603 StreamCodec::FlushOp flushOp) override;
1604 bool doUncompressStream(
1606 MutableByteRange& output,
1607 StreamCodec::FlushOp flushOp) override;
1609 void resetCStream();
1610 void resetDStream();
1612 bool tryBlockCompress(ByteRange& input, MutableByteRange& output) const;
1613 bool tryBlockUncompress(ByteRange& input, MutableByteRange& output) const;
// Starts out true, so a freshly constructed codec begins in the "needs reset"
// state.
1616 bool needReset_{true};
1619 folly::static_function_deleter<ZSTD_CStream, &zstdFreeCStream>>
1623 folly::static_function_deleter<ZSTD_DStream, &zstdFreeDStream>>
// zstd frame magic number as a little-endian 32-bit integer
// (byte sequence 28 B5 2F FD).
1627 static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528;
// Returns the single prefix that identifies zstd data: the frame magic
// number rendered as a little-endian byte string.
1629 std::vector<std::string> ZSTDStreamCodec::validPrefixes() const {
1630 return {prefixToStringLE(kZSTDMagicLE)};
// Cheap format sniff: reports whether `data` starts with the zstd frame
// magic. The uncompressed-length hint is ignored (unnamed parameter).
1633 bool ZSTDStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
1635 return dataStartsWithLE(data, kZSTDMagicLE);
// Factory returning the codec through the non-streaming Codec interface;
// forwards (level, type) to the constructor.
1638 std::unique_ptr<Codec> ZSTDStreamCodec::createCodec(int level, CodecType type) {
1639 return make_unique<ZSTDStreamCodec>(level, type);
// Factory returning the codec through the StreamCodec interface; constructs
// the same class as createCodec().
1642 std::unique_ptr<StreamCodec> ZSTDStreamCodec::createStream(
1645 return make_unique<ZSTDStreamCodec>(level, type);
// Constructs the codec for `type` (debug-checked to be CodecType::ZSTD) and
// resolves `level`.
//
// The symbolic COMPRESSION_LEVEL_{FASTEST,DEFAULT,BEST} values are translated
// to concrete zstd levels (the mapped values are not visible here). The
// resulting level must lie in [1, ZSTD_maxCLevel()]; otherwise
// std::invalid_argument("ZSTD: invalid level: <level>") is thrown.
// NOTE(review): the statement that stores the level is not visible here.
1648 ZSTDStreamCodec::ZSTDStreamCodec(int level, CodecType type)
1649 : StreamCodec(type) {
1650 DCHECK(type == CodecType::ZSTD);
1652 case COMPRESSION_LEVEL_FASTEST:
1655 case COMPRESSION_LEVEL_DEFAULT:
1658 case COMPRESSION_LEVEL_BEST:
1662 if (level < 1 || level > ZSTD_maxCLevel()) {
1663 throw std::invalid_argument(
1664 to<std::string>("ZSTD: invalid level: ", level));
// Presumably tells the Codec base class whether callers must supply the
// uncompressed length to uncompress().
// NOTE(review): the returned value is not visible in this listing.
1669 bool ZSTDStreamCodec::doNeedsUncompressedLength() const {
// Upper bound on the compressed size for `uncompressedLength` input bytes,
// delegated to ZSTD_compressBound().
1673 uint64_t ZSTDStreamCodec::doMaxCompressedLength(
1674 uint64_t uncompressedLength) const {
1675 return ZSTD_compressBound(uncompressedLength);
// Converts a zstd size_t result into an exception when it encodes an error.
// If ZSTD_isError(rc) is false nothing is thrown; otherwise a
// std::runtime_error carrying ZSTD_getErrorName(rc) is raised.
1678 void zstdThrowIfError(size_t rc) {
1679 if (!ZSTD_isError(rc)) {
1682 throw std::runtime_error(
1683 to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
// Determines the uncompressed length of `data`, preferring the size recorded
// in the zstd frame header over the caller's hint.
//
// Only the first IOBuf of the chain is inspected. A result of 0 from
// ZSTD_getDecompressedSize() is treated as "not available", in which case the
// caller-supplied `uncompressedLength` is returned unchanged (possibly
// empty). When the frame does record a size, it must match the hint if one
// was given; otherwise std::runtime_error("ZSTD: invalid uncompressed
// length") is thrown.
1686 Optional<uint64_t> ZSTDStreamCodec::doGetUncompressedLength(
1688 Optional<uint64_t> uncompressedLength) const {
1689 // Read decompressed size from frame if available in first IOBuf.
1690 auto const decompressedSize =
1691 ZSTD_getDecompressedSize(data->data(), data->length());
1692 if (decompressedSize != 0) {
1693 if (uncompressedLength && *uncompressedLength != decompressedSize) {
1694 throw std::runtime_error("ZSTD: invalid uncompressed length");
1696 uncompressedLength = decompressedSize;
1698 return uncompressedLength;
// StreamCodec reset hook.
// NOTE(review): the body is not visible in this listing; presumably it marks
// the codec as needing a reset (needReset_) so the zstd stream is
// re-initialised lazily -- confirm.
1701 void ZSTDStreamCodec::doResetStream() {
// One-shot fast path: compresses all of `input` with a single ZSTD_compress()
// call instead of going through the streaming API.
//
// The fast path is only taken when `output` can hold the worst case
// (ZSTD_compressBound of the input size). On success both ranges are advanced
// past the bytes consumed/produced. zstd errors are rethrown by
// zstdThrowIfError().
// NOTE(review): the return statements are not visible here; presumably false
// when the output is too small and true after a successful compress.
1705 bool ZSTDStreamCodec::tryBlockCompress(
1707 MutableByteRange& output) const {
1709 // We need to know that we have enough output space to use block compression
1710 if (output.size() < ZSTD_compressBound(input.size())) {
1713 size_t const length = ZSTD_compress(
1714 output.data(), output.size(), input.data(), input.size(), level_);
1715 zstdThrowIfError(length);
1716 input.uncheckedAdvance(input.size());
1717 output.uncheckedAdvance(length);
// (Re)initialises the zstd compression stream for a new frame.
//
// A ZSTD_CStream is created with ZSTD_createCStream(); std::bad_alloc is
// thrown on the failure path. The stream is then initialised through the
// advanced API with parameters derived from level_ and the known
// uncompressed length (0 when unknown); the frame's content-size flag is set
// exactly when the uncompressed length is known.
// NOTE(review): the conditions guarding stream creation and the bad_alloc
// throw are not visible here (presumably "no stream yet" / "creation returned
// null").
1721 void ZSTDStreamCodec::resetCStream() {
1723 cstream_.reset(ZSTD_createCStream());
1725 throw std::bad_alloc{};
1728 // Advanced API usage works for all supported versions of zstd.
1729 // Required to set contentSizeFlag.
1730 auto params = ZSTD_getParams(level_, uncompressedLength().value_or(0), 0);
1731 params.fParams.contentSizeFlag = uncompressedLength().hasValue();
1732 zstdThrowIfError(ZSTD_initCStream_advanced(
1733 cstream_.get(), nullptr, 0, params, uncompressedLength().value_or(0)));
// Streaming compression step from `input` into `output`.
//
// For FlushOp::END the one-shot tryBlockCompress() path is tried first.
// Otherwise the ranges are wrapped in ZSTD_inBuffer/ZSTD_outBuffer and:
//   - ZSTD_compressStream() is called when the op is NONE or input remains;
//   - once all input is consumed and the op is not NONE, the stream is
//     flushed (FLUSH -> ZSTD_flushStream) or finished (END -> ZSTD_endStream).
// Both ranges are advanced by the positions zstd reports in the buffers.
//
// Throws std::invalid_argument("ZSTD: invalid FlushOp") from the flush
// dispatch (presumably for an unrecognized op) and std::runtime_error for any
// zstd error via zstdThrowIfError().
// NOTE(review): the reset handling and all return statements are not visible
// in this listing.
1736 bool ZSTDStreamCodec::doCompressStream(
1738 MutableByteRange& output,
1739 StreamCodec::FlushOp flushOp) {
1741 // If we are given all the input in one chunk try to use block compression
1742 if (flushOp == StreamCodec::FlushOp::END &&
1743 tryBlockCompress(input, output)) {
1749 ZSTD_inBuffer in = {input.data(), input.size(), 0};
1750 ZSTD_outBuffer out = {output.data(), output.size(), 0};
// NOTE(review): written before the zstd calls; presumably deferred by a scope
// guard so that the final in.pos/out.pos are used -- confirm.
1752 input.uncheckedAdvance(in.pos);
1753 output.uncheckedAdvance(out.pos);
1755 if (flushOp == StreamCodec::FlushOp::NONE || !input.empty()) {
1756 zstdThrowIfError(ZSTD_compressStream(cstream_.get(), &out, &in));
1758 if (in.pos == in.size && flushOp != StreamCodec::FlushOp::NONE) {
1761 case StreamCodec::FlushOp::FLUSH:
1762 rc = ZSTD_flushStream(cstream_.get(), &out);
1764 case StreamCodec::FlushOp::END:
1765 rc = ZSTD_endStream(cstream_.get(), &out);
1768 throw std::invalid_argument("ZSTD: invalid FlushOp");
1770 zstdThrowIfError(rc);
// One-shot fast path: decompresses a whole frame with a single
// ZSTD_decompress() call instead of going through the streaming API.
//
// Requirements: zstd >= 1.1.4 (for ZSTD_findFrameCompressedSize), a known
// uncompressed length, and an `output` range at least that large. The frame's
// compressed size is looked up first so that only that many input bytes are
// handed to the decoder. On success both ranges are advanced.
//
// Throws std::runtime_error if zstd reports an error or if the number of
// bytes produced differs from the expected uncompressed length.
// NOTE(review): the return statements and the #else/#endif of the version
// check are not visible here.
1778 bool ZSTDStreamCodec::tryBlockUncompress(
1780 MutableByteRange& output) const {
1782 #if ZSTD_VERSION_NUMBER < 10104
1783 // We require ZSTD_findFrameCompressedSize() to perform this optimization.
1786 // We need to know the uncompressed length and have enough output space.
1787 if (!uncompressedLength() || output.size() < *uncompressedLength()) {
1790 size_t const compressedLength =
1791 ZSTD_findFrameCompressedSize(input.data(), input.size());
1792 zstdThrowIfError(compressedLength);
1793 size_t const length = ZSTD_decompress(
1794 output.data(), *uncompressedLength(), input.data(), compressedLength);
1795 zstdThrowIfError(length);
1796 if (length != *uncompressedLength()) {
1797 throw std::runtime_error("ZSTDStreamCodec: Incorrect uncompressed length");
1799 input.uncheckedAdvance(compressedLength);
1800 output.uncheckedAdvance(length);
// (Re)initialises the zstd decompression stream.
// A ZSTD_DStream is created with ZSTD_createDStream(); std::bad_alloc is
// thrown on the failure path. The stream is then initialised with
// ZSTD_initDStream(), whose errors are rethrown by zstdThrowIfError().
// NOTE(review): the conditions guarding creation and the bad_alloc throw are
// not visible in this listing.
1805 void ZSTDStreamCodec::resetDStream() {
1807 dstream_.reset(ZSTD_createDStream());
1809 throw std::bad_alloc{};
1812 zstdThrowIfError(ZSTD_initDStream(dstream_.get()));
// Streaming decompression step from `input` into `output`.
//
// For FlushOp::END the one-shot tryBlockUncompress() path is tried first.
// Otherwise the ranges are wrapped in ZSTD_inBuffer/ZSTD_outBuffer, passed to
// ZSTD_decompressStream(), and advanced by the positions zstd reports.
// zstd errors are rethrown by zstdThrowIfError().
// NOTE(review): the reset handling and the return statements are not visible
// here; the result is presumably derived from rc (0 meaning the frame is
// complete) -- confirm.
1815 bool ZSTDStreamCodec::doUncompressStream(
1817 MutableByteRange& output,
1818 StreamCodec::FlushOp flushOp) {
1820 // If we are given all the input in one chunk try to use block uncompression
1821 if (flushOp == StreamCodec::FlushOp::END &&
1822 tryBlockUncompress(input, output)) {
1828 ZSTD_inBuffer in = {input.data(), input.size(), 0};
1829 ZSTD_outBuffer out = {output.data(), output.size(), 0};
// NOTE(review): written before the zstd call; presumably deferred by a scope
// guard so that the final in.pos/out.pos are used -- confirm.
1831 input.uncheckedAdvance(in.pos);
1832 output.uncheckedAdvance(out.pos);
1834 size_t const rc = ZSTD_decompressStream(dstream_.get(), &out, &in);
1835 zstdThrowIfError(rc);
1839 #endif // FOLLY_HAVE_LIBZSTD
1841 #if FOLLY_HAVE_LIBBZ2
// Codec for CodecType::BZIP2 implemented on top of libbz2's bz_stream API
// (BZ2_bzCompress / BZ2_bzDecompress). Non-streaming: only the whole-buffer
// doCompress()/doUncompress() hooks are provided.
// NOTE(review): access specifiers and data members (e.g. the stored level)
// are not visible in this listing.
1843 class Bzip2Codec final : public Codec {
1845 static std::unique_ptr<Codec> create(int level, CodecType type);
1846 explicit Bzip2Codec(int level, CodecType type);
1848 std::vector<std::string> validPrefixes() const override;
1849 bool canUncompress(IOBuf const* data, Optional<uint64_t> uncompressedLength)
1853 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
1854 std::unique_ptr<IOBuf> doCompress(IOBuf const* data) override;
1855 std::unique_ptr<IOBuf> doUncompress(
1857 Optional<uint64_t> uncompressedLength) override;
// Factory with the (level, type) signature used by the codec factory table;
// forwards both arguments to the Bzip2Codec constructor.
1862 /* static */ std::unique_ptr<Codec> Bzip2Codec::create(
1865 return std::make_unique<Bzip2Codec>(level, type);
// Constructs the codec for `type` (debug-checked to be CodecType::BZIP2) and
// resolves `level`.
//
// The symbolic COMPRESSION_LEVEL_{FASTEST,DEFAULT,BEST} values are translated
// to concrete bzip2 levels (the mapped values are not visible here). The
// resulting level must lie in [1, 9]; otherwise
// std::invalid_argument("Bzip2: invalid level: <level>") is thrown.
// NOTE(review): the statement that stores the level is not visible here.
1868 Bzip2Codec::Bzip2Codec(int level, CodecType type) : Codec(type) {
1869 DCHECK(type == CodecType::BZIP2);
1871 case COMPRESSION_LEVEL_FASTEST:
1874 case COMPRESSION_LEVEL_DEFAULT:
1877 case COMPRESSION_LEVEL_BEST:
1881 if (level < 1 || level > 9) {
1882 throw std::invalid_argument(
1883 to<std::string>("Bzip2: invalid level: ", level));
// bzip2 header magic as a little-endian integer: the three bytes 42 5A 68,
// i.e. the ASCII text "BZh". Only the low kBzip2MagicBytes (3) bytes of the
// constant are significant.
1888 static uint32_t constexpr kBzip2MagicLE = 0x685a42;
1889 static uint64_t constexpr kBzip2MagicBytes = 3;
// Returns the single prefix that identifies bzip2 data: the 3-byte magic
// rendered as a little-endian byte string.
1891 std::vector<std::string> Bzip2Codec::validPrefixes() const {
1892 return {prefixToStringLE(kBzip2MagicLE, kBzip2MagicBytes)};
// Cheap format sniff: reports whether `data` starts with the 3-byte bzip2
// magic. The uncompressed-length hint is ignored (unnamed parameter).
1895 bool Bzip2Codec::canUncompress(IOBuf const* data, Optional<uint64_t>) const {
1896 return dataStartsWithLE(data, kBzip2MagicLE, kBzip2MagicBytes);
// Upper bound on the compressed size for `uncompressedLength` input bytes,
// following the bzip2 manual's sizing rule quoted below: the input size plus
// 1% (integer division) plus 600 bytes.
1899 uint64_t Bzip2Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
1900 // http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzbufftobuffcompress
1901 // To guarantee that the compressed data will fit in its buffer, allocate an
1902 // output buffer of size 1% larger than the uncompressed data, plus six
1903 // hundred extra bytes.
1904 return uncompressedLength + uncompressedLength / 100 + 600;
// Builds a bz_stream in a known initial state: null allocator hooks
// (bzalloc/bzfree/opaque), null input/output pointers and zero available
// bytes on both sides.
// NOTE(review): the local declaration and the return statement are not
// visible in this listing.
1907 static bz_stream createBzStream() {
1909 stream.bzalloc = nullptr;
1910 stream.bzfree = nullptr;
1911 stream.opaque = nullptr;
1912 stream.next_in = stream.next_out = nullptr;
1913 stream.avail_in = stream.avail_out = 0;
// Validates a libbz2 return code so call sites can be written as
// `bzCheck(BZ2_...(...))` and still use the returned code.
// The error path raises std::runtime_error("Bzip2 error: <rc>").
// NOTE(review): the set of codes accepted as non-errors is not visible here.
1917 // Throws on error condition, otherwise returns the code.
1918 static int bzCheck(int const rc) {
1927 throw std::runtime_error(to<std::string>("Bzip2 error: ", rc));
// Allocates a fresh output IOBuf and points `stream`'s output cursor at it.
//
// Debug-checked preconditions: `bufferLength` fits in an unsigned int (the
// width of bz_stream::avail_out) and the stream has no output space left.
// The buffer's whole capacity is marked as data, so avail_out equals the real
// capacity, which may exceed `bufferLength`; callers trim the unused tail.
// NOTE(review): the return statement is not visible here; callers chain the
// result, so it is presumably the new buffer.
1931 static std::unique_ptr<IOBuf> addOutputBuffer(
1933 uint64_t const bufferLength) {
1934 DCHECK_LE(bufferLength, std::numeric_limits<unsigned>::max());
1935 DCHECK_EQ(stream->avail_out, 0);
1937 auto buf = IOBuf::create(bufferLength);
1938 buf->append(buf->capacity());
1940 stream->next_out = reinterpret_cast<char*>(buf->writableData());
1941 stream->avail_out = buf->length();
// Compresses the IOBuf chain `data` with libbz2 and returns the output chain.
//
// Outline:
//   1. Initialise the compressor with level_ (verbosity 0, default work
//      factor); BZ2_bzCompressEnd() is issued for cleanup.
//   2. Allocate the first output buffer: the full worst-case size if that is
//      at most 64 MiB, otherwise 4 MiB.
//   3. Feed each input range in slices of at most 64 MiB with BZ_RUN, adding
//      a 4 MiB output buffer whenever the current one is full, and advancing
//      the range by the bytes bzip2 actually consumed.
//   4. Drain with BZ_FINISH until BZ_STREAM_END is returned.
//   5. Trim the unused tail of the last output buffer.
//
// Any libbz2 error is raised as std::runtime_error by bzCheck().
1946 std::unique_ptr<IOBuf> Bzip2Codec::doCompress(IOBuf const* data) {
1947 bz_stream stream = createBzStream();
1948 bzCheck(BZ2_bzCompressInit(&stream, level_, 0, 0));
// NOTE(review): presumably wrapped in a scope guard so it runs on every exit
// path -- the wrapper is not visible in this listing.
1950 bzCheck(BZ2_bzCompressEnd(&stream));
1953 uint64_t const uncompressedLength = data->computeChainDataLength();
1954 uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
1955 uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
1956 uint64_t constexpr kDefaultBufferLength = uint64_t(4) << 20;
1958 auto out = addOutputBuffer(
1960 maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen
1961 : kDefaultBufferLength);
1963 for (auto range : *data) {
1964 while (!range.empty()) {
// bz_stream::avail_in is an unsigned int, so large ranges are fed in slices.
1965 auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength);
1967 const_cast<char*>(reinterpret_cast<char const*>(range.data()));
1968 stream.avail_in = inSize;
1970 if (stream.avail_out == 0) {
1971 out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
1974 bzCheck(BZ2_bzCompress(&stream, BZ_RUN));
1975 range.uncheckedAdvance(inSize - stream.avail_in);
1979 if (stream.avail_out == 0) {
1980 out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
1982 } while (bzCheck(BZ2_bzCompress(&stream, BZ_FINISH)) != BZ_STREAM_END);
// out->prev() is the last buffer in the chain; drop the bytes never written.
1984 out->prev()->trimEnd(stream.avail_out);
// Decompresses the IOBuf chain `data` with libbz2 and returns the output
// chain.
//
// Outline:
//   1. Initialise the decompressor (verbosity 0, normal memory mode);
//      BZ2_bzDecompressEnd() is issued for cleanup.
//   2. Allocate the first output buffer: exactly the expected uncompressed
//      length if it is known and at most 64 MiB, otherwise a default derived
//      from the compressed size via computeBufferLength() with a 100 KiB
//      block size.
//   3. Feed each input range in slices of at most 64 MiB, growing the output
//      when it fills up and advancing by the bytes actually consumed.
//   4. With the input exhausted, keep decompressing until BZ_STREAM_END; if a
//      call produces no output, the input is considered truncated.
//   5. Trim the last buffer and compare the 64-bit total (hi32:lo32) against
//      the expected length when one was supplied.
//
// Throws std::runtime_error for libbz2 errors (via bzCheck), for
// "Bzip2Codec: Truncated input", and for an uncompressed-length mismatch.
1989 std::unique_ptr<IOBuf> Bzip2Codec::doUncompress(
1991 Optional<uint64_t> uncompressedLength) {
1992 bz_stream stream = createBzStream();
1993 bzCheck(BZ2_bzDecompressInit(&stream, 0, 0));
// NOTE(review): presumably wrapped in a scope guard so it runs on every exit
// path -- the wrapper is not visible in this listing.
1995 bzCheck(BZ2_bzDecompressEnd(&stream));
1998 uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
1999 uint64_t const kBlockSize = uint64_t(100) << 10; // 100 KiB
2000 uint64_t const kDefaultBufferLength =
2001 computeBufferLength(data->computeChainDataLength(), kBlockSize);
2003 auto out = addOutputBuffer(
2005 ((uncompressedLength && *uncompressedLength <= kMaxSingleStepLength)
2006 ? *uncompressedLength
2007 : kDefaultBufferLength));
2010 for (auto range : *data) {
2011 while (!range.empty()) {
2012 auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength);
2014 const_cast<char*>(reinterpret_cast<char const*>(range.data()));
2015 stream.avail_in = inSize;
2017 if (stream.avail_out == 0) {
2018 out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
2021 rc = bzCheck(BZ2_bzDecompress(&stream));
2022 range.uncheckedAdvance(inSize - stream.avail_in);
2025 while (rc != BZ_STREAM_END) {
2026 if (stream.avail_out == 0) {
2027 out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
// No input is left at this point, so a call that writes nothing can never
// make progress: report truncation instead of looping forever.
2029 size_t const outputSize = stream.avail_out;
2030 rc = bzCheck(BZ2_bzDecompress(&stream));
2031 if (outputSize == stream.avail_out) {
2032 throw std::runtime_error("Bzip2Codec: Truncated input");
2036 out->prev()->trimEnd(stream.avail_out);
2038 uint64_t const totalOut =
2039 (uint64_t(stream.total_out_hi32) << 32) + stream.total_out_lo32;
// Here the Optional itself is compared with totalOut, whereas the other
// codecs in this file dereference it first (*uncompressedLength).
2040 if (uncompressedLength && uncompressedLength != totalOut) {
2041 throw std::runtime_error("Bzip2 error: Invalid uncompressed length");
2047 #endif // FOLLY_HAVE_LIBBZ2
2050 * Automatic decompression
// Decompression-only codec that dispatches to whichever of its member codecs
// recognizes the input (see doUncompress()).
//
// The member list consists of caller-supplied custom codecs plus the built-in
// codecs that are available in this build. Compression and
// maxCompressedLength() are not supported and throw std::runtime_error.
// needsUncompressedLength_ and maxUncompressedLength_ cache aggregate
// properties of the member codecs computed in the constructor.
2052 class AutomaticCodec final : public Codec {
2054 static std::unique_ptr<Codec> create(
2055 std::vector<std::unique_ptr<Codec>> customCodecs);
2056 explicit AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs);
2058 std::vector<std::string> validPrefixes() const override;
2059 bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
2063 bool doNeedsUncompressedLength() const override;
2064 uint64_t doMaxUncompressedLength() const override;
2066 uint64_t doMaxCompressedLength(uint64_t) const override {
2067 throw std::runtime_error(
2068 "AutomaticCodec error: maxCompressedLength() not supported.");
2070 std::unique_ptr<IOBuf> doCompress(const IOBuf*) override {
2071 throw std::runtime_error("AutomaticCodec error: compress() not supported.");
2073 std::unique_ptr<IOBuf> doUncompress(
2075 Optional<uint64_t> uncompressedLength) override;
2077 void addCodecIfSupported(CodecType type);
2079 // Throws iff the codecs aren't compatible (very slow)
2080 void checkCompatibleCodecs() const;
2082 std::vector<std::unique_ptr<Codec>> codecs_;
2083 bool needsUncompressedLength_;
2084 uint64_t maxUncompressedLength_;
// Returns the union of the valid prefixes of all member codecs, with
// duplicates removed. The prefixes pass through an unordered_set, so the
// order of the returned vector is unspecified.
2087 std::vector<std::string> AutomaticCodec::validPrefixes() const {
2088 std::unordered_set<std::string> prefixes;
2089 for (const auto& codec : codecs_) {
2090 const auto codecPrefixes = codec->validPrefixes();
2091 prefixes.insert(codecPrefixes.begin(), codecPrefixes.end());
2093 return std::vector<std::string>{prefixes.begin(), prefixes.end()};
// Applies each member codec's canUncompress(data, uncompressedLength) through
// the predicate below.
// NOTE(review): the algorithm call wrapping the predicate is not visible in
// this listing (presumably std::any_of over codecs_, i.e. true when at least
// one member codec accepts the data) -- confirm.
2096 bool AutomaticCodec::canUncompress(
2098 Optional<uint64_t> uncompressedLength) const {
2102 [data, uncompressedLength](std::unique_ptr<Codec> const& codec) {
2103 return codec->canUncompress(data, uncompressedLength);
// Appends the built-in codec for `type` to codecs_, but only if this build
// supports it (hasCodec) and no codec of that type is already present -- so a
// caller-supplied custom codec of the same type takes precedence.
// NOTE(review): the range arguments of std::any_of are not visible here
// (presumably codecs_.begin()/end()).
2107 void AutomaticCodec::addCodecIfSupported(CodecType type) {
2108 const bool present = std::any_of(
2111 [&type](std::unique_ptr<Codec> const& codec) {
2112 return codec->type() == type;
2114 if (hasCodec(type) && !present) {
2115 codecs_.push_back(getCodec(type));
// Factory: takes ownership of `customCodecs` and moves them into a new
// AutomaticCodec. Any exception thrown by the constructor propagates.
2119 /* static */ std::unique_ptr<Codec> AutomaticCodec::create(
2120 std::vector<std::unique_ptr<Codec>> customCodecs) {
2121 return std::make_unique<AutomaticCodec>(std::move(customCodecs));
// Builds the dispatching codec (reported type: CodecType::USER_DEFINED).
//
// Steps:
//   1. Adopt the caller's custom codecs.
//   2. Append each supported built-in codec that is not already represented,
//      in the order listed below.
//   3. Verify that the codecs' prefixes are mutually unambiguous
//      (checkCompatibleCodecs() throws std::invalid_argument otherwise).
//   4. Cache whether any member codec needs the uncompressed length, and the
//      largest maxUncompressedLength() among the members.
//
// NOTE(review): the null-codec check is a DCHECK placed after
// addCodecIfSupported()/checkCompatibleCodecs(), both of which already
// dereference the codecs; a null custom codec would therefore be dereferenced
// before the check runs.
2124 AutomaticCodec::AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs)
2125 : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) {
2126 // Fastest -> slowest
2127 addCodecIfSupported(CodecType::LZ4_FRAME);
2128 addCodecIfSupported(CodecType::ZSTD);
2129 addCodecIfSupported(CodecType::ZLIB);
2130 addCodecIfSupported(CodecType::GZIP);
2131 addCodecIfSupported(CodecType::LZMA2);
2132 addCodecIfSupported(CodecType::BZIP2);
2134 checkCompatibleCodecs();
2136 // Check that none of the codecs are null
2137 DCHECK(std::none_of(
2138 codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
2139 return codec == nullptr;
2142 needsUncompressedLength_ = std::any_of(
2143 codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
2144 return codec->needsUncompressedLength();
2147 const auto it = std::max_element(
2150 [](std::unique_ptr<Codec> const& lhs, std::unique_ptr<Codec> const& rhs) {
2151 return lhs->maxUncompressedLength() < rhs->maxUncompressedLength();
2153 DCHECK(it != codecs_.end());
2154 maxUncompressedLength_ = (*it)->maxUncompressedLength();
// Verifies that the member codecs can be told apart by their prefixes.
// Throws std::invalid_argument when:
//   - a codec reports no valid prefixes at all;
//   - two prefixes collide (detected when inserting a codec's prefixes grows
//     the set by fewer elements than the codec supplied);
//   - a strict, non-empty prefix of one header is itself a header.
// The last check takes a substring of every header at every length, which is
// why the declaration calls this "very slow".
// NOTE(review): the statement implementing "the empty header is not allowed"
// is not visible in this listing.
2157 void AutomaticCodec::checkCompatibleCodecs() const {
2158 // Keep track of all the possible headers.
2159 std::unordered_set<std::string> headers;
2160 // The empty header is not allowed.
2163 // Construct a set of headers and check that none of the headers occur twice.
2164 // Eliminate edge cases.
2165 for (auto&& codec : codecs_) {
2166 const auto codecHeaders = codec->validPrefixes();
2167 // Codecs without any valid headers are not allowed.
2168 if (codecHeaders.empty()) {
2169 throw std::invalid_argument{
2170 "AutomaticCodec: validPrefixes() must not be empty."};
2172 // Insert all the headers for the current codec.
2173 const size_t beforeSize = headers.size();
2174 headers.insert(codecHeaders.begin(), codecHeaders.end());
2175 // Codecs are not compatible if any header occurred twice.
2176 if (beforeSize + codecHeaders.size() != headers.size()) {
2177 throw std::invalid_argument{
2178 "AutomaticCodec: Two valid prefixes collide."};
2182 // Check if any strict non-empty prefix of any header is a header.
2183 for (const auto& header : headers) {
2184 for (size_t i = 1; i < header.size(); ++i) {
2185 if (headers.count(header.substr(0, i))) {
2186 throw std::invalid_argument{
2187 "AutomaticCodec: One valid prefix is a prefix of another valid "
// Returns the value cached by the constructor: true when at least one member
// codec needs the uncompressed length.
2194 bool AutomaticCodec::doNeedsUncompressedLength() const {
2195 return needsUncompressedLength_;
// Returns the value cached by the constructor: the largest
// maxUncompressedLength() among the member codecs.
2198 uint64_t AutomaticCodec::doMaxUncompressedLength() const {
2199 return maxUncompressedLength_;
// Decompresses `data` with the first member codec (in codecs_ order) whose
// canUncompress() accepts it, delegating to that codec's public uncompress().
// Throws std::runtime_error("AutomaticCodec error: Unknown compressed data")
// when no member codec recognizes the input; exceptions from the chosen codec
// propagate unchanged.
2202 std::unique_ptr<IOBuf> AutomaticCodec::doUncompress(
2204 Optional<uint64_t> uncompressedLength) {
2205 for (auto&& codec : codecs_) {
2206 if (codec->canUncompress(data, uncompressedLength)) {
2207 return codec->uncompress(data, uncompressedLength);
2210 throw std::runtime_error("AutomaticCodec error: Unknown compressed data");
// Factory plumbing for the public lookup functions that follow.
//
// CodecFactory / StreamCodecFactory are plain function pointers taking
// (level, type). codecFactories holds one entry per CodecType value and is
// indexed by static_cast<size_t>(type) in getFactory(); each entry pairs a
// Codec factory with an optional StreamCodec factory. A null stream factory
// means the codec has no streaming interface (see hasStreamCodec()).
//
// Entries guarded by #if depend on the corresponding library being available
// at build time.
// NOTE(review): the Factory struct header, its `codec` member, the #else
// branches (presumably null entries) and the per-row CodecType labels are not
// visible in this listing. Row order must match the CodecType enum -- verify
// against the enum when adding a codec.
2213 using CodecFactory = std::unique_ptr<Codec> (*)(int, CodecType);
2214 using StreamCodecFactory = std::unique_ptr<StreamCodec> (*)(int, CodecType);
2217 StreamCodecFactory stream;
2221 codecFactories[static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
2223 {NoCompressionCodec::create, nullptr},
2225 #if FOLLY_HAVE_LIBLZ4
2226 {LZ4Codec::create, nullptr},
2231 #if FOLLY_HAVE_LIBSNAPPY
2232 {SnappyCodec::create, nullptr},
2238 {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
2243 #if FOLLY_HAVE_LIBLZ4
2244 {LZ4Codec::create, nullptr},
2249 #if FOLLY_HAVE_LIBLZMA
2250 {LZMA2Codec::create, nullptr},
2251 {LZMA2Codec::create, nullptr},
2257 #if FOLLY_HAVE_LIBZSTD
2258 {ZSTDStreamCodec::createCodec, ZSTDStreamCodec::createStream},
2264 {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
2269 #if (FOLLY_HAVE_LIBLZ4 && LZ4_VERSION_NUMBER >= 10301)
2270 {LZ4FrameCodec::create, nullptr},
2275 #if FOLLY_HAVE_LIBBZ2
2276 {Bzip2Codec::create, nullptr},
// Looks up the factory-table entry for `type`.
// Throws std::invalid_argument("Compression type <idx> invalid") when the
// numeric value of `type` is not below CodecType::NUM_CODEC_TYPES, which
// guards the array access below.
2282 Factory const& getFactory(CodecType type) {
2283 size_t const idx = static_cast<size_t>(type);
2284 if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
2285 throw std::invalid_argument(
2286 to<std::string>("Compression type ", idx, " invalid"));
2288 return codecFactories[idx];
// Reports whether this build provides a Codec for `type`, i.e. whether the
// table entry has a non-null codec factory.
// Throws std::invalid_argument (from getFactory) for an out-of-range type.
2292 bool hasCodec(CodecType type) {
2293 return getFactory(type).codec != nullptr;
// Creates a Codec for `type` at compression level `level` by invoking the
// registered factory; debug builds verify that the created codec reports the
// requested type.
// Throws std::invalid_argument for an out-of-range type (from getFactory) and
// "Compression type <type> not supported" on the unsupported path.
// NOTE(review): the guard of the throw (presumably a null factory) and the
// return statement are not visible in this listing.
2296 std::unique_ptr<Codec> getCodec(CodecType type, int level) {
2297 auto const factory = getFactory(type).codec;
2299 throw std::invalid_argument(
2300 to<std::string>("Compression type ", type, " not supported"));
2302 auto codec = (*factory)(level, type);
2303 DCHECK(codec->type() == type);
// Reports whether this build provides a StreamCodec for `type`, i.e. whether
// the table entry has a non-null stream factory.
// Throws std::invalid_argument (from getFactory) for an out-of-range type.
2307 bool hasStreamCodec(CodecType type) {
2308 return getFactory(type).stream != nullptr;
// Creates a StreamCodec for `type` at compression level `level` by invoking
// the registered stream factory; debug builds verify that the created codec
// reports the requested type.
// Throws std::invalid_argument for an out-of-range type (from getFactory) and
// "Compression type <type> not supported" on the unsupported path.
// NOTE(review): the guard of the throw (presumably a null factory) and the
// return statement are not visible in this listing.
2311 std::unique_ptr<StreamCodec> getStreamCodec(CodecType type, int level) {
2312 auto const factory = getFactory(type).stream;
2314 throw std::invalid_argument(
2315 to<std::string>("Compression type ", type, " not supported"));
2317 auto codec = (*factory)(level, type);
2318 DCHECK(codec->type() == type);
// Public entry point for automatic decompression: returns an AutomaticCodec
// built from `customCodecs` plus the supported built-in codecs.
// Exceptions from AutomaticCodec construction (e.g. std::invalid_argument for
// incompatible prefixes) propagate to the caller.
2322 std::unique_ptr<Codec> getAutoUncompressionCodec(
2323 std::vector<std::unique_ptr<Codec>> customCodecs) {
2324 return AutomaticCodec::create(std::move(customCodecs));