Summary:
`ZlibCodec::doUncompress()` always allocates at least 4 MiB when the uncompressed size is unknown.
We can get a 10-20% speed boost for small inputs (as well as significant memory savings) if we dynamically choose the buffer size.
Benchmarks before the change:
============================================================================
experimental/terrelln/CompressionBenchmark.cpp relative time/iter iters/s
============================================================================
uncompress(ZLIB_100) 2.96us 338.19K
uncompress(ZLIB_1000) 7.22us 138.44K
uncompress(ZLIB_10000) 36.86us 27.13K
uncompress(ZLIB_100000) 299.70us 3.34K
uncompress(ZLIB_1000000) 2.74ms 365.32
uncompress(ZLIB_10000000) 26.91ms 37.16
uncompress(ZLIB_100000000) 273.92ms 3.65
============================================================================
After:
============================================================================
experimental/terrelln/CompressionBenchmark.cpp relative time/iter iters/s
============================================================================
uncompress(ZLIB_100) 2.34us 426.68K
uncompress(ZLIB_1000) 6.24us 160.38K
uncompress(ZLIB_10000) 35.52us 28.16K
uncompress(ZLIB_100000) 296.30us 3.37K
uncompress(ZLIB_1000000) 2.74ms 365.25
uncompress(ZLIB_10000000) 27.16ms 36.82
uncompress(ZLIB_100000000) 273.72ms 3.65
============================================================================
Reviewed By: yfeldblum
Differential Revision:
D4783871
fbshipit-source-id:
0fcce191ea520fd2117551db679c86f5ff734893
return out;
}
+static uint64_t computeBufferLength(uint64_t const compressedLength) { // Picks an output buffer length from the compressed input length.
+ constexpr uint64_t kMaxBufferLength = uint64_t(4) << 20; // 4 MiB: upper bound on the returned length
+ constexpr uint64_t kBlockSize = uint64_t(32) << 10; // 32 KiB: floor applied to compressedLength before scaling
+ const uint64_t goodBufferSize = 4 * std::max(kBlockSize, compressedLength); // 4x the larger of the two; NOTE(review): wraps if compressedLength >= 2^62
+ return std::min(goodBufferSize, kMaxBufferLength); // result lies in [128 KiB, 4 MiB] barring the wrap noted above
+}
+
std::unique_ptr<IOBuf> ZlibCodec::doUncompress(const IOBuf* data,
uint64_t uncompressedLength) {
z_stream stream;
};
// Max 64MiB in one go
- constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
- constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
+ constexpr uint64_t maxSingleStepLength = uint64_t(64) << 20; // 64MiB
+ const uint64_t defaultBufferLength =
+ computeBufferLength(data->computeChainDataLength());
auto out = addOutputBuffer(
&stream,