2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <folly/Optional.h>
26 #include <folly/Range.h>
27 #include <folly/io/IOBuf.h>
30 * Compression / decompression over IOBufs
33 namespace folly { namespace io {
35 enum class CodecType {
37 * This codec type is not defined; getCodec() will throw an exception
38 * if used. Useful if deriving your own classes from Codec without
39 * going through the getCodec() interface.
50 * Use LZ4 compression.
51 * Levels supported: 1 = fast, 2 = best; default = 1
56 * Use Snappy compression.
62 * Use zlib compression.
63 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
68 * Use LZ4 compression, prefixed with size (as Varint).
73 * Use LZMA2 compression.
74 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
77 LZMA2_VARINT_SIZE = 7,
80 * Use ZSTD compression.
85 * Use gzip compression. This is the same compression algorithm as ZLIB but
86 * gzip-compressed files tend to be easier to work with from the command line.
87 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
92 * Use LZ4 frame compression.
93 * Levels supported: 0 = fast, 16 = best; default = 0
98 * Use bzip2 compression.
99 * Levels supported: 1 = fast, 9 = best; default = 9
103 NUM_CODEC_TYPES = 12,
111 * Return the maximum length of data that may be compressed with this codec.
112 * NO_COMPRESSION and ZLIB support arbitrary lengths;
113 * LZ4 supports up to 1.9GiB; SNAPPY supports up to 4GiB.
114 * May return UNLIMITED_UNCOMPRESSED_LENGTH (uint64_t(-1)) if unlimited.
116 uint64_t maxUncompressedLength() const;
119 * Return the codec's type.
121 CodecType type() const { return type_; }
124 * Does this codec need the exact uncompressed length on decompression?
126 bool needsUncompressedLength() const;
129 * Compress data, returning an IOBuf (which may share storage with data).
130 * Throws std::invalid_argument if data is larger than
131 * maxUncompressedLength().
133 * Regardless of the behavior of the underlying compressor, compressing
134 * an empty IOBuf chain will return an empty IOBuf chain.
136 std::unique_ptr<IOBuf> compress(const folly::IOBuf* data);
139 * Compresses data. May involve additional copies compared to the overload
140 * that takes and returns IOBufs. Has the same error semantics as the IOBuf
143 std::string compress(StringPiece data);
146 * Uncompress data. Throws std::runtime_error on decompression error.
148 * Some codecs (LZ4) require the exact uncompressed length; this is indicated
149 * by needsUncompressedLength().
151 * For other codecs (zlib), knowing the exact uncompressed length ahead of
152 * time might be faster.
154 * Regardless of the behavior of the underlying compressor, uncompressing
155 * an empty IOBuf chain will return an empty IOBuf chain.
157 static constexpr uint64_t UNLIMITED_UNCOMPRESSED_LENGTH = uint64_t(-1);
159 std::unique_ptr<IOBuf> uncompress(
161 folly::Optional<uint64_t> uncompressedLength = folly::none);
164 * Uncompresses data. May involve additional copies compared to the overload
165 * that takes and returns IOBufs. Has the same error semantics as the IOBuf
168 std::string uncompress(
170 folly::Optional<uint64_t> uncompressedLength = folly::none);
173 explicit Codec(CodecType type);
177 * Returns a superset of the set of prefixes for which canUncompress() will
178 * return true. A superset is allowed for optimizations in canUncompress()
179 * based on other knowledge such as length. None of the prefixes may be empty.
180 * default: No prefixes.
182 virtual std::vector<std::string> validPrefixes() const;
185 * Returns true if the codec thinks it can uncompress the data.
186 * If a codec doesn't have magic bytes at the beginning, like LZ4 and Snappy,
187 * it can always return false.
188 * default: Returns false.
190 virtual bool canUncompress(
191 const folly::IOBuf* data,
192 folly::Optional<uint64_t> uncompressedLength = folly::none) const;
195 // default: no limits (save for special value UNLIMITED_UNCOMPRESSED_LENGTH)
196 virtual uint64_t doMaxUncompressedLength() const;
197 // default: doesn't need uncompressed length
198 virtual bool doNeedsUncompressedLength() const;
// Codec-specific compression hook. Pure virtual: every concrete codec must
// implement it.
199 virtual std::unique_ptr<IOBuf> doCompress(const folly::IOBuf* data) = 0;
// Codec-specific decompression hook. Pure virtual: every concrete codec must
// implement it. uncompressedLength is optional; presumably it is the value the
// caller handed to uncompress() -- confirm against the implementation file.
200 virtual std::unique_ptr<IOBuf> doUncompress(
201 const folly::IOBuf* data,
202 folly::Optional<uint64_t> uncompressedLength) = 0;
203 // default: an implementation is provided by default to wrap the strings into
204 // IOBufs and delegate to the IOBuf methods. This incurs a copy of the output
205 // from IOBuf to string. Implementers, at their discretion, can override
206 // these methods to avoid the copy.
207 virtual std::string doCompressString(StringPiece data);
208 virtual std::string doUncompressString(
210 folly::Optional<uint64_t> uncompressedLength);
// Symbolic compression levels accepted by getCodec() in place of a
// non-negative, codec-dependent level. All three are negative, so they
// cannot be confused with a real (non-negative) level.
215 constexpr int COMPRESSION_LEVEL_FASTEST = -1;
216 constexpr int COMPRESSION_LEVEL_DEFAULT = -2;
217 constexpr int COMPRESSION_LEVEL_BEST = -3;
220 * Return a codec for the given type. Throws on error. The level
221 * is a non-negative codec-dependent integer indicating the level of
222 * compression desired, or one of the following constants:
224 * COMPRESSION_LEVEL_FASTEST is fastest (uses least CPU / memory,
226 * COMPRESSION_LEVEL_DEFAULT is the default (likely a tradeoff between
228 * COMPRESSION_LEVEL_BEST is the best compression (uses most CPU / memory,
231 * When decompressing, the compression level is ignored. All codecs will
232 * decompress all data compressed with a codec of the same type, regardless
233 * of compression level.
235 std::unique_ptr<Codec> getCodec(CodecType type,
236 int level = COMPRESSION_LEVEL_DEFAULT);
239 * Returns a codec that can uncompress any of the given codec types as well as
240 * {LZ4_FRAME, ZSTD, ZLIB, GZIP, LZMA2, BZIP2}. Appends each default codec to
241 * customCodecs in order, so long as a codec with the same type() isn't already
242 * present. When uncompress() is called, each codec's canUncompress() is called
243 * in the order that they are given. Appended default codecs are checked last.
244 * uncompress() is called on the first codec whose canUncompress() returns true.
245 * An exception is thrown if no codec canUncompress() the data.
246 * An exception is thrown if the chosen codec's uncompress() throws on the data.
247 * An exception is thrown if compress() is called on the returned codec.
249 * Requirements are checked in debug mode and are as follows:
250 * Let headers be the concatenation of every codec's validPrefixes().
251 * 1. Each codec must override validPrefixes() and canUncompress().
252 * 2. No codec's validPrefixes() may be empty.
253 * 3. No header in headers may be empty.
254 * 4. headers must not contain any duplicate elements.
255 * 5. No strict non-empty prefix of any header in headers may be in headers.
257 std::unique_ptr<Codec> getAutoUncompressionCodec(
258 std::vector<std::unique_ptr<Codec>> customCodecs = {});
261 * Check if a specified codec is supported.
263 bool hasCodec(CodecType type);