#include "folly/Memory.h"
#include "folly/Portability.h"
#include "folly/ScopeGuard.h"
+#include "folly/Varint.h"
#include "folly/io/Cursor.h"
namespace folly { namespace io {
+Codec::Codec(CodecType type) : type_(type) { }
+
// Ensure consistent behavior in the nullptr case
std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
return !data->empty() ? doCompress(data) : IOBuf::create(0);
return doMaxUncompressedLength();
}
-CodecType Codec::type() const {
- return doType();
-}
-
bool Codec::doNeedsUncompressedLength() const {
return false;
}
*/
class NoCompressionCodec FOLLY_FINAL : public Codec {
public:
- static std::unique_ptr<Codec> create(int level);
- explicit NoCompressionCodec(int level);
+ static std::unique_ptr<Codec> create(int level, CodecType type);
+ explicit NoCompressionCodec(int level, CodecType type);
private:
- CodecType doType() const FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
uint64_t uncompressedLength) FOLLY_OVERRIDE;
};
-std::unique_ptr<Codec> NoCompressionCodec::create(int level) {
- return make_unique<NoCompressionCodec>(level);
+std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
+ return make_unique<NoCompressionCodec>(level, type);
}
-NoCompressionCodec::NoCompressionCodec(int level) {
+NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
+ : Codec(type) {
+ DCHECK(type == CodecType::NO_COMPRESSION);
switch (level) {
case COMPRESSION_LEVEL_DEFAULT:
case COMPRESSION_LEVEL_FASTEST:
}
}
-CodecType NoCompressionCodec::doType() const {
- return CodecType::NO_COMPRESSION;
-}
-
std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
const IOBuf* data) {
return data->clone();
*/
class LZ4Codec FOLLY_FINAL : public Codec {
public:
- static std::unique_ptr<Codec> create(int level);
- explicit LZ4Codec(int level);
+ static std::unique_ptr<Codec> create(int level, CodecType type);
+ explicit LZ4Codec(int level, CodecType type);
private:
bool doNeedsUncompressedLength() const FOLLY_OVERRIDE;
uint64_t doMaxUncompressedLength() const FOLLY_OVERRIDE;
- CodecType doType() const FOLLY_OVERRIDE;
+
+ bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
+
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
bool highCompression_;
};
-std::unique_ptr<Codec> LZ4Codec::create(int level) {
- return make_unique<LZ4Codec>(level);
+std::unique_ptr<Codec> LZ4Codec::create(int level, CodecType type) {
+ return make_unique<LZ4Codec>(level, type);
}
-LZ4Codec::LZ4Codec(int level) {
+LZ4Codec::LZ4Codec(int level, CodecType type) : Codec(type) {
+ DCHECK(type == CodecType::LZ4 || type == CodecType::LZ4_VARINT_SIZE);
+
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
case COMPRESSION_LEVEL_DEFAULT:
}
bool LZ4Codec::doNeedsUncompressedLength() const {
- return true;
+ return !encodeSize();
}
uint64_t LZ4Codec::doMaxUncompressedLength() const {
return 1.8 * (uint64_t(1) << 30);
}
-CodecType LZ4Codec::doType() const {
- return CodecType::LZ4;
+namespace {
+
+void encodeVarintToIOBuf(uint64_t val, folly::IOBuf* out) {
+ DCHECK_GE(out->tailroom(), kMaxVarintLength64);
+ out->append(encodeVarint(val, out->writableTail()));
+}
+
+uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
+ // Must have enough room in *this* buffer.
+ auto p = cursor.peek();
+ folly::ByteRange range(p.first, p.second);
+ uint64_t val = decodeVarint(range);
+ cursor.skip(range.data() - p.first);
+ return val;
}
+} // namespace
+
std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
std::unique_ptr<IOBuf> clone;
if (data->isChained()) {
data = clone.get();
}
- auto out = IOBuf::create(LZ4_compressBound(data->length()));
+ uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0;
+ auto out = IOBuf::create(extraSize + LZ4_compressBound(data->length()));
+ if (encodeSize()) {
+ encodeVarintToIOBuf(data->length(), out.get());
+ }
+
int n;
if (highCompression_) {
- n = LZ4_compress(reinterpret_cast<const char*>(data->data()),
- reinterpret_cast<char*>(out->writableTail()),
- data->length());
- } else {
n = LZ4_compressHC(reinterpret_cast<const char*>(data->data()),
reinterpret_cast<char*>(out->writableTail()),
data->length());
+ } else {
+ n = LZ4_compress(reinterpret_cast<const char*>(data->data()),
+ reinterpret_cast<char*>(out->writableTail()),
+ data->length());
}
CHECK_GE(n, 0);
data = clone.get();
}
- auto out = IOBuf::create(uncompressedLength);
- int n = LZ4_uncompress(reinterpret_cast<const char*>(data->data()),
+ folly::io::Cursor cursor(data);
+ uint64_t actualUncompressedLength;
+ if (encodeSize()) {
+ actualUncompressedLength = decodeVarintFromCursor(cursor);
+ if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+ uncompressedLength != actualUncompressedLength) {
+ throw std::runtime_error("LZ4Codec: invalid uncompressed length");
+ }
+ } else {
+ actualUncompressedLength = uncompressedLength;
+ DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
+ }
+
+ auto out = IOBuf::create(actualUncompressedLength);
+ auto p = cursor.peek();
+ int n = LZ4_uncompress(reinterpret_cast<const char*>(p.first),
reinterpret_cast<char*>(out->writableTail()),
- uncompressedLength);
- if (n != data->length()) {
+ actualUncompressedLength);
+ if (n != p.second) {
throw std::runtime_error(to<std::string>(
"LZ4 decompression returned invalid value ", n));
}
- out->append(uncompressedLength);
+ out->append(actualUncompressedLength);
return out;
}
class SnappyCodec FOLLY_FINAL : public Codec {
public:
- static std::unique_ptr<Codec> create(int level);
- explicit SnappyCodec(int level);
+ static std::unique_ptr<Codec> create(int level, CodecType type);
+ explicit SnappyCodec(int level, CodecType type);
private:
uint64_t doMaxUncompressedLength() const FOLLY_OVERRIDE;
- CodecType doType() const FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
uint64_t uncompressedLength) FOLLY_OVERRIDE;
};
-std::unique_ptr<Codec> SnappyCodec::create(int level) {
- return make_unique<SnappyCodec>(level);
+std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
+ return make_unique<SnappyCodec>(level, type);
}
-SnappyCodec::SnappyCodec(int level) {
+SnappyCodec::SnappyCodec(int level, CodecType type) : Codec(type) {
+ DCHECK(type == CodecType::SNAPPY);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
case COMPRESSION_LEVEL_DEFAULT:
return std::numeric_limits<uint32_t>::max();
}
-CodecType SnappyCodec::doType() const {
- return CodecType::SNAPPY;
-}
-
std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
IOBufSnappySource source(data);
auto out =
*/
class ZlibCodec FOLLY_FINAL : public Codec {
public:
- static std::unique_ptr<Codec> create(int level);
- explicit ZlibCodec(int level);
+ static std::unique_ptr<Codec> create(int level, CodecType type);
+ explicit ZlibCodec(int level, CodecType type);
private:
- CodecType doType() const FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
int level_;
};
-std::unique_ptr<Codec> ZlibCodec::create(int level) {
- return make_unique<ZlibCodec>(level);
+std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
+ return make_unique<ZlibCodec>(level, type);
}
-ZlibCodec::ZlibCodec(int level) {
+ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
+ DCHECK(type == CodecType::ZLIB);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level = 1;
level_ = level;
}
-CodecType ZlibCodec::doType() const {
- return CodecType::ZLIB;
-}
-
std::unique_ptr<IOBuf> ZlibCodec::addOutputBuffer(z_stream* stream,
uint32_t length) {
CHECK_EQ(stream->avail_out, 0);
return out;
}
-typedef std::unique_ptr<Codec> (*CodecFactory)(int);
+typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
CodecFactory gCodecFactories[
static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
+ nullptr, // USER_DEFINED
NoCompressionCodec::create,
LZ4Codec::create,
SnappyCodec::create,
- ZlibCodec::create
+ ZlibCodec::create,
+ LZ4Codec::create
};
} // namespace
throw std::invalid_argument(to<std::string>(
"Compression type ", idx, " not supported"));
}
- auto codec = (*factory)(level);
+ auto codec = (*factory)(level, type);
DCHECK_EQ(static_cast<size_t>(codec->type()), idx);
return codec;
}
namespace folly { namespace io {
enum class CodecType {
+ /**
+ * This codec type is not defined; getCodec() will throw an exception
+ * if used. Useful if deriving your own classes from Codec without
+ * going through the getCodec() interface.
+ */
+ USER_DEFINED = 0,
+
/**
* Use no compression.
* Levels supported: 0
*/
- NO_COMPRESSION = 0,
+ NO_COMPRESSION = 1,
/**
* Use LZ4 compression.
* Levels supported: 1 = fast, 2 = best; default = 1
*/
- LZ4 = 1,
+ LZ4 = 2,
/**
* Use Snappy compression.
* Levels supported: 1
*/
- SNAPPY = 2,
+ SNAPPY = 3,
/**
* Use zlib compression.
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
*/
- ZLIB = 3,
+ ZLIB = 4,
- NUM_CODEC_TYPES = 4,
+ /**
+ * Use LZ4 compression, prefixed with size (as Varint).
+ */
+ LZ4_VARINT_SIZE = 5,
+
+ NUM_CODEC_TYPES = 6,
};
class Codec {
/**
* Return the codec's type.
*/
- CodecType type() const;
+ CodecType type() const { return type_; }
/**
* Does this codec need the exact uncompressed length on decompression?
const IOBuf* data,
uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH);
+ protected:
+ explicit Codec(CodecType type);
+
private:
// default: no limits (save for special value UNKNOWN_UNCOMPRESSED_LENGTH)
virtual uint64_t doMaxUncompressedLength() const;
// default: doesn't need uncompressed length
virtual bool doNeedsUncompressedLength() const;
- virtual CodecType doType() const = 0;
virtual std::unique_ptr<IOBuf> doCompress(const folly::IOBuf* data) = 0;
virtual std::unique_ptr<IOBuf> doUncompress(const folly::IOBuf* data,
uint64_t uncompressedLength) = 0;
+
+ CodecType type_;
};
constexpr int COMPRESSION_LEVEL_FASTEST = -1;
* FASTEST and BEST)
* COMPRESSION_LEVEL_BEST is the best compression (uses most CPU / memory,
* best compression)
+ *
+ * When decompressing, the compression level is ignored. All codecs will
+ * decompress all data compressed with the a codec of the same type, regardless
+ * of compression level.
*/
std::unique_ptr<Codec> getCodec(CodecType type,
int level = COMPRESSION_LEVEL_DEFAULT);