From: Felix Handte
Date: Wed, 15 Mar 2017 15:48:25 +0000 (-0700)
Subject: Add String Support to Compression Codec
X-Git-Tag: v2017.03.20.00~8
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a5093cb5789077f87204ddf9b0c7c9f261ea9e6c;p=folly.git

Add String Support to Compression Codec

Summary:
This diff adds string-based compress and uncompress support to
folly::io::Codec. By default, the string API is implemented by translating to
and from IOBufs. Room is left for individual implementations to override that
behavior and go straight to the underlying library.

Reviewed By: yfeldblum

Differential Revision: D4690770

fbshipit-source-id: 7e2615da9add8328654505cdda6a2c79583b4558
---

diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp
index 1beea9cc..27fedd81 100644
--- a/folly/io/Compression.cpp
+++ b/folly/io/Compression.cpp
@@ -56,13 +56,26 @@ std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
   uint64_t len = data->computeChainDataLength();
   if (len == 0) {
     return IOBuf::create(0);
-  } else if (len > maxUncompressedLength()) {
+  }
+  if (len > maxUncompressedLength()) {
     throw std::runtime_error("Codec: uncompressed length too large");
   }
 
   return doCompress(data);
 }
 
+std::string Codec::compress(const StringPiece data) {
+  const uint64_t len = data.size();
+  if (len == 0) {
+    return "";
+  }
+  if (len > maxUncompressedLength()) {
+    throw std::runtime_error("Codec: uncompressed length too large");
+  }
+
+  return doCompressString(data);
+}
+
 std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
                                          uint64_t uncompressedLength) {
   if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
@@ -84,6 +97,28 @@ std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
   return doUncompress(data, uncompressedLength);
 }
 
+std::string Codec::uncompress(
+    const StringPiece data,
+    uint64_t uncompressedLength) {
+  if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
+    if (needsUncompressedLength()) {
+      throw std::invalid_argument("Codec: uncompressed length required");
+    }
+  } else if (uncompressedLength > maxUncompressedLength()) {
+    throw std::runtime_error("Codec: uncompressed length too large");
+  }
+
+  if (data.empty()) {
+    if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+        uncompressedLength != 0) {
+      throw std::runtime_error("Codec: invalid uncompressed length");
+    }
+    return "";
+  }
+
+  return doUncompressString(data, uncompressedLength);
+}
+
 bool Codec::needsUncompressedLength() const {
   return doNeedsUncompressedLength();
 }
@@ -100,6 +135,30 @@ uint64_t Codec::doMaxUncompressedLength() const {
   return UNLIMITED_UNCOMPRESSED_LENGTH;
 }
 
+std::string Codec::doCompressString(const StringPiece data) {
+  const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
+  auto outputBuffer = doCompress(&inputBuffer);
+  std::string output;
+  output.reserve(outputBuffer->computeChainDataLength());
+  for (auto range : *outputBuffer) {
+    output.append(reinterpret_cast<const char*>(range.data()), range.size());
+  }
+  return output;
+}
+
+std::string Codec::doUncompressString(
+    const StringPiece data,
+    uint64_t uncompressedLength) {
+  const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
+  auto outputBuffer = doUncompress(&inputBuffer, uncompressedLength);
+  std::string output;
+  output.reserve(outputBuffer->computeChainDataLength());
+  for (auto range : *outputBuffer) {
+    output.append(reinterpret_cast<const char*>(range.data()), range.size());
+  }
+  return output;
+}
+
 namespace {
 
 /**
diff --git a/folly/io/Compression.h b/folly/io/Compression.h
index 78161f31..05ce06d1 100644
--- a/folly/io/Compression.h
+++ b/folly/io/Compression.h
@@ -20,6 +20,7 @@
 #include <limits>
 #include <memory>
 
+#include <folly/Range.h>
 #include <folly/io/IOBuf.h>
 
 /**
@@ -119,6 +120,13 @@ class Codec {
    */
   std::unique_ptr<IOBuf> compress(const folly::IOBuf* data);
 
+  /**
+   * Compresses data. May involve additional copies compared to the overload
+   * that takes and returns IOBufs. Has the same error semantics as the IOBuf
+   * version.
+   */
+  std::string compress(StringPiece data);
+
   /**
    * Uncompress data. Throws std::runtime_error on decompression error.
    *
@@ -138,6 +146,15 @@ class Codec {
       const IOBuf* data,
       uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH);
 
+  /**
+   * Uncompresses data. May involve additional copies compared to the overload
+   * that takes and returns IOBufs. Has the same error semantics as the IOBuf
+   * version.
+   */
+  std::string uncompress(
+      StringPiece data,
+      uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH);
+
  protected:
   explicit Codec(CodecType type);
 
@@ -149,6 +166,14 @@ class Codec {
   virtual std::unique_ptr<IOBuf> doCompress(const folly::IOBuf* data) = 0;
   virtual std::unique_ptr<IOBuf> doUncompress(const folly::IOBuf* data,
                                               uint64_t uncompressedLength) = 0;
+  // default: an implementation is provided by default to wrap the strings into
+  // IOBufs and delegate to the IOBuf methods. This incurs a copy of the output
+  // from IOBuf to string. Implementers, at their discretion, can override
+  // these methods to avoid the copy.
+  virtual std::string doCompressString(StringPiece data);
+  virtual std::string doUncompressString(
+      StringPiece data,
+      uint64_t uncompressedLength);
 
   CodecType type_;
 };
diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp
index cfd3cad3..46f40731 100644
--- a/folly/io/test/CompressionTest.cpp
+++ b/folly/io/test/CompressionTest.cpp
@@ -143,7 +143,9 @@ class CompressionTest
     codec_ = getCodec(std::tr1::get<2>(tup));
   }
 
-  void runSimpleTest(const DataHolder& dh);
+  void runSimpleIOBufTest(const DataHolder& dh);
+
+  void runSimpleStringTest(const DataHolder& dh);
 
  private:
   std::unique_ptr<IOBuf> split(std::unique_ptr<IOBuf> data) const;
@@ -153,7 +155,7 @@ class CompressionTest
   std::unique_ptr<Codec> codec_;
 };
 
-void CompressionTest::runSimpleTest(const DataHolder& dh) {
+void CompressionTest::runSimpleIOBufTest(const DataHolder& dh) {
   const auto original = split(IOBuf::wrapBuffer(dh.data(uncompressedLength_)));
   const auto compressed = split(codec_->compress(original.get()));
   if (!codec_->needsUncompressedLength()) {
@@ -169,6 +171,23 @@ void CompressionTest::runSimpleTest(const DataHolder& dh) {
   }
 }
 
+void CompressionTest::runSimpleStringTest(const DataHolder& dh) {
+  const auto original = std::string(
+      reinterpret_cast<const char*>(dh.data(uncompressedLength_).data()),
+      uncompressedLength_);
+  const auto compressed = codec_->compress(original);
+  if (!codec_->needsUncompressedLength()) {
+    auto uncompressed = codec_->uncompress(compressed);
+    EXPECT_EQ(uncompressedLength_, uncompressed.length());
+    EXPECT_EQ(uncompressed, original);
+  }
+  {
+    auto uncompressed = codec_->uncompress(compressed, uncompressedLength_);
+    EXPECT_EQ(uncompressedLength_, uncompressed.length());
+    EXPECT_EQ(uncompressed, original);
+  }
+}
+
 // Uniformly split data into (potentially empty) chunks.
 std::unique_ptr<IOBuf> CompressionTest::split(
     std::unique_ptr<IOBuf> data) const {
@@ -196,11 +215,19 @@ std::unique_ptr<IOBuf> CompressionTest::split(
 }
 
 TEST_P(CompressionTest, RandomData) {
-  runSimpleTest(randomDataHolder);
+  runSimpleIOBufTest(randomDataHolder);
 }
 
 TEST_P(CompressionTest, ConstantData) {
-  runSimpleTest(constantDataHolder);
+  runSimpleIOBufTest(constantDataHolder);
+}
+
+TEST_P(CompressionTest, RandomDataString) {
+  runSimpleStringTest(randomDataHolder);
+}
+
+TEST_P(CompressionTest, ConstantDataString) {
+  runSimpleStringTest(constantDataHolder);
 }
 
 INSTANTIATE_TEST_CASE_P(