From: Nick Terrell <terrelln@fb.com>
Date: Tue, 21 Mar 2017 18:13:36 +0000 (-0700)
Subject: Add IOBuf::cloneCoalesced()
X-Git-Tag: v2017.03.27.00~19
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=630673beeb35e790ed27c38ea5abe1f5436cb08e;p=folly.git

Add IOBuf::cloneCoalesced()

Summary:
`LZ4Codec::doCompress()` and `doUncompress()` need to coalesce a `const IOBuf*`, so they `clone()` and then `coalesce()`.
It is more efficient to do this in one step, as shown by the benchmarks.

Reviewed By: yfeldblum, simpkins

Differential Revision: D4727802

fbshipit-source-id: f8d5f6187f2ee804301232b3c75b848330169fc5
---

diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp
index 27fedd81..c626be74 100644
--- a/folly/io/Compression.cpp
+++ b/folly/io/Compression.cpp
@@ -303,12 +303,11 @@ uint64_t LZ4Codec::doMaxUncompressedLength() const {
 }
 
 std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
-  std::unique_ptr<IOBuf> clone;
+  IOBuf clone;
   if (data->isChained()) {
     // LZ4 doesn't support streaming, so we have to coalesce
-    clone = data->clone();
-    clone->coalesce();
-    data = clone.get();
+    clone = data->cloneCoalescedAsValue();
+    data = &clone;
   }
 
   uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0;
@@ -345,12 +344,11 @@ std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
 std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
     const IOBuf* data,
     uint64_t uncompressedLength) {
-  std::unique_ptr<IOBuf> clone;
+  IOBuf clone;
   if (data->isChained()) {
     // LZ4 doesn't support streaming, so we have to coalesce
-    clone = data->clone();
-    clone->coalesce();
-    data = clone.get();
+    clone = data->cloneCoalescedAsValue();
+    data = &clone;
   }
 
   folly::io::Cursor cursor(data);
diff --git a/folly/io/IOBuf.cpp b/folly/io/IOBuf.cpp
index 2a295aa8..571e772d 100644
--- a/folly/io/IOBuf.cpp
+++ b/folly/io/IOBuf.cpp
@@ -513,6 +513,10 @@ unique_ptr<IOBuf> IOBuf::cloneOne() const {
   return make_unique<IOBuf>(cloneOneAsValue());
 }
 
+unique_ptr<IOBuf> IOBuf::cloneCoalesced() const {
+  return make_unique<IOBuf>(cloneCoalescedAsValue());
+}
+
 IOBuf IOBuf::cloneAsValue() const {
   auto tmp = cloneOneAsValue();
 
@@ -537,6 +541,36 @@ IOBuf IOBuf::cloneOneAsValue() const {
       length_);
 }
 
+IOBuf IOBuf::cloneCoalescedAsValue() const {
+  if (!isChained()) {
+    return cloneOneAsValue();
+  }
+  // Coalesce into newBuf
+  const uint64_t newLength = computeChainDataLength();
+  const uint64_t newHeadroom = headroom();
+  const uint64_t newTailroom = prev()->tailroom();
+  const uint64_t newCapacity = newLength + newHeadroom + newTailroom;
+  IOBuf newBuf{CREATE, newCapacity};
+  newBuf.advance(newHeadroom);
+
+  auto current = this;
+  do {
+    if (current->length() > 0) {
+      DCHECK_NOTNULL(current->data());
+      DCHECK_LE(current->length(), newBuf.tailroom());
+      memcpy(newBuf.writableTail(), current->data(), current->length());
+      newBuf.append(current->length());
+    }
+    current = current->next();
+  } while (current != this);
+
+  DCHECK_EQ(newLength, newBuf.length());
+  DCHECK_EQ(newHeadroom, newBuf.headroom());
+  DCHECK_LE(newTailroom, newBuf.tailroom());
+
+  return newBuf;
+}
+
 void IOBuf::unshareOneSlow() {
   // Allocate a new buffer for the data
   uint8_t* buf;
diff --git a/folly/io/IOBuf.h b/folly/io/IOBuf.h
index 87a3ec9e..15f8839c 100644
--- a/folly/io/IOBuf.h
+++ b/folly/io/IOBuf.h
@@ -1123,6 +1123,26 @@ class IOBuf {
    */
   IOBuf cloneOneAsValue() const;
 
+  /**
+   * Return a new unchained IOBuf that may share the same data as this chain.
+   *
+   * If the IOBuf chain is not chained then the new IOBuf will point to the same
+   * underlying data buffer as the original chain. Otherwise, it will clone and
+   * coalesce the IOBuf chain.
+   *
+   * The new IOBuf will have at least as much headroom as the first IOBuf in the
+   * chain, and at least as much tailroom as the last IOBuf in the chain.
+   *
+   * Throws std::bad_alloc on error.
+   */
+  std::unique_ptr<IOBuf> cloneCoalesced() const;
+
+  /**
+   * Similar to cloneCoalesced(). But returns IOBuf by value rather than
+   * heap-allocating it.
+   */
+  IOBuf cloneCoalescedAsValue() const;
+
   /**
    * Similar to Clone(). But use other as the head node. Other nodes in the
    * chain (if any) will be allocted on heap.
diff --git a/folly/io/test/IOBufBenchmark.cpp b/folly/io/test/IOBufBenchmark.cpp
index 2298ddbb..1fa52b18 100644
--- a/folly/io/test/IOBufBenchmark.cpp
+++ b/folly/io/test/IOBufBenchmark.cpp
@@ -70,6 +70,23 @@ BENCHMARK(copyBenchmark, iters) {
   }
 }
 
+BENCHMARK(cloneCoalescedBaseline, iters) {
+  std::unique_ptr<IOBuf> buf = IOBuf::createChain(100, 10);
+  while (iters--) {
+    auto clone = buf->cloneAsValue();
+    clone.coalesce();
+    folly::doNotOptimizeAway(clone.capacity());
+  }
+}
+
+BENCHMARK_RELATIVE(cloneCoalescedBenchmark, iters) {
+  std::unique_ptr<IOBuf> buf = IOBuf::createChain(100, 10);
+  while (iters--) {
+    auto copy = buf->cloneCoalescedAsValue();
+    folly::doNotOptimizeAway(copy.capacity());
+  }
+}
+
 /**
  * ============================================================================
  * folly/io/test/IOBufBenchmark.cpp                relative  time/iter  iters/s
@@ -80,8 +97,10 @@ BENCHMARK(copyBenchmark, iters) {
  * cloneIntoBenchmark                                          30.03ns   33.30M
  * moveBenchmark                                               15.35ns   65.14M
  * copyBenchmark                                               33.63ns   29.73M
+ * cloneCoalescedBaseline                                     344.33ns    2.90M
+ * cloneCoalescedBenchmark                          605.62%    56.86ns   17.59M
  * ============================================================================
-*/
+ */
 
 int main(int argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
diff --git a/folly/io/test/IOBufTest.cpp b/folly/io/test/IOBufTest.cpp
index 6e0cf441..e33287bb 100644
--- a/folly/io/test/IOBufTest.cpp
+++ b/folly/io/test/IOBufTest.cpp
@@ -531,6 +531,15 @@ TEST(IOBuf, Chaining) {
   gen.seed(fillSeed);
   checkChain(chainClone.get(), gen);
 
+  // cloneCoalesced
+  {
+    auto chainCloneCoalesced = chainClone->cloneCoalesced();
+    EXPECT_EQ(1, chainCloneCoalesced->countChainElements());
+    EXPECT_EQ(fullLength, chainCloneCoalesced->computeChainDataLength());
+    gen.seed(fillSeed);
+    checkChain(chainCloneCoalesced.get(), gen);
+  }
+
   // Coalesce the entire chain
   chainClone->coalesce();
   EXPECT_EQ(1, chainClone->countChainElements());
@@ -1343,3 +1352,45 @@ TEST(IOBuf, CoalesceEmptyBuffers) {
 
   EXPECT_TRUE(ByteRange(StringPiece("hello")) == br);
 }
+
+TEST(IOBuf, CloneCoalescedChain) {
+  auto b = IOBuf::createChain(1000, 100);
+  b->advance(10);
+  const uint32_t fillSeed = 0x12345678;
+  boost::mt19937 gen(fillSeed);
+  {
+    auto c = b.get();
+    uint64_t length = c->tailroom();
+    do {
+      length = std::min(length, c->tailroom());
+      c->append(length--);
+      fillBuf(c, gen);
+      c = c->next();
+    } while (c != b.get());
+  }
+  auto c = b->cloneCoalescedAsValue();
+  EXPECT_FALSE(c.isChained()); // Not chained
+  EXPECT_FALSE(c.isSharedOne()); // Not shared
+  EXPECT_EQ(b->headroom(), c.headroom()); // Preserves headroom
+  EXPECT_LE(b->prev()->tailroom(), c.tailroom()); // Preserves minimum tailroom
+  EXPECT_EQ(b->computeChainDataLength(), c.length()); // Same length
+  gen.seed(fillSeed);
+  checkBuf(&c, gen); // Same contents
+}
+
+TEST(IOBuf, CloneCoalescedSingle) {
+  auto b = IOBuf::create(1000);
+  b->advance(10);
+  b->append(900);
+  const uint32_t fillSeed = 0x12345678;
+  boost::mt19937 gen(fillSeed);
+  fillBuf(b.get(), gen);
+
+  auto c = b->cloneCoalesced();
+  EXPECT_FALSE(c->isChained()); // Not chained
+  EXPECT_TRUE(c->isSharedOne()); // Shared
+  EXPECT_EQ(b->buffer(), c->buffer());
+  EXPECT_EQ(b->capacity(), c->capacity());
+  EXPECT_EQ(b->data(), c->data());
+  EXPECT_EQ(b->length(), c->length());
+}