From 43300949da6b0714233d968c65cd496cb4fe7c2a Mon Sep 17 00:00:00 2001 From: Andrew Tulloch Date: Mon, 4 Feb 2013 16:54:37 -0800 Subject: [PATCH] unsplit Summary: 1. Incorporates @tjackson's offline comments. 2. See docstrings and examples for basic usage. 3. The usecase this covers (for me and others) was where I have some map or vector of elements, and I want to form a string representation of this (for logging, fb303 exported values, etc.). Various uses have existed in fbcode (e.g. UP2X shard representations), and this seemed like a useful utility. Test Plan: unit tests. Reviewed By: tjackson@fb.com FB internal diff: D696794 --- folly/experimental/StringGen-inl.h | 63 +++++++++- folly/experimental/StringGen.h | 50 +++++++- folly/experimental/test/GenBenchmark.cpp | 143 ++++++++++++++++++----- folly/experimental/test/GenTest.cpp | 35 ++++++ 4 files changed, 261 insertions(+), 30 deletions(-) diff --git a/folly/experimental/StringGen-inl.h b/folly/experimental/StringGen-inl.h index 005381a1..c3fc4d12 100644 --- a/folly/experimental/StringGen-inl.h +++ b/folly/experimental/StringGen-inl.h @@ -18,6 +18,8 @@ #error This file may only be included from folly/experimental/StringGen.h #endif +#include "folly/Conv.h" +#include "folly/String.h" #include "folly/io/IOBuf.h" namespace folly { @@ -151,8 +153,67 @@ class SplitStringSource : public GenImpl { } }; +/** + * Unsplit - For joining tokens from a generator into a string. This is + * the inverse of `split` above. + * + * This type is primarily used through the 'unsplit' function. 
+ */ +template +class Unsplit : public Operator> { + Delimiter delimiter_; + public: + explicit Unsplit(const Delimiter& delimiter) + : delimiter_(delimiter) { + } + + template + Output compose(const GenImpl& source) const { + Output outputBuffer; + UnsplitBuffer unsplitter(delimiter_, &outputBuffer); + unsplitter.compose(source); + return outputBuffer; + } +}; + +/** + * UnsplitBuffer - For joining tokens from a generator into a string, + * and inserting them into a custom buffer. + * + * This type is primarily used through the 'unsplit' function. + */ +template +class UnsplitBuffer : public Operator> { + Delimiter delimiter_; + OutputBuffer* outputBuffer_; + public: + UnsplitBuffer(const Delimiter& delimiter, OutputBuffer* outputBuffer) + : delimiter_(delimiter) + , outputBuffer_(outputBuffer) { + CHECK(outputBuffer); + } + + template + void compose(const GenImpl& source) const { + // If the output buffer is empty, we skip inserting the delimiter for the + // first element. + bool skipDelim = outputBuffer_->empty(); + source | [&](Value v) { + if (skipDelim) { + skipDelim = false; + toAppend(std::forward(v), outputBuffer_); + } else { + toAppend(delimiter_, std::forward(v), outputBuffer_); + } + }; + } +}; + } // namespace detail } // namespace gen } // namespace folly - diff --git a/folly/experimental/StringGen.h b/folly/experimental/StringGen.h index c614cce5..aed03c59 100644 --- a/folly/experimental/StringGen.h +++ b/folly/experimental/StringGen.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2013 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,6 +25,12 @@ namespace gen { namespace detail { class StringResplitter; class SplitStringSource; + +template +class Unsplit; + +template +class UnsplitBuffer; } // namespace detail /** @@ -46,6 +52,48 @@ S split(const StringPiece& source, char delimiter) { return S(source, delimiter); } +/* + * Joins a sequence of tokens into a string, with the chosen delimiter. + * + * E.G. + * fbstring result = split("a,b,c", ',') | unsplit(","); + * assert(result == "a,b,c"); + * + * std::string result = split("a,b,c", ',') | unsplit(" "); + * assert(result == "a b c"); + */ + + +// NOTE: The template arguments are reversed to allow the user to cleanly +// specify the output type while still inferring the type of the delimiter. +template> +Unsplit unsplit(const Delimiter& delimiter) { + return Unsplit(delimiter); +} + +/* + * Joins a sequence of tokens into a string, appending them to the output + * buffer. If the output buffer is empty, an initial delimiter will not be + * inserted at the start. + * + * E.G. + * std::string buffer; + * split("a,b,c", ',') | unsplit(",", &buffer); + * assert(buffer == "a,b,c"); + * + * std::string anotherBuffer("initial"); + * split("a,b,c", ',') | unsplit(",", &anotherBuffer); + * assert(anotherBuffer == "initial,a,b,c"); + */ +template> +UnsplitBuffer unsplit(const Delimiter& delimiter, OutputBuffer* outputBuffer) { + return UnsplitBuffer(delimiter, outputBuffer); +} + } // namespace gen } // namespace folly diff --git a/folly/experimental/test/GenBenchmark.cpp b/folly/experimental/test/GenBenchmark.cpp index 956fb756..d9949117 100644 --- a/folly/experimental/test/GenBenchmark.cpp +++ b/folly/experimental/test/GenBenchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2013 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,6 +39,12 @@ static vector testVector = seq(1, testSize.load()) | mapped([](int) { return rand(); }) | as(); + +static vector testStrVector = + seq(1, testSize.load()) + | eachTo() + | as(); + static vector> testVectorVector = seq(1, 100) | map([](int i) { @@ -386,6 +392,76 @@ BENCHMARK_RELATIVE(StringSplit_Gen_Take, iters) { BENCHMARK_DRAW_LINE() +BENCHMARK(StringUnsplit_Old, iters) { + size_t s = 0; + while (iters--) { + fbstring joined; + join(',', testStrVector, joined); + s += joined.size(); + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(StringUnsplit_Old_ReusedBuffer, iters) { + size_t s = 0; + fbstring joined; + while (iters--) { + joined.clear(); + join(',', testStrVector, joined); + s += joined.size(); + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(StringUnsplit_Gen, iters) { + size_t s = 0; + /* Fresh fbstring per iteration; cf. StringUnsplit_Gen_ReusedBuffer. */ + while (iters--) { + fbstring joined = from(testStrVector) | unsplit(','); + s += joined.size(); + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(StringUnsplit_Gen_ReusedBuffer, iters) { + size_t s = 0; + fbstring buffer; + while (iters--) { + buffer.clear(); + from(testStrVector) | unsplit(',', &buffer); + s += buffer.size(); + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_DRAW_LINE() + +void StringUnsplit_Gen(size_t iters, size_t joinSize) { + std::vector v; + BENCHMARK_SUSPEND { + FOR_EACH_RANGE(i, 0, joinSize) { + v.push_back(to(rand())); + } + } + size_t s = 0; + fbstring buffer; + while (iters--) { + buffer.clear(); + from(v) | unsplit(',', &buffer); + s += buffer.size(); + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK_PARAM(StringUnsplit_Gen, 1000) +BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 2000) +BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 4000) +BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 8000) + +BENCHMARK_DRAW_LINE() + BENCHMARK(ByLine_Pipes, iters) { std::thread thread; int rfd; @@ -424,43 +500,54 @@ BENCHMARK(ByLine_Pipes, iters) { // 
============================================================================ // folly/experimental/test/GenBenchmark.cpp relative time/iter iters/s // ============================================================================ -// Sum_Basic_NoGen 293.77ns 3.40M -// Sum_Basic_Gen 100.24% 293.08ns 3.41M +// Sum_Basic_NoGen 354.70ns 2.82M +// Sum_Basic_Gen 95.88% 369.92ns 2.70M +// ---------------------------------------------------------------------------- +// Sum_Vector_NoGen 211.89ns 4.72M +// Sum_Vector_Gen 97.49% 217.35ns 4.60M +// ---------------------------------------------------------------------------- +// Count_Vector_NoGen 13.93us 71.78K +// Count_Vector_Gen 106.38% 13.10us 76.36K +// ---------------------------------------------------------------------------- +// Fib_Sum_NoGen 4.54us 220.07K +// Fib_Sum_Gen 45.81% 9.92us 100.82K +// Fib_Sum_Gen_Static 100.00% 4.54us 220.05K // ---------------------------------------------------------------------------- -// Sum_Vector_NoGen 199.09ns 5.02M -// Sum_Vector_Gen 98.57% 201.98ns 4.95M +// VirtualGen_0Virtual 12.03us 83.14K +// VirtualGen_1Virtual 32.89% 36.57us 27.34K +// VirtualGen_2Virtual 24.98% 48.15us 20.77K +// VirtualGen_3Virtual 17.82% 67.49us 14.82K // ---------------------------------------------------------------------------- -// Count_Vector_NoGen 12.40us 80.66K -// Count_Vector_Gen 103.07% 12.03us 83.13K +// Concat_NoGen 1.92us 520.46K +// Concat_Gen 102.79% 1.87us 534.97K // ---------------------------------------------------------------------------- -// Fib_Sum_NoGen 3.65us 274.29K -// Fib_Sum_Gen 41.95% 8.69us 115.06K -// Fib_Sum_Gen_Static 86.10% 4.23us 236.15K +// Composed_NoGen 545.64ns 1.83M +// Composed_Gen 99.65% 547.55ns 1.83M +// Composed_GenRegular 99.64% 547.62ns 1.83M // ---------------------------------------------------------------------------- -// VirtualGen_0Virtual 10.10us 99.03K -// VirtualGen_1Virtual 29.67% 34.04us 29.38K -// VirtualGen_2Virtual 20.53% 49.19us 20.33K -// 
VirtualGen_3Virtual 15.22% 66.36us 15.07K +// StringResplitter_Big 120.88us 8.27K +// StringResplitter_Small 14.39% 839.94us 1.19K // ---------------------------------------------------------------------------- -// Concat_NoGen 2.33us 428.35K -// Concat_Gen 85.36% 2.74us 365.62K +// StringSplit_Old 421.09ns 2.37M +// StringSplit_Gen_Vector 97.73% 430.87ns 2.32M // ---------------------------------------------------------------------------- -// Composed_NoGen 552.78ns 1.81M -// Composed_Gen 100.48% 550.14ns 1.82M -// Composed_GenRegular 100.60% 549.50ns 1.82M +// StringSplit_Old_ReuseVector 80.25ns 12.46M +// StringSplit_Gen_ReuseVector 98.99% 81.07ns 12.34M +// StringSplit_Gen 117.23% 68.45ns 14.61M +// StringSplit_Gen_Take 115.23% 69.64ns 14.36M // ---------------------------------------------------------------------------- -// StringResplitter_Big 118.40us 8.45K -// StringResplitter_Small 12.96% 913.23us 1.10K +// StringUnsplit_Old 34.45us 29.02K +// StringUnsplit_Old_ReusedBuffer 100.37% 34.33us 29.13K +// StringUnsplit_Gen 106.27% 32.42us 30.84K +// StringUnsplit_Gen_ReusedBuffer 105.61% 32.62us 30.65K // ---------------------------------------------------------------------------- -// StringSplit_Old 567.61ns 1.76M -// StringSplit_Gen_Vector 146.52% 387.41ns 2.58M // ---------------------------------------------------------------------------- -// StringSplit_Old_ReuseVector 74.90ns 13.35M -// StringSplit_Gen_ReuseVector 112.29% 66.71ns 14.99M -// StringSplit_Gen 122.42% 61.18ns 16.34M -// StringSplit_Gen_Take 134.49% 55.70ns 17.95M +// StringUnsplit_Gen(1000) 32.20us 31.06K +// StringUnsplit_Gen(2000) 49.41% 65.17us 15.34K +// StringUnsplit_Gen(4000) 22.75% 141.52us 7.07K +// StringUnsplit_Gen(8000) 11.20% 287.53us 3.48K // ---------------------------------------------------------------------------- -// ByLine_Pipes 131.18ns 7.62M +// ByLine_Pipes 126.58ns 7.90M // ============================================================================ int main(int argc, 
char *argv[]) { diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp index a4d1a77e..355cd41d 100644 --- a/folly/experimental/test/GenTest.cpp +++ b/folly/experimental/test/GenTest.cpp @@ -719,6 +719,41 @@ TEST(StringGen, Resplit) { } } +template +void runUnsplitSuite(F fn) { + fn("hello, world"); + fn("hello,world,goodbye"); + fn(" "); + fn(""); + fn(", "); + fn(", a, b,c"); +} + +TEST(StringGen, Unsplit) { + + auto basicFn = [](const StringPiece& s) { + EXPECT_EQ(split(s, ',') | unsplit(','), s); + }; + + auto existingBuffer = [](const StringPiece& s) { + folly::fbstring buffer("asdf"); + split(s, ',') | unsplit(',', &buffer); + auto expected = folly::to( + "asdf", s.empty() ? "" : ",", s); + EXPECT_EQ(buffer, expected); + }; + + auto emptyBuffer = [](const StringPiece& s) { + std::string buffer; + split(s, ',') | unsplit(',', &buffer); + EXPECT_EQ(buffer, s); + }; + + runUnsplitSuite(basicFn); + runUnsplitSuite(existingBuffer); + runUnsplitSuite(emptyBuffer); +} + TEST(FileGen, ByLine) { auto collect = eachTo() | as(); test::TemporaryFile file("ByLine"); -- 2.34.1