From db54a9093e24d174bd8ec5c36e0ad9da04514e6c Mon Sep 17 00:00:00 2001 From: Tom Jackson Date: Tue, 28 May 2013 18:17:37 -0700 Subject: [PATCH] eachToTuple() Summary: For parsing records. `eachToPair` might be a worthwhile specialization. Test Plan: Unit tests, benchmarks maybe? Reviewed By: antoine@fb.com FB internal diff: D827441 --- folly/experimental/Gen.h | 9 ++ folly/experimental/StringGen-inl.h | 42 ++++++ folly/experimental/StringGen.h | 43 +++++- folly/experimental/test/GenBenchmark.cpp | 161 +++++++++++++++++------ folly/experimental/test/GenTest.cpp | 99 ++++++++++++++ 5 files changed, 314 insertions(+), 40 deletions(-) diff --git a/folly/experimental/Gen.h b/folly/experimental/Gen.h index 264ebf66..47fab197 100644 --- a/folly/experimental/Gen.h +++ b/folly/experimental/Gen.h @@ -220,6 +220,15 @@ class To { } }; +// Specialization to allow String->StringPiece conversion +template <> +class To { + public: + StringPiece operator()(StringPiece src) const { + return src; + } +}; + namespace detail { template diff --git a/folly/experimental/StringGen-inl.h b/folly/experimental/StringGen-inl.h index 8ddab93a..a763dda0 100644 --- a/folly/experimental/StringGen-inl.h +++ b/folly/experimental/StringGen-inl.h @@ -216,6 +216,48 @@ class UnsplitBuffer : public Operator> { }; +/** + * Hack for static for-like constructs + */ +template +inline Target passthrough(Target target) { return target; } + +/** + * SplitTo - For splitting a record and immediately converting it to a + * target tuple type. 
Primarily used through the 'eachToTuple' helper, like so: + * + * auto config + * = split("1:a 2:b", ' ') + * | eachToTuple<int, string>(':') + * | as<vector<tuple<int, string>>>(); + * + */ +template +class SplitTo { + Delimiter delimiter_; + public: + explicit SplitTo(Delimiter delimiter) + : delimiter_(delimiter) {} + + TargetContainer operator()(StringPiece line) const { + int i = 0; + StringPiece fields[sizeof...(Targets)]; + // HACK(tjackson): Used for referencing fields[] corresponding to variadic + // template parameters. + auto eatField = [&]() -> StringPiece& { return fields[i++]; }; + if (!split(delimiter_, + line, + detail::passthrough(eatField())...)) { + throw std::runtime_error("field count mismatch"); + } + i = 0; + return TargetContainer(To()(eatField())...); + } +}; + } // namespace detail + } // namespace gen } // namespace folly diff --git a/folly/experimental/StringGen.h b/folly/experimental/StringGen.h index 14e5115d..e4b02d9a 100644 --- a/folly/experimental/StringGen.h +++ b/folly/experimental/StringGen.h @@ -18,6 +18,7 @@ #define FOLLY_STRINGGEN_H_ #include "folly/Range.h" +#include "folly/experimental/Gen.h" namespace folly { namespace gen { @@ -31,6 +32,12 @@ class Unsplit; template class UnsplitBuffer; + +template +class SplitTo; + } // namespace detail /** @@ -96,7 +103,7 @@ Unsplit unsplit(const char* delimiter) { template> -UnsplitBuffer unsplit(const Delimiter& delimiter, OutputBuffer* outputBuffer) { +UnsplitBuffer unsplit(Delimiter delimiter, OutputBuffer* outputBuffer) { return UnsplitBuffer(delimiter, outputBuffer); } @@ -106,6 +113,40 @@ UnsplitBuffer unsplit(const char* delimiter, OutputBuffer* outputBuffer) { return UnsplitBuffer(delimiter, outputBuffer); } + +template +detail::Map, char, Targets...>> +eachToTuple(char delim) { + return detail::Map< + detail::SplitTo, char, Targets...>>( + detail::SplitTo, char, Targets...>(delim)); +} + +template +detail::Map, fbstring, Targets...>> +eachToTuple(StringPiece delim) { + return detail::Map< + detail::SplitTo, fbstring, 
Targets...>>( + detail::SplitTo, fbstring, Targets...>(delim)); +} + +template +detail::Map, char, First, Second>> +eachToPair(char delim) { + return detail::Map< + detail::SplitTo, char, First, Second>>( + detail::SplitTo, char, First, Second>(delim)); +} + +template +detail::Map, fbstring, First, Second>> +eachToPair(StringPiece delim) { + return detail::Map< + detail::SplitTo, fbstring, First, Second>>( + detail::SplitTo, fbstring, First, Second>( + to(delim))); +} + } // namespace gen } // namespace folly diff --git a/folly/experimental/test/GenBenchmark.cpp b/folly/experimental/test/GenBenchmark.cpp index 20516a21..2b03ca0f 100644 --- a/folly/experimental/test/GenBenchmark.cpp +++ b/folly/experimental/test/GenBenchmark.cpp @@ -490,8 +490,6 @@ void StringUnsplit_Gen(size_t iters, size_t joinSize) { folly::doNotOptimizeAway(s); } -BENCHMARK_DRAW_LINE() - BENCHMARK_PARAM(StringUnsplit_Gen, 1000) BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 2000) BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 4000) @@ -499,6 +497,84 @@ BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 8000) BENCHMARK_DRAW_LINE() +fbstring records += seq(1, 1000) + | mapped([](size_t i) { + return folly::to(i, ' ', i * i, ' ', i * i * i); + }) + | unsplit('\n'); + +BENCHMARK(Records_EachToTuple, iters) { + size_t s = 0; + for (size_t i = 0; i < iters; i += 1000) { + s += split(records, '\n') + | eachToTuple(' ') + | get<1>() + | sum; + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(Records_VectorStringPieceReused, iters) { + size_t s = 0; + std::vector fields; + for (size_t i = 0; i < iters; i += 1000) { + s += split(records, '\n') + | mapped([&](StringPiece line) { + fields.clear(); + folly::split(' ', line, fields); + CHECK(fields.size() == 3); + return std::make_tuple( + folly::to(fields[0]), + folly::to(fields[1]), + StringPiece(fields[2])); + }) + | get<1>() + | sum; + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(Records_VectorStringPiece, iters) { + size_t s = 0; + for (size_t i 
= 0; i < iters; i += 1000) { + s += split(records, '\n') + | mapped([](StringPiece line) { + std::vector fields; + folly::split(' ', line, fields); + CHECK(fields.size() == 3); + return std::make_tuple( + folly::to(fields[0]), + folly::to(fields[1]), + StringPiece(fields[2])); + }) + | get<1>() + | sum; + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(Records_VectorString, iters) { + size_t s = 0; + for (size_t i = 0; i < iters; i += 1000) { + s += split(records, '\n') + | mapped([](StringPiece line) { + std::vector fields; + folly::split(' ', line, fields); + CHECK(fields.size() == 3); + return std::make_tuple( + folly::to(fields[0]), + folly::to(fields[1]), + StringPiece(fields[2])); + }) + | get<1>() + | sum; + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_DRAW_LINE() + BENCHMARK(ByLine_Pipes, iters) { std::thread thread; int rfd; @@ -532,59 +608,66 @@ BENCHMARK(ByLine_Pipes, iters) { } } -// Results from a dual core Xeon L5520 @ 2.27GHz: -// // ============================================================================ // folly/experimental/test/GenBenchmark.cpp relative time/iter iters/s // ============================================================================ -// Sum_Basic_NoGen 354.70ns 2.82M -// Sum_Basic_Gen 95.88% 369.92ns 2.70M +// Sum_Basic_NoGen 374.39ns 2.67M +// Sum_Basic_Gen 101.05% 370.48ns 2.70M +// ---------------------------------------------------------------------------- +// Sum_Vector_NoGen 198.84ns 5.03M +// Sum_Vector_Gen 98.14% 202.60ns 4.94M +// ---------------------------------------------------------------------------- +// Member 4.56us 219.11K +// MapMember 400.21% 1.14us 876.89K // ---------------------------------------------------------------------------- -// Sum_Vector_NoGen 211.89ns 4.72M -// Sum_Vector_Gen 97.49% 217.35ns 4.60M +// Count_Vector_NoGen 13.99us 71.47K +// Count_Vector_Gen 106.73% 13.11us 76.28K // ---------------------------------------------------------------------------- -// Count_Vector_NoGen 
13.93us 71.78K -// Count_Vector_Gen 106.38% 13.10us 76.36K +// Fib_Sum_NoGen 4.27us 234.07K +// Fib_Sum_Gen 43.18% 9.90us 101.06K +// Fib_Sum_Gen_Static 92.08% 4.64us 215.53K // ---------------------------------------------------------------------------- -// Fib_Sum_NoGen 4.54us 220.07K -// Fib_Sum_Gen 45.81% 9.92us 100.82K -// Fib_Sum_Gen_Static 100.00% 4.54us 220.05K +// VirtualGen_0Virtual 12.07us 82.83K +// VirtualGen_1Virtual 32.46% 37.19us 26.89K +// VirtualGen_2Virtual 24.36% 49.55us 20.18K +// VirtualGen_3Virtual 18.16% 66.49us 15.04K // ---------------------------------------------------------------------------- -// VirtualGen_0Virtual 12.03us 83.14K -// VirtualGen_1Virtual 32.89% 36.57us 27.34K -// VirtualGen_2Virtual 24.98% 48.15us 20.77K -// VirtualGen_3Virtual 17.82% 67.49us 14.82K +// Concat_NoGen 1.90us 527.40K +// Concat_Gen 86.73% 2.19us 457.39K // ---------------------------------------------------------------------------- -// Concat_NoGen 1.92us 520.46K -// Concat_Gen 102.79% 1.87us 534.97K +// Composed_NoGen 546.18ns 1.83M +// Composed_Gen 100.41% 543.93ns 1.84M +// Composed_GenRegular 100.42% 543.92ns 1.84M // ---------------------------------------------------------------------------- -// Composed_NoGen 545.64ns 1.83M -// Composed_Gen 99.65% 547.55ns 1.83M -// Composed_GenRegular 99.64% 547.62ns 1.83M +// Sample 146.68ms 6.82 // ---------------------------------------------------------------------------- -// StringResplitter_Big 120.88us 8.27K -// StringResplitter_Small 14.39% 839.94us 1.19K +// StringResplitter_Big 124.80us 8.01K +// StringResplitter_Small 15.11% 825.74us 1.21K // ---------------------------------------------------------------------------- -// StringSplit_Old 421.09ns 2.37M -// StringSplit_Gen_Vector 97.73% 430.87ns 2.32M +// StringSplit_Old 393.49ns 2.54M +// StringSplit_Gen_Vector 121.47% 323.93ns 3.09M // ---------------------------------------------------------------------------- -// StringSplit_Old_ReuseVector 80.25ns 
12.46M -// StringSplit_Gen_ReuseVector 98.99% 81.07ns 12.34M -// StringSplit_Gen 117.23% 68.45ns 14.61M -// StringSplit_Gen_Take 115.23% 69.64ns 14.36M +// StringSplit_Old_ReuseVector 80.77ns 12.38M +// StringSplit_Gen_ReuseVector 102.02% 79.17ns 12.63M +// StringSplit_Gen 123.78% 65.25ns 15.32M +// StringSplit_Gen_Take 123.44% 65.43ns 15.28M // ---------------------------------------------------------------------------- -// StringUnsplit_Old 34.45us 29.02K -// StringUnsplit_Old_ReusedBuffer 100.37% 34.33us 29.13K -// StringUnsplit_Gen 106.27% 32.42us 30.84K -// StringUnsplit_Gen_ReusedBuffer 105.61% 32.62us 30.65K +// StringUnsplit_Old 29.36us 34.06K +// StringUnsplit_Old_ReusedBuffer 100.25% 29.29us 34.14K +// StringUnsplit_Gen 103.38% 28.40us 35.21K +// StringUnsplit_Gen_ReusedBuffer 109.85% 26.73us 37.41K // ---------------------------------------------------------------------------- +// StringUnsplit_Gen(1000) 32.30us 30.96K +// StringUnsplit_Gen(2000) 49.75% 64.93us 15.40K +// StringUnsplit_Gen(4000) 24.74% 130.60us 7.66K +// StringUnsplit_Gen(8000) 12.31% 262.35us 3.81K // ---------------------------------------------------------------------------- -// StringUnsplit_Gen(1000) 32.20us 31.06K -// StringUnsplit_Gen(2000) 49.41% 65.17us 15.34K -// StringUnsplit_Gen(4000) 22.75% 141.52us 7.07K -// StringUnsplit_Gen(8000) 11.20% 287.53us 3.48K +// Records_EachToTuple 75.03ns 13.33M +// Records_VectorStringPieceReused 81.79% 91.74ns 10.90M +// Records_VectorStringPiece 36.47% 205.77ns 4.86M +// Records_VectorString 12.90% 581.70ns 1.72M // ---------------------------------------------------------------------------- -// ByLine_Pipes 126.58ns 7.90M +// ByLine_Pipes 121.68ns 8.22M // ============================================================================ int main(int argc, char *argv[]) { diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp index d732e97e..ddb7bf8a 100644 --- a/folly/experimental/test/GenTest.cpp +++ 
b/folly/experimental/test/GenTest.cpp @@ -1001,6 +1001,105 @@ TEST(StringGen, EmptyResplit) { } } +TEST(StringGen, EachToTuple) { + { + auto lines = "2:1.414:yo 3:1.732:hi"; + auto actual + = split(lines, ' ') + | eachToTuple(':') + | as(); + vector> expected { + make_tuple(2, 1.414, "yo"), + make_tuple(3, 1.732, "hi"), + }; + EXPECT_EQ(expected, actual); + } + { + auto lines = "2 3"; + auto actual + = split(lines, ' ') + | eachToTuple(',') + | as(); + vector> expected { + make_tuple(2), + make_tuple(3), + }; + EXPECT_EQ(expected, actual); + } + { + // StringPiece target + auto lines = "1:cat 2:dog"; + auto actual + = split(lines, ' ') + | eachToTuple(':') + | as(); + vector> expected { + make_tuple(1, "cat"), + make_tuple(2, "dog"), + }; + EXPECT_EQ(expected, actual); + } + { + // Empty field + auto lines = "2:tjackson:4 3::5"; + auto actual + = split(lines, ' ') + | eachToTuple(':') + | as(); + vector> expected { + make_tuple(2, "tjackson", 4), + make_tuple(3, "", 5), + }; + EXPECT_EQ(expected, actual); + } + { + // Excess fields + auto lines = "1:2 3:4:5"; + EXPECT_THROW((split(lines, ' ') + | eachToTuple(':') + | as()), + std::runtime_error); + } + { + // Missing fields + auto lines = "1:2:3 4:5"; + EXPECT_THROW((split(lines, ' ') + | eachToTuple(':') + | as()), + std::runtime_error); + } +} + +TEST(StringGen, EachToPair) { + { + // char delimiters + auto lines = "2:1.414 3:1.732"; + auto actual + = split(lines, ' ') + | eachToPair(':') + | as>(); + std::map expected { + { 3, 1.732 }, + { 2, 1.414 }, + }; + EXPECT_EQ(expected, actual); + } + { + // string delimiters + auto lines = "ab=>cd ef=>gh"; + auto actual + = split(lines, ' ') + | eachToPair("=>") + | as>(); + std::map expected { + { "ab", "cd" }, + { "ef", "gh" }, + }; + EXPECT_EQ(expected, actual); + } +} + + TEST(StringGen, Resplit) { auto collect = eachTo() | as(); { -- 2.34.1