Fix copyright lines
[folly.git] / folly / experimental / flat_combining / test / FlatCombiningBenchmark.cpp
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <folly/Benchmark.h>
18 #include <folly/experimental/flat_combining/test/FlatCombiningTestHelpers.h>
19
20 #include <folly/portability/GTest.h>
21 #include <glog/logging.h>
22
23 using namespace folly::test;
24
25 // use option --benchmark to run folly::Benchmark
26 // use option --direct to run direct benchmark measurements
27 DEFINE_bool(direct, false, "run direct measurement");
28 DEFINE_int32(reps, 10, "number of reps");
29 DEFINE_int32(ops, 100000, "number of operations per rep");
30 DEFINE_int32(lines, 5, "number of cache lines accessed per operation");
31 DEFINE_int32(numRecs, 8, "number of records");
32 DEFINE_int32(work, 1000, "amount of unrelated work per operation");
33
34 static std::vector<int> nthr = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64};
35 static int nthreads;
36 static bool fc;
37 static bool simple;
38 static bool dedicated;
39 static bool tc;
40 static bool syncops;
41
42 // baseline - no combining
43 BENCHMARK(no_combining_base, iters) {
44   fc = false;
45   dedicated = false;
46   tc = false;
47   syncops = false;
48   run_test(
49       nthreads,
50       FLAGS_lines,
51       FLAGS_numRecs,
52       FLAGS_work,
53       iters,
54       fc,
55       simple,
56       dedicated,
57       tc,
58       syncops);
59 }
60
61 BENCHMARK_RELATIVE(no_combining_dup, iters) {
62   run_test(
63       nthreads,
64       FLAGS_lines,
65       FLAGS_numRecs,
66       FLAGS_work,
67       iters,
68       fc,
69       simple,
70       dedicated,
71       tc,
72       syncops);
73 }
74 BENCHMARK_DRAW_LINE()
75
76 // dedicated combiner
77
78 BENCHMARK_DRAW_LINE()
79
80 BENCHMARK_RELATIVE(combining_dedicated_notc_sync, iters) {
81   fc = true;
82   dedicated = true;
83   tc = false;
84   syncops = true;
85   run_test(
86       nthreads,
87       FLAGS_lines,
88       FLAGS_numRecs,
89       FLAGS_work,
90       iters,
91       fc,
92       simple,
93       dedicated,
94       tc,
95       syncops);
96 }
97
98 BENCHMARK_RELATIVE(combining_dedicated_notc_sync_dup, iters) {
99   run_test(
100       nthreads,
101       FLAGS_lines,
102       FLAGS_numRecs,
103       FLAGS_work,
104       iters,
105       fc,
106       simple,
107       dedicated,
108       tc,
109       syncops);
110 }
111
112 BENCHMARK_DRAW_LINE()
113
114 BENCHMARK_RELATIVE(combining_dedicated_notc_async, iters) {
115   syncops = false;
116   run_test(
117       nthreads,
118       FLAGS_lines,
119       FLAGS_numRecs,
120       FLAGS_work,
121       iters,
122       fc,
123       simple,
124       dedicated,
125       tc,
126       syncops);
127 }
128
129 BENCHMARK_RELATIVE(combining_dedicated_notc_async_dup, iters) {
130   run_test(
131       nthreads,
132       FLAGS_lines,
133       FLAGS_numRecs,
134       FLAGS_work,
135       iters,
136       fc,
137       simple,
138       dedicated,
139       tc,
140       syncops);
141 }
142
143 BENCHMARK_DRAW_LINE()
144
145 BENCHMARK_RELATIVE(combining_dedicated_tc_sync, iters) {
146   tc = true;
147   syncops = true;
148   run_test(
149       nthreads,
150       FLAGS_lines,
151       FLAGS_numRecs,
152       FLAGS_work,
153       iters,
154       fc,
155       simple,
156       dedicated,
157       tc,
158       syncops);
159 }
160
161 BENCHMARK_RELATIVE(combining_dedicated_tc_sync_dup, iters) {
162   run_test(
163       nthreads,
164       FLAGS_lines,
165       FLAGS_numRecs,
166       FLAGS_work,
167       iters,
168       fc,
169       simple,
170       dedicated,
171       tc,
172       syncops);
173 }
174
175 BENCHMARK_DRAW_LINE()
176
177 BENCHMARK_RELATIVE(combining_dedicated_tc_async, iters) {
178   tc = true;
179   syncops = false;
180   run_test(
181       nthreads,
182       FLAGS_lines,
183       FLAGS_numRecs,
184       FLAGS_work,
185       iters,
186       fc,
187       simple,
188       dedicated,
189       tc,
190       syncops);
191 }
192
193 BENCHMARK_RELATIVE(combining_dedicated_tc_async_dup, iters) {
194   run_test(
195       nthreads,
196       FLAGS_lines,
197       FLAGS_numRecs,
198       FLAGS_work,
199       iters,
200       fc,
201       simple,
202       dedicated,
203       tc,
204       syncops);
205 }
206
207 BENCHMARK_DRAW_LINE()
208
209 // no dedicated combiner
210
211 BENCHMARK_DRAW_LINE()
212
213 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync, iters) {
214   dedicated = false;
215   tc = false;
216   syncops = true;
217   run_test(
218       nthreads,
219       FLAGS_lines,
220       FLAGS_numRecs,
221       FLAGS_work,
222       iters,
223       fc,
224       simple,
225       dedicated,
226       tc,
227       syncops);
228 }
229
230 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync_dup, iters) {
231   run_test(
232       nthreads,
233       FLAGS_lines,
234       FLAGS_numRecs,
235       FLAGS_work,
236       iters,
237       fc,
238       simple,
239       dedicated,
240       tc,
241       syncops);
242 }
243
244 BENCHMARK_DRAW_LINE()
245
246 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async, iters) {
247   syncops = false;
248   run_test(
249       nthreads,
250       FLAGS_lines,
251       FLAGS_numRecs,
252       FLAGS_work,
253       iters,
254       fc,
255       simple,
256       dedicated,
257       tc,
258       syncops);
259 }
260
261 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async_dup, iters) {
262   run_test(
263       nthreads,
264       FLAGS_lines,
265       FLAGS_numRecs,
266       FLAGS_work,
267       iters,
268       fc,
269       simple,
270       dedicated,
271       tc,
272       syncops);
273 }
274
275 BENCHMARK_DRAW_LINE()
276
277 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync, iters) {
278   tc = true;
279   syncops = true;
280   run_test(
281       nthreads,
282       FLAGS_lines,
283       FLAGS_numRecs,
284       FLAGS_work,
285       iters,
286       fc,
287       simple,
288       dedicated,
289       tc,
290       syncops);
291 }
292
293 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync_dup, iters) {
294   run_test(
295       nthreads,
296       FLAGS_lines,
297       FLAGS_numRecs,
298       FLAGS_work,
299       iters,
300       fc,
301       simple,
302       dedicated,
303       tc,
304       syncops);
305 }
306
307 BENCHMARK_DRAW_LINE()
308
309 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async, iters) {
310   tc = true;
311   syncops = false;
312   run_test(
313       nthreads,
314       FLAGS_lines,
315       FLAGS_numRecs,
316       FLAGS_work,
317       iters,
318       fc,
319       simple,
320       dedicated,
321       tc,
322       syncops);
323 }
324
325 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async_dup, iters) {
326   run_test(
327       nthreads,
328       FLAGS_lines,
329       FLAGS_numRecs,
330       FLAGS_work,
331       iters,
332       fc,
333       simple,
334       dedicated,
335       tc,
336       syncops);
337 }
338
339 BENCHMARK_DRAW_LINE()
340
// Prints a one-line banner with the number of logical cores reported by
// std::thread::hardware_concurrency(), then flushes std::cout.
void benchmarkSetup() {
  const int logicalCores = std::thread::hardware_concurrency();
  std::cout << "\nRunning benchmarks on machine with ";
  std::cout << logicalCores;
  std::cout << " logical cores" << std::endl;
}
346
347 TEST(FlatCombining, folly_benchmark) {
348   if (FLAGS_benchmark) {
349     benchmarkSetup();
350     for (bool b : {true, false}) {
351       simple = b;
352       std::string str = simple ? "simple" : "custom";
353       std::cout << "\n------------------------------------ " << str
354                 << " interface" << std::endl;
355       for (int i : nthr) {
356         std::cout << "\n---------------------------------- Number of threads = "
357                   << i << std::endl;
358         nthreads = i;
359         folly::runBenchmarks();
360       }
361     }
362   }
363 }
364
365 // Direct measurement - not using folly::Benchmark
366
367 static uint64_t test(
368     std::string name,
369     bool fc,
370     bool dedicated,
371     bool tc,
372     bool syncops,
373     uint64_t base) {
374   uint64_t min = UINTMAX_MAX;
375   uint64_t max = 0;
376   uint64_t sum = 0;
377
378   for (int i = 0; i < FLAGS_reps; ++i) {
379     uint64_t dur = run_test(
380         nthreads,
381         FLAGS_lines,
382         FLAGS_numRecs,
383         FLAGS_work,
384         FLAGS_ops,
385         fc,
386         simple,
387         dedicated,
388         tc,
389         syncops);
390     sum += dur;
391     min = std::min(min, dur);
392     max = std::max(max, dur);
393   }
394   uint64_t avg = sum / FLAGS_reps;
395
396   uint64_t res = min;
397   std::cout << name;
398   std::cout << "   " << std::setw(4) << max / FLAGS_ops << " ns";
399   std::cout << "   " << std::setw(4) << avg / FLAGS_ops << " ns";
400   std::cout << "   " << std::setw(4) << res / FLAGS_ops << " ns";
401   if (base) {
402     std::cout << " " << std::setw(3) << 100 * base / res << "%";
403   }
404   std::cout << std::endl;
405   return res;
406 }
407
408 TEST(FlatCombining, direct_measurement) {
409   if (!FLAGS_direct) {
410     return;
411   }
412   benchmarkSetup();
413   simple = false;
414   std::string str = simple ? "simple" : "custom";
415   std::cout << "\n------------------------------------ " << str << " interface"
416             << std::endl;
417   for (int i : nthr) {
418     nthreads = i;
419     std::cout << "\n------------------------------------ Number of threads = "
420               << i << "\n"
421               << std::endl;
422     std::cout << "Test_name, Max time, Avg time, Min time, % base min / min\n"
423               << std::endl;
424
425     uint64_t base =
426     test("no_combining - base         ", false, false, false, false, 0);
427     test("no_combining - dup          ", false, false, false, false, base);
428     std::cout << "---------------------------------------" << std::endl;
429
430     std::cout << "---- dedicated-------------------------" << std::endl;
431     test("combining_notc_sync         ", true, true, false, true, base);
432     test("combining_notc_sync - dup   ", true, true, false, true, base);
433     std::cout << "---------------------------------------" << std::endl;
434     test("combining_notc_async        ", true, true, false, false, base);
435     test("combining_notc_async - dup  ", true, true, false, false, base);
436     std::cout << "---------------------------------------" << std::endl;
437     test("combining_tc_sync           ", true, true, true, true, base);
438     test("combining_tc_sync - dup     ", true, true, true, true, base);
439     std::cout << "---------------------------------------" << std::endl;
440     test("combining_tc_async          ", true, true, true, false, base);
441     test("combining_tc_async - dup    ", true, true, true, false, base);
442     std::cout << "---------------------------------------" << std::endl;
443
444     std::cout << "---- no dedicated----------------------" << std::endl;
445     test("combining_notc_sync         ", true, false, false, true, base);
446     test("combining_notc_sync - dup   ", true, false, false, true, base);
447     std::cout << "---------------------------------------" << std::endl;
448     test("combining_notc_async        ", true, false, false, false, base);
449     test("combining_notc_async - dup  ", true, false, false, false, base);
450     std::cout << "---------------------------------------" << std::endl;
451     test("combining_tc_sync           ", true, false, true, true, base);
452     test("combining_tc_sync - dup     ", true, false, true, true, base);
453     std::cout << "---------------------------------------" << std::endl;
454     test("combining_tc_async          ", true, false, true, false, base);
455     test("combining_tc_async - dup    ", true, false, true, false, base);
456     std::cout << "---------------------------------------" << std::endl;
457   }
458 }
459
460 /*
461 See benchmark results in https://phabricator.intern.facebook.com/P57204895
462
463 The results are from a run using the command
464 $ numactl -N 1 flat_combining_benchmark --benchmark --bm_min_iters=100000 --direct
465
466 Using the default parameters of the benchmark: In each iteration, the
467 operation on the shared data structure updates 5 cache lines and
468 performs unrelated work (~300ns) after each operation. The benchmark
469 doesn't do any smart combining (i.e., saving or dropping some work
470 based on understanding the details of the combined operations).
471
472 Direct measurements are used to evaluate the high variance in some cases.
473 Duplicate runs are included in order to assess the relevance of outliers.
474
475 ----
476 [==========] Running 2 tests from 1 test case.
477 [----------] Global test environment set-up.
478 [----------] 2 tests from FlatCombining
479 [ RUN      ] FlatCombining.folly_benchmark
480
481 Running benchmarks on machine with 32 logical cores
482
483 ------------------------------------ simple interface
484
485 ---------------------------------- Number of threads = 1
486 ============================================================================
487 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
488 ============================================================================
489 no_combining_base                                          330.43ns    3.03M
490 no_combining_dup                                 100.09%   330.13ns    3.03M
491 ----------------------------------------------------------------------------
492 ----------------------------------------------------------------------------
493 combining_dedicated_notc_sync                     93.17%   354.66ns    2.82M
494 combining_dedicated_notc_sync_dup                 93.57%   353.15ns    2.83M
495 ----------------------------------------------------------------------------
496 combining_dedicated_notc_async                    99.35%   332.60ns    3.01M
497 combining_dedicated_notc_async_dup                99.07%   333.54ns    3.00M
498 ----------------------------------------------------------------------------
499 combining_dedicated_tc_sync                       93.05%   355.13ns    2.82M
500 combining_dedicated_tc_sync_dup                   92.87%   355.81ns    2.81M
501 ----------------------------------------------------------------------------
502 combining_dedicated_tc_async                      99.17%   333.21ns    3.00M
503 combining_dedicated_tc_async_dup                  99.28%   332.84ns    3.00M
504 ----------------------------------------------------------------------------
505 ----------------------------------------------------------------------------
506 combining_no_dedicated_notc_sync                  93.51%   353.38ns    2.83M
507 combining_no_dedicated_notc_sync_dup              93.27%   354.26ns    2.82M
508 ----------------------------------------------------------------------------
509 combining_no_dedicated_notc_async                 99.40%   332.44ns    3.01M
510 combining_no_dedicated_notc_async_dup             99.13%   333.34ns    3.00M
511 ----------------------------------------------------------------------------
512 combining_no_dedicated_tc_sync                    93.38%   353.86ns    2.83M
513 combining_no_dedicated_tc_sync_dup                93.52%   353.31ns    2.83M
514 ----------------------------------------------------------------------------
515 combining_no_dedicated_tc_async                   99.29%   332.78ns    3.00M
516 combining_no_dedicated_tc_async_dup               99.19%   333.11ns    3.00M
517 ----------------------------------------------------------------------------
518 ============================================================================
519
520 ---------------------------------- Number of threads = 2
521 ============================================================================
522 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
523 ============================================================================
524 no_combining_base                                          213.60ns    4.68M
525 no_combining_dup                                 100.84%   211.82ns    4.72M
526 ----------------------------------------------------------------------------
527 ----------------------------------------------------------------------------
528 combining_dedicated_notc_sync                     89.84%   237.76ns    4.21M
529 combining_dedicated_notc_sync_dup                 89.85%   237.73ns    4.21M
530 ----------------------------------------------------------------------------
531 combining_dedicated_notc_async                    93.80%   227.72ns    4.39M
532 combining_dedicated_notc_async_dup                87.85%   243.15ns    4.11M
533 ----------------------------------------------------------------------------
534 combining_dedicated_tc_sync                       86.81%   246.06ns    4.06M
535 combining_dedicated_tc_sync_dup                   87.15%   245.09ns    4.08M
536 ----------------------------------------------------------------------------
537 combining_dedicated_tc_async                      92.14%   231.82ns    4.31M
538 combining_dedicated_tc_async_dup                  92.04%   232.08ns    4.31M
539 ----------------------------------------------------------------------------
540 ----------------------------------------------------------------------------
541 combining_no_dedicated_notc_sync                  95.20%   224.36ns    4.46M
542 combining_no_dedicated_notc_sync_dup              95.40%   223.91ns    4.47M
543 ----------------------------------------------------------------------------
544 combining_no_dedicated_notc_async                 95.41%   223.89ns    4.47M
545 combining_no_dedicated_notc_async_dup             95.86%   222.82ns    4.49M
546 ----------------------------------------------------------------------------
547 combining_no_dedicated_tc_sync                    94.43%   226.21ns    4.42M
548 combining_no_dedicated_tc_sync_dup                94.28%   226.56ns    4.41M
549 ----------------------------------------------------------------------------
550 combining_no_dedicated_tc_async                   96.62%   221.07ns    4.52M
551 combining_no_dedicated_tc_async_dup               97.24%   219.66ns    4.55M
552 ----------------------------------------------------------------------------
553 ============================================================================
554
555 ---------------------------------- Number of threads = 3
556 ============================================================================
557 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
558 ============================================================================
559 no_combining_base                                          188.20ns    5.31M
560 no_combining_dup                                  94.07%   200.07ns    5.00M
561 ----------------------------------------------------------------------------
562 ----------------------------------------------------------------------------
563 combining_dedicated_notc_sync                     95.39%   197.30ns    5.07M
564 combining_dedicated_notc_sync_dup                 94.50%   199.16ns    5.02M
565 ----------------------------------------------------------------------------
566 combining_dedicated_notc_async                    75.29%   249.96ns    4.00M
567 combining_dedicated_notc_async_dup                72.97%   257.91ns    3.88M
568 ----------------------------------------------------------------------------
569 combining_dedicated_tc_sync                       91.26%   206.22ns    4.85M
570 combining_dedicated_tc_sync_dup                   90.68%   207.54ns    4.82M
571 ----------------------------------------------------------------------------
572 combining_dedicated_tc_async                      89.64%   209.95ns    4.76M
573 combining_dedicated_tc_async_dup                  88.21%   213.36ns    4.69M
574 ----------------------------------------------------------------------------
575 ----------------------------------------------------------------------------
576 combining_no_dedicated_notc_sync                  96.19%   195.66ns    5.11M
577 combining_no_dedicated_notc_sync_dup              93.27%   201.78ns    4.96M
578 ----------------------------------------------------------------------------
579 combining_no_dedicated_notc_async                 81.12%   231.99ns    4.31M
580 combining_no_dedicated_notc_async_dup             82.48%   228.19ns    4.38M
581 ----------------------------------------------------------------------------
582 combining_no_dedicated_tc_sync                    79.48%   236.78ns    4.22M
583 combining_no_dedicated_tc_sync_dup                79.73%   236.04ns    4.24M
584 ----------------------------------------------------------------------------
585 combining_no_dedicated_tc_async                  100.70%   186.90ns    5.35M
586 combining_no_dedicated_tc_async_dup               99.43%   189.27ns    5.28M
587 ----------------------------------------------------------------------------
588 ============================================================================
589
590 ---------------------------------- Number of threads = 4
591 ============================================================================
592 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
593 ============================================================================
594 no_combining_base                                          242.84ns    4.12M
595 no_combining_dup                                 100.78%   240.96ns    4.15M
596 ----------------------------------------------------------------------------
597 ----------------------------------------------------------------------------
598 combining_dedicated_notc_sync                    100.91%   240.65ns    4.16M
599 combining_dedicated_notc_sync_dup                 99.76%   243.42ns    4.11M
600 ----------------------------------------------------------------------------
601 combining_dedicated_notc_async                   102.06%   237.95ns    4.20M
602 combining_dedicated_notc_async_dup               101.63%   238.94ns    4.19M
603 ----------------------------------------------------------------------------
604 combining_dedicated_tc_sync                      109.79%   221.18ns    4.52M
605 combining_dedicated_tc_sync_dup                  108.94%   222.92ns    4.49M
606 ----------------------------------------------------------------------------
607 combining_dedicated_tc_async                     133.01%   182.58ns    5.48M
608 combining_dedicated_tc_async_dup                 134.91%   180.00ns    5.56M
609 ----------------------------------------------------------------------------
610 ----------------------------------------------------------------------------
611 combining_no_dedicated_notc_sync                 108.77%   223.25ns    4.48M
612 combining_no_dedicated_notc_sync_dup             107.64%   225.61ns    4.43M
613 ----------------------------------------------------------------------------
614 combining_no_dedicated_notc_async                115.14%   210.91ns    4.74M
615 combining_no_dedicated_notc_async_dup            115.06%   211.05ns    4.74M
616 ----------------------------------------------------------------------------
617 combining_no_dedicated_tc_sync                   116.36%   208.70ns    4.79M
618 combining_no_dedicated_tc_sync_dup               115.70%   209.89ns    4.76M
619 ----------------------------------------------------------------------------
620 combining_no_dedicated_tc_async                  159.69%   152.07ns    6.58M
621 combining_no_dedicated_tc_async_dup              158.27%   153.43ns    6.52M
622 ----------------------------------------------------------------------------
623 ============================================================================
624
625 ---------------------------------- Number of threads = 6
626 ============================================================================
627 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
628 ============================================================================
629 no_combining_base                                          281.36ns    3.55M
630 no_combining_dup                                  98.56%   285.46ns    3.50M
631 ----------------------------------------------------------------------------
632 ----------------------------------------------------------------------------
633 combining_dedicated_notc_sync                    132.39%   212.51ns    4.71M
634 combining_dedicated_notc_sync_dup                133.10%   211.38ns    4.73M
635 ----------------------------------------------------------------------------
636 combining_dedicated_notc_async                   141.35%   199.05ns    5.02M
637 combining_dedicated_notc_async_dup               143.18%   196.51ns    5.09M
638 ----------------------------------------------------------------------------
639 combining_dedicated_tc_sync                      138.94%   202.50ns    4.94M
640 combining_dedicated_tc_sync_dup                  138.64%   202.93ns    4.93M
641 ----------------------------------------------------------------------------
642 combining_dedicated_tc_async                     199.76%   140.85ns    7.10M
643 combining_dedicated_tc_async_dup                 200.28%   140.48ns    7.12M
644 ----------------------------------------------------------------------------
645 ----------------------------------------------------------------------------
646 combining_no_dedicated_notc_sync                 155.48%   180.96ns    5.53M
647 combining_no_dedicated_notc_sync_dup             150.82%   186.55ns    5.36M
648 ----------------------------------------------------------------------------
649 combining_no_dedicated_notc_async                162.23%   173.43ns    5.77M
650 combining_no_dedicated_notc_async_dup            161.33%   174.39ns    5.73M
651 ----------------------------------------------------------------------------
652 combining_no_dedicated_tc_sync                   167.90%   167.57ns    5.97M
653 combining_no_dedicated_tc_sync_dup               164.84%   170.69ns    5.86M
654 ----------------------------------------------------------------------------
655 combining_no_dedicated_tc_async                  242.51%   116.02ns    8.62M
656 combining_no_dedicated_tc_async_dup              245.67%   114.53ns    8.73M
657 ----------------------------------------------------------------------------
658 ============================================================================
659
660 ---------------------------------- Number of threads = 8
661 ============================================================================
662 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
663 ============================================================================
664 no_combining_base                                          315.57ns    3.17M
665 no_combining_dup                                  98.83%   319.32ns    3.13M
666 ----------------------------------------------------------------------------
667 ----------------------------------------------------------------------------
668 combining_dedicated_notc_sync                    170.48%   185.11ns    5.40M
669 combining_dedicated_notc_sync_dup                174.57%   180.77ns    5.53M
670 ----------------------------------------------------------------------------
671 combining_dedicated_notc_async                   178.57%   176.72ns    5.66M
672 combining_dedicated_notc_async_dup               181.30%   174.06ns    5.75M
673 ----------------------------------------------------------------------------
674 combining_dedicated_tc_sync                      195.40%   161.50ns    6.19M
675 combining_dedicated_tc_sync_dup                  197.18%   160.05ns    6.25M
676 ----------------------------------------------------------------------------
677 combining_dedicated_tc_async                     322.03%    97.99ns   10.20M
678 combining_dedicated_tc_async_dup                 324.51%    97.24ns   10.28M
679 ----------------------------------------------------------------------------
680 ----------------------------------------------------------------------------
681 combining_no_dedicated_notc_sync                 205.61%   153.48ns    6.52M
682 combining_no_dedicated_notc_sync_dup             204.94%   153.98ns    6.49M
683 ----------------------------------------------------------------------------
684 combining_no_dedicated_notc_async                217.81%   144.88ns    6.90M
685 combining_no_dedicated_notc_async_dup            218.58%   144.37ns    6.93M
686 ----------------------------------------------------------------------------
687 combining_no_dedicated_tc_sync                   223.96%   140.91ns    7.10M
688 combining_no_dedicated_tc_sync_dup               224.55%   140.53ns    7.12M
689 ----------------------------------------------------------------------------
690 combining_no_dedicated_tc_async                  364.58%    86.56ns   11.55M
691 combining_no_dedicated_tc_async_dup              363.33%    86.86ns   11.51M
692 ----------------------------------------------------------------------------
693 ============================================================================
694
695 ---------------------------------- Number of threads = 12
696 ============================================================================
697 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
698 ============================================================================
699 no_combining_base                                          353.59ns    2.83M
700 no_combining_dup                                  99.91%   353.91ns    2.83M
701 ----------------------------------------------------------------------------
702 ----------------------------------------------------------------------------
703 combining_dedicated_notc_sync                    276.36%   127.95ns    7.82M
704 combining_dedicated_notc_sync_dup                278.88%   126.79ns    7.89M
705 ----------------------------------------------------------------------------
706 combining_dedicated_notc_async                   249.52%   141.71ns    7.06M
707 combining_dedicated_notc_async_dup               247.26%   143.00ns    6.99M
708 ----------------------------------------------------------------------------
709 combining_dedicated_tc_sync                      318.57%   110.99ns    9.01M
710 combining_dedicated_tc_sync_dup                  326.27%   108.37ns    9.23M
711 ----------------------------------------------------------------------------
712 combining_dedicated_tc_async                     428.50%    82.52ns   12.12M
713 combining_dedicated_tc_async_dup                 429.19%    82.39ns   12.14M
714 ----------------------------------------------------------------------------
715 ----------------------------------------------------------------------------
716 combining_no_dedicated_notc_sync                 276.54%   127.86ns    7.82M
717 combining_no_dedicated_notc_sync_dup             275.59%   128.31ns    7.79M
718 ----------------------------------------------------------------------------
719 combining_no_dedicated_notc_async                298.92%   118.29ns    8.45M
720 combining_no_dedicated_notc_async_dup            298.93%   118.28ns    8.45M
721 ----------------------------------------------------------------------------
722 combining_no_dedicated_tc_sync                   300.56%   117.64ns    8.50M
723 combining_no_dedicated_tc_sync_dup               296.95%   119.07ns    8.40M
724 ----------------------------------------------------------------------------
725 combining_no_dedicated_tc_async                  431.06%    82.03ns   12.19M
726 combining_no_dedicated_tc_async_dup              430.40%    82.15ns   12.17M
727 ----------------------------------------------------------------------------
728 ============================================================================
729
730 ---------------------------------- Number of threads = 16
731 ============================================================================
732 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
733 ============================================================================
734 no_combining_base                                          358.57ns    2.79M
735 no_combining_dup                                  99.97%   358.70ns    2.79M
736 ----------------------------------------------------------------------------
737 ----------------------------------------------------------------------------
738 combining_dedicated_notc_sync                    319.73%   112.15ns    8.92M
739 combining_dedicated_notc_sync_dup                327.86%   109.37ns    9.14M
740 ----------------------------------------------------------------------------
741 combining_dedicated_notc_async                   296.17%   121.07ns    8.26M
742 combining_dedicated_notc_async_dup               306.86%   116.85ns    8.56M
743 ----------------------------------------------------------------------------
744 combining_dedicated_tc_sync                      337.53%   106.24ns    9.41M
745 combining_dedicated_tc_sync_dup                  347.98%   103.04ns    9.70M
746 ----------------------------------------------------------------------------
747 combining_dedicated_tc_async                     423.80%    84.61ns   11.82M
748 combining_dedicated_tc_async_dup                 421.07%    85.16ns   11.74M
749 ----------------------------------------------------------------------------
750 ----------------------------------------------------------------------------
751 combining_no_dedicated_notc_sync                 321.94%   111.38ns    8.98M
752 combining_no_dedicated_notc_sync_dup             318.54%   112.57ns    8.88M
753 ----------------------------------------------------------------------------
754 combining_no_dedicated_notc_async                364.71%    98.32ns   10.17M
755 combining_no_dedicated_notc_async_dup            364.22%    98.45ns   10.16M
756 ----------------------------------------------------------------------------
757 combining_no_dedicated_tc_sync                   322.91%   111.04ns    9.01M
758 combining_no_dedicated_tc_sync_dup               322.42%   111.21ns    8.99M
759 ----------------------------------------------------------------------------
760 combining_no_dedicated_tc_async                  466.30%    76.90ns   13.00M
761 combining_no_dedicated_tc_async_dup              462.76%    77.49ns   12.91M
762 ----------------------------------------------------------------------------
763 ============================================================================
764
765 ---------------------------------- Number of threads = 24
766 ============================================================================
767 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
768 ============================================================================
769 no_combining_base                                          348.54ns    2.87M
770 no_combining_dup                                  99.96%   348.69ns    2.87M
771 ----------------------------------------------------------------------------
772 ----------------------------------------------------------------------------
773 combining_dedicated_notc_sync                    260.21%   133.95ns    7.47M
774 combining_dedicated_notc_sync_dup                257.84%   135.18ns    7.40M
775 ----------------------------------------------------------------------------
776 combining_dedicated_notc_async                   242.25%   143.88ns    6.95M
777 combining_dedicated_notc_async_dup               235.88%   147.76ns    6.77M
778 ----------------------------------------------------------------------------
779 combining_dedicated_tc_sync                      262.45%   132.80ns    7.53M
780 combining_dedicated_tc_sync_dup                  251.14%   138.78ns    7.21M
781 ----------------------------------------------------------------------------
782 combining_dedicated_tc_async                     256.89%   135.68ns    7.37M
783 combining_dedicated_tc_async_dup                 304.76%   114.37ns    8.74M
784 ----------------------------------------------------------------------------
785 ----------------------------------------------------------------------------
786 combining_no_dedicated_notc_sync                 270.20%   129.00ns    7.75M
787 combining_no_dedicated_notc_sync_dup             271.69%   128.29ns    7.80M
788 ----------------------------------------------------------------------------
789 combining_no_dedicated_notc_async                298.35%   116.82ns    8.56M
790 combining_no_dedicated_notc_async_dup            289.04%   120.59ns    8.29M
791 ----------------------------------------------------------------------------
792 combining_no_dedicated_tc_sync                   286.59%   121.62ns    8.22M
793 combining_no_dedicated_tc_sync_dup               292.21%   119.28ns    8.38M
794 ----------------------------------------------------------------------------
795 combining_no_dedicated_tc_async                  471.86%    73.87ns   13.54M
796 combining_no_dedicated_tc_async_dup              458.16%    76.08ns   13.14M
797 ----------------------------------------------------------------------------
798 ============================================================================
799
800 ---------------------------------- Number of threads = 32
801 ============================================================================
802 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
803 ============================================================================
804 no_combining_base                                          337.61ns    2.96M
805 no_combining_dup                                  99.41%   339.60ns    2.94M
806 ----------------------------------------------------------------------------
807 ----------------------------------------------------------------------------
808 combining_dedicated_notc_sync                    204.50%   165.09ns    6.06M
809 combining_dedicated_notc_sync_dup                233.28%   144.72ns    6.91M
810 ----------------------------------------------------------------------------
811 combining_dedicated_notc_async                   187.20%   180.35ns    5.54M
812 combining_dedicated_notc_async_dup               192.76%   175.15ns    5.71M
813 ----------------------------------------------------------------------------
814 combining_dedicated_tc_sync                      220.56%   153.07ns    6.53M
815 combining_dedicated_tc_sync_dup                  207.62%   162.61ns    6.15M
816 ----------------------------------------------------------------------------
817 combining_dedicated_tc_async                     317.11%   106.46ns    9.39M
818 combining_dedicated_tc_async_dup                 318.92%   105.86ns    9.45M
819 ----------------------------------------------------------------------------
820 ----------------------------------------------------------------------------
821 combining_no_dedicated_notc_sync                 259.29%   130.21ns    7.68M
822 combining_no_dedicated_notc_sync_dup             248.33%   135.95ns    7.36M
823 ----------------------------------------------------------------------------
824 combining_no_dedicated_notc_async                290.40%   116.26ns    8.60M
825 combining_no_dedicated_notc_async_dup            299.92%   112.57ns    8.88M
826 ----------------------------------------------------------------------------
827 combining_no_dedicated_tc_sync                   281.91%   119.76ns    8.35M
828 combining_no_dedicated_tc_sync_dup               284.19%   118.80ns    8.42M
829 ----------------------------------------------------------------------------
830 combining_no_dedicated_tc_async                  435.16%    77.58ns   12.89M
831 combining_no_dedicated_tc_async_dup              389.67%    86.64ns   11.54M
832 ----------------------------------------------------------------------------
833 ============================================================================
834
835 ---------------------------------- Number of threads = 48
836 ============================================================================
837 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
838 ============================================================================
839 no_combining_base                                          334.48ns    2.99M
840 no_combining_dup                                 100.00%   334.46ns    2.99M
841 ----------------------------------------------------------------------------
842 ----------------------------------------------------------------------------
843 combining_dedicated_notc_sync                    257.01%   130.14ns    7.68M
844 combining_dedicated_notc_sync_dup                254.13%   131.62ns    7.60M
845 ----------------------------------------------------------------------------
846 combining_dedicated_notc_async                   189.56%   176.45ns    5.67M
847 combining_dedicated_notc_async_dup               247.68%   135.05ns    7.40M
848 ----------------------------------------------------------------------------
849 combining_dedicated_tc_sync                      259.47%   128.91ns    7.76M
850 combining_dedicated_tc_sync_dup                  281.34%   118.89ns    8.41M
851 ----------------------------------------------------------------------------
852 combining_dedicated_tc_async                     301.96%   110.77ns    9.03M
853 combining_dedicated_tc_async_dup                 347.65%    96.21ns   10.39M
854 ----------------------------------------------------------------------------
855 ----------------------------------------------------------------------------
856 combining_no_dedicated_notc_sync                 268.45%   124.60ns    8.03M
857 combining_no_dedicated_notc_sync_dup             272.54%   122.73ns    8.15M
858 ----------------------------------------------------------------------------
859 combining_no_dedicated_notc_async                306.04%   109.29ns    9.15M
860 combining_no_dedicated_notc_async_dup            294.38%   113.62ns    8.80M
861 ----------------------------------------------------------------------------
862 combining_no_dedicated_tc_sync                   280.89%   119.08ns    8.40M
863 combining_no_dedicated_tc_sync_dup               276.01%   121.18ns    8.25M
864 ----------------------------------------------------------------------------
865 combining_no_dedicated_tc_async                  466.45%    71.71ns   13.95M
866 combining_no_dedicated_tc_async_dup              465.45%    71.86ns   13.92M
867 ----------------------------------------------------------------------------
868 ============================================================================
869
870 ---------------------------------- Number of threads = 64
871 ============================================================================
872 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
873 ============================================================================
874 no_combining_base                                          335.68ns    2.98M
875 no_combining_dup                                 101.03%   332.25ns    3.01M
876 ----------------------------------------------------------------------------
877 ----------------------------------------------------------------------------
878 combining_dedicated_notc_sync                    272.91%   123.00ns    8.13M
879 combining_dedicated_notc_sync_dup                270.56%   124.07ns    8.06M
880 ----------------------------------------------------------------------------
881 combining_dedicated_notc_async                   200.44%   167.47ns    5.97M
882 combining_dedicated_notc_async_dup               208.36%   161.10ns    6.21M
883 ----------------------------------------------------------------------------
884 combining_dedicated_tc_sync                      258.40%   129.91ns    7.70M
885 combining_dedicated_tc_sync_dup                  249.16%   134.72ns    7.42M
886 ----------------------------------------------------------------------------
887 combining_dedicated_tc_async                     378.86%    88.60ns   11.29M
888 combining_dedicated_tc_async_dup                 299.32%   112.15ns    8.92M
889 ----------------------------------------------------------------------------
890 ----------------------------------------------------------------------------
891 combining_no_dedicated_notc_sync                 272.18%   123.33ns    8.11M
892 combining_no_dedicated_notc_sync_dup             275.26%   121.95ns    8.20M
893 ----------------------------------------------------------------------------
894 combining_no_dedicated_notc_async                296.23%   113.32ns    8.82M
895 combining_no_dedicated_notc_async_dup            311.17%   107.88ns    9.27M
896 ----------------------------------------------------------------------------
897 combining_no_dedicated_tc_sync                   283.30%   118.49ns    8.44M
898 combining_no_dedicated_tc_sync_dup               263.86%   127.22ns    7.86M
899 ----------------------------------------------------------------------------
900 combining_no_dedicated_tc_async                  426.62%    78.68ns   12.71M
901 combining_no_dedicated_tc_async_dup              445.17%    75.40ns   13.26M
902 ----------------------------------------------------------------------------
903 ============================================================================
904
905 ------------------------------------ custom interface
906
907 ---------------------------------- Number of threads = 1
908 ============================================================================
909 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
910 ============================================================================
911 no_combining_base                                          329.49ns    3.03M
912 no_combining_dup                                  99.91%   329.79ns    3.03M
913 ----------------------------------------------------------------------------
914 ----------------------------------------------------------------------------
915 combining_dedicated_notc_sync                     98.69%   333.88ns    3.00M
916 combining_dedicated_notc_sync_dup                 98.70%   333.83ns    3.00M
917 ----------------------------------------------------------------------------
918 combining_dedicated_notc_async                    98.22%   335.47ns    2.98M
919 combining_dedicated_notc_async_dup                98.16%   335.66ns    2.98M
920 ----------------------------------------------------------------------------
921 combining_dedicated_tc_sync                       98.70%   333.85ns    3.00M
922 combining_dedicated_tc_sync_dup                   98.78%   333.58ns    3.00M
923 ----------------------------------------------------------------------------
924 combining_dedicated_tc_async                      98.14%   335.73ns    2.98M
925 combining_dedicated_tc_async_dup                  97.92%   336.49ns    2.97M
926 ----------------------------------------------------------------------------
927 ----------------------------------------------------------------------------
928 combining_no_dedicated_notc_sync                  98.94%   333.00ns    3.00M
929 combining_no_dedicated_notc_sync_dup              98.86%   333.29ns    3.00M
930 ----------------------------------------------------------------------------
931 combining_no_dedicated_notc_async                 98.36%   334.99ns    2.99M
932 combining_no_dedicated_notc_async_dup             98.61%   334.15ns    2.99M
933 ----------------------------------------------------------------------------
934 combining_no_dedicated_tc_sync                    99.07%   332.58ns    3.01M
935 combining_no_dedicated_tc_sync_dup                99.12%   332.41ns    3.01M
936 ----------------------------------------------------------------------------
937 combining_no_dedicated_tc_async                   97.08%   339.38ns    2.95M
938 combining_no_dedicated_tc_async_dup               97.54%   337.81ns    2.96M
939 ----------------------------------------------------------------------------
940 ============================================================================
941
942 ---------------------------------- Number of threads = 2
943 ============================================================================
944 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
945 ============================================================================
946 no_combining_base                                          216.71ns    4.61M
947 no_combining_dup                                 100.34%   215.97ns    4.63M
948 ----------------------------------------------------------------------------
949 ----------------------------------------------------------------------------
950 combining_dedicated_notc_sync                     95.42%   227.11ns    4.40M
951 combining_dedicated_notc_sync_dup                 94.16%   230.15ns    4.34M
952 ----------------------------------------------------------------------------
953 combining_dedicated_notc_async                    91.84%   235.97ns    4.24M
954 combining_dedicated_notc_async_dup                91.41%   237.08ns    4.22M
955 ----------------------------------------------------------------------------
956 combining_dedicated_tc_sync                       96.79%   223.90ns    4.47M
957 combining_dedicated_tc_sync_dup                   96.54%   224.47ns    4.45M
958 ----------------------------------------------------------------------------
959 combining_dedicated_tc_async                      90.90%   238.41ns    4.19M
960 combining_dedicated_tc_async_dup                  95.45%   227.03ns    4.40M
961 ----------------------------------------------------------------------------
962 ----------------------------------------------------------------------------
963 combining_no_dedicated_notc_sync                 101.13%   214.28ns    4.67M
964 combining_no_dedicated_notc_sync_dup             100.11%   216.48ns    4.62M
965 ----------------------------------------------------------------------------
966 combining_no_dedicated_notc_async                 96.40%   224.80ns    4.45M
967 combining_no_dedicated_notc_async_dup             96.36%   224.90ns    4.45M
968 ----------------------------------------------------------------------------
969 combining_no_dedicated_tc_sync                   100.86%   214.85ns    4.65M
970 combining_no_dedicated_tc_sync_dup               101.91%   212.65ns    4.70M
971 ----------------------------------------------------------------------------
972 combining_no_dedicated_tc_async                   95.66%   226.54ns    4.41M
973 combining_no_dedicated_tc_async_dup               95.88%   226.03ns    4.42M
974 ----------------------------------------------------------------------------
975 ============================================================================
976
977 ---------------------------------- Number of threads = 3
978 ============================================================================
979 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
980 ============================================================================
981 no_combining_base                                          189.61ns    5.27M
982 no_combining_dup                                 100.22%   189.20ns    5.29M
983 ----------------------------------------------------------------------------
984 ----------------------------------------------------------------------------
985 combining_dedicated_notc_sync                    103.18%   183.76ns    5.44M
986 combining_dedicated_notc_sync_dup                103.66%   182.92ns    5.47M
987 ----------------------------------------------------------------------------
988 combining_dedicated_notc_async                    77.14%   245.81ns    4.07M
989 combining_dedicated_notc_async_dup                90.25%   210.10ns    4.76M
990 ----------------------------------------------------------------------------
991 combining_dedicated_tc_sync                       89.88%   210.95ns    4.74M
992 combining_dedicated_tc_sync_dup                   87.83%   215.90ns    4.63M
993 ----------------------------------------------------------------------------
994 combining_dedicated_tc_async                      89.33%   212.26ns    4.71M
995 combining_dedicated_tc_async_dup                  85.19%   222.56ns    4.49M
996 ----------------------------------------------------------------------------
997 ----------------------------------------------------------------------------
998 combining_no_dedicated_notc_sync                  98.43%   192.64ns    5.19M
999 combining_no_dedicated_notc_sync_dup             101.15%   187.46ns    5.33M
1000 ----------------------------------------------------------------------------
1001 combining_no_dedicated_notc_async                 83.77%   226.36ns    4.42M
1002 combining_no_dedicated_notc_async_dup             84.69%   223.89ns    4.47M
1003 ----------------------------------------------------------------------------
1004 combining_no_dedicated_tc_sync                    85.47%   221.85ns    4.51M
1005 combining_no_dedicated_tc_sync_dup                86.32%   219.65ns    4.55M
1006 ----------------------------------------------------------------------------
1007 combining_no_dedicated_tc_async                  105.62%   179.52ns    5.57M
1008 combining_no_dedicated_tc_async_dup              105.26%   180.14ns    5.55M
1009 ----------------------------------------------------------------------------
1010 ============================================================================
1011
1012 ---------------------------------- Number of threads = 4
1013 ============================================================================
1014 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1015 ============================================================================
1016 no_combining_base                                          237.50ns    4.21M
1017 no_combining_dup                                  99.80%   237.97ns    4.20M
1018 ----------------------------------------------------------------------------
1019 ----------------------------------------------------------------------------
1020 combining_dedicated_notc_sync                    112.56%   210.99ns    4.74M
1021 combining_dedicated_notc_sync_dup                104.08%   228.20ns    4.38M
1022 ----------------------------------------------------------------------------
1023 combining_dedicated_notc_async                   101.44%   234.12ns    4.27M
1024 combining_dedicated_notc_async_dup               100.73%   235.77ns    4.24M
1025 ----------------------------------------------------------------------------
1026 combining_dedicated_tc_sync                      111.70%   212.62ns    4.70M
1027 combining_dedicated_tc_sync_dup                  113.00%   210.18ns    4.76M
1028 ----------------------------------------------------------------------------
1029 combining_dedicated_tc_async                     131.11%   181.15ns    5.52M
1030 combining_dedicated_tc_async_dup                 132.65%   179.04ns    5.59M
1031 ----------------------------------------------------------------------------
1032 ----------------------------------------------------------------------------
1033 combining_no_dedicated_notc_sync                 115.76%   205.17ns    4.87M
1034 combining_no_dedicated_notc_sync_dup             114.70%   207.06ns    4.83M
1035 ----------------------------------------------------------------------------
1036 combining_no_dedicated_notc_async                111.63%   212.76ns    4.70M
1037 combining_no_dedicated_notc_async_dup            111.91%   212.22ns    4.71M
1038 ----------------------------------------------------------------------------
1039 combining_no_dedicated_tc_sync                   120.07%   197.80ns    5.06M
1040 combining_no_dedicated_tc_sync_dup               118.25%   200.85ns    4.98M
1041 ----------------------------------------------------------------------------
1042 combining_no_dedicated_tc_async                  153.73%   154.49ns    6.47M
1043 combining_no_dedicated_tc_async_dup              153.08%   155.15ns    6.45M
1044 ----------------------------------------------------------------------------
1045 ============================================================================
1046
1047 ---------------------------------- Number of threads = 6
1048 ============================================================================
1049 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1050 ============================================================================
1051 no_combining_base                                          281.56ns    3.55M
1052 no_combining_dup                                  99.97%   281.65ns    3.55M
1053 ----------------------------------------------------------------------------
1054 ----------------------------------------------------------------------------
1055 combining_dedicated_notc_sync                    144.76%   194.50ns    5.14M
1056 combining_dedicated_notc_sync_dup                149.96%   187.76ns    5.33M
1057 ----------------------------------------------------------------------------
1058 combining_dedicated_notc_async                   147.72%   190.61ns    5.25M
1059 combining_dedicated_notc_async_dup               140.86%   199.89ns    5.00M
1060 ----------------------------------------------------------------------------
1061 combining_dedicated_tc_sync                      154.17%   182.63ns    5.48M
1062 combining_dedicated_tc_sync_dup                  156.60%   179.80ns    5.56M
1063 ----------------------------------------------------------------------------
1064 combining_dedicated_tc_async                     202.42%   139.10ns    7.19M
1065 combining_dedicated_tc_async_dup                 203.44%   138.40ns    7.23M
1066 ----------------------------------------------------------------------------
1067 ----------------------------------------------------------------------------
1068 combining_no_dedicated_notc_sync                 168.33%   167.27ns    5.98M
1069 combining_no_dedicated_notc_sync_dup             166.02%   169.59ns    5.90M
1070 ----------------------------------------------------------------------------
1071 combining_no_dedicated_notc_async                166.44%   169.16ns    5.91M
1072 combining_no_dedicated_notc_async_dup            160.14%   175.82ns    5.69M
1073 ----------------------------------------------------------------------------
1074 combining_no_dedicated_tc_sync                   181.79%   154.88ns    6.46M
1075 combining_no_dedicated_tc_sync_dup               180.25%   156.20ns    6.40M
1076 ----------------------------------------------------------------------------
1077 combining_no_dedicated_tc_async                  240.56%   117.04ns    8.54M
1078 combining_no_dedicated_tc_async_dup              240.74%   116.96ns    8.55M
1079 ----------------------------------------------------------------------------
1080 ============================================================================
1081
1082 ---------------------------------- Number of threads = 8
1083 ============================================================================
1084 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1085 ============================================================================
1086 no_combining_base                                          312.99ns    3.19M
1087 no_combining_dup                                  98.93%   316.37ns    3.16M
1088 ----------------------------------------------------------------------------
1089 ----------------------------------------------------------------------------
1090 combining_dedicated_notc_sync                    182.71%   171.30ns    5.84M
1091 combining_dedicated_notc_sync_dup                183.23%   170.82ns    5.85M
1092 ----------------------------------------------------------------------------
1093 combining_dedicated_notc_async                   183.16%   170.88ns    5.85M
1094 combining_dedicated_notc_async_dup               181.29%   172.64ns    5.79M
1095 ----------------------------------------------------------------------------
1096 combining_dedicated_tc_sync                      191.49%   163.45ns    6.12M
1097 combining_dedicated_tc_sync_dup                  191.04%   163.84ns    6.10M
1098 ----------------------------------------------------------------------------
1099 combining_dedicated_tc_async                     302.89%   103.34ns    9.68M
1100 combining_dedicated_tc_async_dup                 304.07%   102.94ns    9.71M
1101 ----------------------------------------------------------------------------
1102 ----------------------------------------------------------------------------
1103 combining_no_dedicated_notc_sync                 220.41%   142.00ns    7.04M
1104 combining_no_dedicated_notc_sync_dup             219.90%   142.34ns    7.03M
1105 ----------------------------------------------------------------------------
1106 combining_no_dedicated_notc_async                218.66%   143.14ns    6.99M
1107 combining_no_dedicated_notc_async_dup            218.74%   143.09ns    6.99M
1108 ----------------------------------------------------------------------------
1109 combining_no_dedicated_tc_sync                   241.82%   129.43ns    7.73M
1110 combining_no_dedicated_tc_sync_dup               241.72%   129.48ns    7.72M
1111 ----------------------------------------------------------------------------
1112 combining_no_dedicated_tc_async                  352.39%    88.82ns   11.26M
1113 combining_no_dedicated_tc_async_dup              350.17%    89.38ns   11.19M
1114 ----------------------------------------------------------------------------
1115 ============================================================================
1116
1117 ---------------------------------- Number of threads = 12
1118 ============================================================================
1119 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1120 ============================================================================
1121 no_combining_base                                          350.05ns    2.86M
1122 no_combining_dup                                  99.06%   353.37ns    2.83M
1123 ----------------------------------------------------------------------------
1124 ----------------------------------------------------------------------------
1125 combining_dedicated_notc_sync                    266.87%   131.17ns    7.62M
1126 combining_dedicated_notc_sync_dup                245.79%   142.42ns    7.02M
1127 ----------------------------------------------------------------------------
1128 combining_dedicated_notc_async                   238.57%   146.73ns    6.82M
1129 combining_dedicated_notc_async_dup               240.02%   145.84ns    6.86M
1130 ----------------------------------------------------------------------------
1131 combining_dedicated_tc_sync                      316.70%   110.53ns    9.05M
1132 combining_dedicated_tc_sync_dup                  321.05%   109.03ns    9.17M
1133 ----------------------------------------------------------------------------
1134 combining_dedicated_tc_async                     403.10%    86.84ns   11.52M
1135 combining_dedicated_tc_async_dup                 409.94%    85.39ns   11.71M
1136 ----------------------------------------------------------------------------
1137 ----------------------------------------------------------------------------
1138 combining_no_dedicated_notc_sync                 300.23%   116.59ns    8.58M
1139 combining_no_dedicated_notc_sync_dup             299.07%   117.04ns    8.54M
1140 ----------------------------------------------------------------------------
1141 combining_no_dedicated_notc_async                297.79%   117.55ns    8.51M
1142 combining_no_dedicated_notc_async_dup            296.66%   118.00ns    8.47M
1143 ----------------------------------------------------------------------------
1144 combining_no_dedicated_tc_sync                   328.07%   106.70ns    9.37M
1145 combining_no_dedicated_tc_sync_dup               331.52%   105.59ns    9.47M
1146 ----------------------------------------------------------------------------
1147 combining_no_dedicated_tc_async                  424.57%    82.45ns   12.13M
1148 combining_no_dedicated_tc_async_dup              409.47%    85.49ns   11.70M
1149 ----------------------------------------------------------------------------
1150 ============================================================================
1151
1152 ---------------------------------- Number of threads = 16
1153 ============================================================================
1154 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1155 ============================================================================
1156 no_combining_base                                          360.47ns    2.77M
1157 no_combining_dup                                 100.11%   360.07ns    2.78M
1158 ----------------------------------------------------------------------------
1159 ----------------------------------------------------------------------------
1160 combining_dedicated_notc_sync                    320.54%   112.46ns    8.89M
1161 combining_dedicated_notc_sync_dup                313.31%   115.05ns    8.69M
1162 ----------------------------------------------------------------------------
1163 combining_dedicated_notc_async                   296.83%   121.44ns    8.23M
1164 combining_dedicated_notc_async_dup               289.91%   124.34ns    8.04M
1165 ----------------------------------------------------------------------------
1166 combining_dedicated_tc_sync                      364.27%    98.96ns   10.11M
1167 combining_dedicated_tc_sync_dup                  361.10%    99.82ns   10.02M
1168 ----------------------------------------------------------------------------
1169 combining_dedicated_tc_async                     424.43%    84.93ns   11.77M
1170 combining_dedicated_tc_async_dup                 418.07%    86.22ns   11.60M
1171 ----------------------------------------------------------------------------
1172 ----------------------------------------------------------------------------
1173 combining_no_dedicated_notc_sync                 373.13%    96.60ns   10.35M
1174 combining_no_dedicated_notc_sync_dup             364.35%    98.93ns   10.11M
1175 ----------------------------------------------------------------------------
1176 combining_no_dedicated_notc_async                361.40%    99.74ns   10.03M
1177 combining_no_dedicated_notc_async_dup            366.49%    98.36ns   10.17M
1178 ----------------------------------------------------------------------------
1179 combining_no_dedicated_tc_sync                   382.22%    94.31ns   10.60M
1180 combining_no_dedicated_tc_sync_dup               380.64%    94.70ns   10.56M
1181 ----------------------------------------------------------------------------
1182 combining_no_dedicated_tc_async                  461.14%    78.17ns   12.79M
1183 combining_no_dedicated_tc_async_dup              481.50%    74.86ns   13.36M
1184 ----------------------------------------------------------------------------
1185 ============================================================================
1186
1187 ---------------------------------- Number of threads = 24
1188 ============================================================================
1189 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1190 ============================================================================
1191 no_combining_base                                          348.97ns    2.87M
1192 no_combining_dup                                 100.12%   348.54ns    2.87M
1193 ----------------------------------------------------------------------------
1194 ----------------------------------------------------------------------------
1195 combining_dedicated_notc_sync                    234.17%   149.02ns    6.71M
1196 combining_dedicated_notc_sync_dup                205.54%   169.78ns    5.89M
1197 ----------------------------------------------------------------------------
1198 combining_dedicated_notc_async                   248.28%   140.55ns    7.11M
1199 combining_dedicated_notc_async_dup               239.71%   145.58ns    6.87M
1200 ----------------------------------------------------------------------------
1201 combining_dedicated_tc_sync                      272.87%   127.89ns    7.82M
1202 combining_dedicated_tc_sync_dup                  235.76%   148.02ns    6.76M
1203 ----------------------------------------------------------------------------
1204 combining_dedicated_tc_async                     295.71%   118.01ns    8.47M
1205 combining_dedicated_tc_async_dup                 265.87%   131.25ns    7.62M
1206 ----------------------------------------------------------------------------
1207 ----------------------------------------------------------------------------
1208 combining_no_dedicated_notc_sync                 298.96%   116.73ns    8.57M
1209 combining_no_dedicated_notc_sync_dup             297.67%   117.23ns    8.53M
1210 ----------------------------------------------------------------------------
1211 combining_no_dedicated_notc_async                298.44%   116.93ns    8.55M
1212 combining_no_dedicated_notc_async_dup            292.80%   119.18ns    8.39M
1213 ----------------------------------------------------------------------------
1214 combining_no_dedicated_tc_sync                   316.44%   110.28ns    9.07M
1215 combining_no_dedicated_tc_sync_dup               317.52%   109.90ns    9.10M
1216 ----------------------------------------------------------------------------
1217 combining_no_dedicated_tc_async                  432.64%    80.66ns   12.40M
1218 combining_no_dedicated_tc_async_dup              441.55%    79.03ns   12.65M
1219 ----------------------------------------------------------------------------
1220 ============================================================================
1221
1222 ---------------------------------- Number of threads = 32
1223 ============================================================================
1224 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1225 ============================================================================
1226 no_combining_base                                          338.90ns    2.95M
1227 no_combining_dup                                 100.01%   338.87ns    2.95M
1228 ----------------------------------------------------------------------------
1229 ----------------------------------------------------------------------------
1230 combining_dedicated_notc_sync                    204.34%   165.85ns    6.03M
1231 combining_dedicated_notc_sync_dup                202.84%   167.07ns    5.99M
1232 ----------------------------------------------------------------------------
1233 combining_dedicated_notc_async                   192.27%   176.26ns    5.67M
1234 combining_dedicated_notc_async_dup               188.61%   179.68ns    5.57M
1235 ----------------------------------------------------------------------------
1236 combining_dedicated_tc_sync                      247.57%   136.89ns    7.31M
1237 combining_dedicated_tc_sync_dup                  285.53%   118.69ns    8.43M
1238 ----------------------------------------------------------------------------
1239 combining_dedicated_tc_async                     277.97%   121.92ns    8.20M
1240 combining_dedicated_tc_async_dup                 231.11%   146.64ns    6.82M
1241 ----------------------------------------------------------------------------
1242 ----------------------------------------------------------------------------
1243 combining_no_dedicated_notc_sync                 299.20%   113.27ns    8.83M
1244 combining_no_dedicated_notc_sync_dup             289.53%   117.05ns    8.54M
1245 ----------------------------------------------------------------------------
1246 combining_no_dedicated_notc_async                282.29%   120.05ns    8.33M
1247 combining_no_dedicated_notc_async_dup            305.09%   111.08ns    9.00M
1248 ----------------------------------------------------------------------------
1249 combining_no_dedicated_tc_sync                   312.52%   108.44ns    9.22M
1250 combining_no_dedicated_tc_sync_dup               324.88%   104.31ns    9.59M
1251 ----------------------------------------------------------------------------
1252 combining_no_dedicated_tc_async                  420.99%    80.50ns   12.42M
1253 combining_no_dedicated_tc_async_dup              406.58%    83.35ns   12.00M
1254 ----------------------------------------------------------------------------
1255 ============================================================================
1256
1257 ---------------------------------- Number of threads = 48
1258 ============================================================================
1259 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1260 ============================================================================
1261 no_combining_base                                          334.84ns    2.99M
1262 no_combining_dup                                  99.57%   336.29ns    2.97M
1263 ----------------------------------------------------------------------------
1264 ----------------------------------------------------------------------------
1265 combining_dedicated_notc_sync                    212.82%   157.34ns    6.36M
1266 combining_dedicated_notc_sync_dup                198.39%   168.78ns    5.93M
1267 ----------------------------------------------------------------------------
1268 combining_dedicated_notc_async                   166.74%   200.82ns    4.98M
1269 combining_dedicated_notc_async_dup               197.07%   169.91ns    5.89M
1270 ----------------------------------------------------------------------------
1271 combining_dedicated_tc_sync                      246.35%   135.92ns    7.36M
1272 combining_dedicated_tc_sync_dup                  209.52%   159.81ns    6.26M
1273 ----------------------------------------------------------------------------
1274 combining_dedicated_tc_async                     293.94%   113.91ns    8.78M
1275 combining_dedicated_tc_async_dup                 280.74%   119.27ns    8.38M
1276 ----------------------------------------------------------------------------
1277 ----------------------------------------------------------------------------
1278 combining_no_dedicated_notc_sync                 301.60%   111.02ns    9.01M
1279 combining_no_dedicated_notc_sync_dup             296.10%   113.09ns    8.84M
1280 ----------------------------------------------------------------------------
1281 combining_no_dedicated_notc_async                308.91%   108.40ns    9.23M
1282 combining_no_dedicated_notc_async_dup            298.48%   112.18ns    8.91M
1283 ----------------------------------------------------------------------------
1284 combining_no_dedicated_tc_sync                   331.11%   101.13ns    9.89M
1285 combining_no_dedicated_tc_sync_dup               329.37%   101.66ns    9.84M
1286 ----------------------------------------------------------------------------
1287 combining_no_dedicated_tc_async                  451.58%    74.15ns   13.49M
1288 combining_no_dedicated_tc_async_dup              431.37%    77.62ns   12.88M
1289 ----------------------------------------------------------------------------
1290 ============================================================================
1291
1292 ---------------------------------- Number of threads = 64
1293 ============================================================================
1294 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative  time/iter  iters/s
1295 ============================================================================
1296 no_combining_base                                          336.22ns    2.97M
1297 no_combining_dup                                 100.69%   333.92ns    2.99M
1298 ----------------------------------------------------------------------------
1299 ----------------------------------------------------------------------------
1300 combining_dedicated_notc_sync                    230.57%   145.82ns    6.86M
1301 combining_dedicated_notc_sync_dup                221.08%   152.08ns    6.58M
1302 ----------------------------------------------------------------------------
1303 combining_dedicated_notc_async                   232.38%   144.69ns    6.91M
1304 combining_dedicated_notc_async_dup               192.77%   174.41ns    5.73M
1305 ----------------------------------------------------------------------------
1306 combining_dedicated_tc_sync                      284.07%   118.36ns    8.45M
1307 combining_dedicated_tc_sync_dup                  298.03%   112.81ns    8.86M
1308 ----------------------------------------------------------------------------
1309 combining_dedicated_tc_async                     361.07%    93.12ns   10.74M
1310 combining_dedicated_tc_async_dup                 324.11%   103.74ns    9.64M
1311 ----------------------------------------------------------------------------
1312 ----------------------------------------------------------------------------
1313 combining_no_dedicated_notc_sync                 284.58%   118.15ns    8.46M
1314 combining_no_dedicated_notc_sync_dup             301.73%   111.43ns    8.97M
1315 ----------------------------------------------------------------------------
1316 combining_no_dedicated_notc_async                294.87%   114.02ns    8.77M
1317 combining_no_dedicated_notc_async_dup            287.51%   116.94ns    8.55M
1318 ----------------------------------------------------------------------------
1319 combining_no_dedicated_tc_sync                   317.96%   105.74ns    9.46M
1320 combining_no_dedicated_tc_sync_dup               332.45%   101.13ns    9.89M
1321 ----------------------------------------------------------------------------
1322 combining_no_dedicated_tc_async                  441.96%    76.07ns   13.15M
1323 combining_no_dedicated_tc_async_dup              393.82%    85.37ns   11.71M
1324 ----------------------------------------------------------------------------
1325 ============================================================================
1326 [       OK ] FlatCombining.folly_benchmark (455269 ms)
1327 [ RUN      ] FlatCombining.direct_measurement
1328
1329 Running benchmarks on machine with 32 logical cores
1330
1331 ------------------------------------ custom interface
1332
1333 ------------------------------------ Number of threads = 1
1334
1335 Test_name, Max time, Avg time, Min time, % base min / min
1336
1337 no_combining - base             334 ns    331 ns    329 ns
1338 no_combining - dup              335 ns    332 ns    331 ns  99%
1339 ---------------------------------------
1340 ---- dedicated-------------------------
1341 combining_notc_sync             340 ns    335 ns    332 ns  99%
1342 combining_notc_sync - dup       337 ns    335 ns    333 ns  98%
1343 ---------------------------------------
1344 combining_notc_async            360 ns    343 ns    338 ns  97%
1345 combining_notc_async - dup      339 ns    337 ns    336 ns  98%
1346 ---------------------------------------
1347 combining_tc_sync               337 ns    335 ns    333 ns  98%
1348 combining_tc_sync - dup         346 ns    336 ns    332 ns  99%
1349 ---------------------------------------
1350 combining_tc_async              338 ns    336 ns    335 ns  98%
1351 combining_tc_async - dup        338 ns    336 ns    335 ns  98%
1352 ---------------------------------------
1353 ---- no dedicated----------------------
1354 combining_notc_sync             338 ns    335 ns    333 ns  98%
1355 combining_notc_sync - dup       337 ns    334 ns    333 ns  98%
1356 ---------------------------------------
1357 combining_notc_async            339 ns    336 ns    335 ns  98%
1358 combining_notc_async - dup      347 ns    340 ns    336 ns  98%
1359 ---------------------------------------
1360 combining_tc_sync               337 ns    335 ns    333 ns  98%
1361 combining_tc_sync - dup         436 ns    386 ns    333 ns  98%
1362 ---------------------------------------
1363 combining_tc_async              340 ns    337 ns    335 ns  98%
1364 combining_tc_async - dup        338 ns    336 ns    335 ns  98%
1365 ---------------------------------------
1366
1367 ------------------------------------ Number of threads = 2
1368
1369 Test_name, Max time, Avg time, Min time, % base min / min
1370
1371 no_combining - base             315 ns    226 ns    211 ns
1372 no_combining - dup              217 ns    216 ns    213 ns  98%
1373 ---------------------------------------
1374 ---- dedicated-------------------------
1375 combining_notc_sync             251 ns    237 ns    229 ns  92%
1376 combining_notc_sync - dup       250 ns    241 ns    226 ns  93%
1377 ---------------------------------------
1378 combining_notc_async            278 ns    268 ns    252 ns  83%
1379 combining_notc_async - dup      297 ns    263 ns    245 ns  86%
1380 ---------------------------------------
1381 combining_tc_sync               254 ns    246 ns    234 ns  90%
1382 combining_tc_sync - dup         335 ns    252 ns    230 ns  91%
1383 ---------------------------------------
1384 combining_tc_async              305 ns    282 ns    245 ns  86%
1385 combining_tc_async - dup        284 ns    256 ns    239 ns  88%
1386 ---------------------------------------
1387 ---- no dedicated----------------------
1388 combining_notc_sync             230 ns    222 ns    217 ns  97%
1389 combining_notc_sync - dup       231 ns    225 ns    218 ns  96%
1390 ---------------------------------------
1391 combining_notc_async            244 ns    238 ns    233 ns  90%
1392 combining_notc_async - dup      241 ns    236 ns    231 ns  91%
1393 ---------------------------------------
1394 combining_tc_sync               283 ns    239 ns    221 ns  95%
1395 combining_tc_sync - dup         299 ns    247 ns    225 ns  93%
1396 ---------------------------------------
1397 combining_tc_async              290 ns    270 ns    244 ns  86%
1398 combining_tc_async - dup        290 ns    251 ns    238 ns  88%
1399 ---------------------------------------
1400
1401 ------------------------------------ Number of threads = 3
1402
1403 Test_name, Max time, Avg time, Min time, % base min / min
1404
1405 no_combining - base             211 ns    197 ns    190 ns
1406 no_combining - dup              209 ns    201 ns    195 ns  97%
1407 ---------------------------------------
1408 ---- dedicated-------------------------
1409 combining_notc_sync             258 ns    197 ns    168 ns 112%
1410 combining_notc_sync - dup       274 ns    200 ns    162 ns 117%
1411 ---------------------------------------
1412 combining_notc_async            307 ns    281 ns    260 ns  73%
1413 combining_notc_async - dup      284 ns    258 ns    216 ns  88%
1414 ---------------------------------------
1415 combining_tc_sync               228 ns    215 ns    192 ns  98%
1416 combining_tc_sync - dup         216 ns    203 ns    178 ns 107%
1417 ---------------------------------------
1418 combining_tc_async              246 ns    233 ns    220 ns  86%
1419 combining_tc_async - dup        236 ns    221 ns    208 ns  91%
1420 ---------------------------------------
1421 ---- no dedicated----------------------
1422 combining_notc_sync             204 ns    198 ns    184 ns 103%
1423 combining_notc_sync - dup       203 ns    198 ns    193 ns  98%
1424 ---------------------------------------
1425 combining_notc_async            238 ns    225 ns    218 ns  87%
1426 combining_notc_async - dup      231 ns    227 ns    223 ns  85%
1427 ---------------------------------------
1428 combining_tc_sync               220 ns    216 ns    211 ns  90%
1429 combining_tc_sync - dup         227 ns    223 ns    219 ns  87%
1430 ---------------------------------------
1431 combining_tc_async              182 ns    181 ns    179 ns 106%
1432 combining_tc_async - dup        186 ns    181 ns    180 ns 105%
1433 ---------------------------------------
1434
1435 ------------------------------------ Number of threads = 4
1436
1437 Test_name, Max time, Avg time, Min time, % base min / min
1438
1439 no_combining - base             258 ns    245 ns    238 ns
1440 no_combining - dup              262 ns    249 ns    245 ns  97%
1441 ---------------------------------------
1442 ---- dedicated-------------------------
1443 combining_notc_sync             264 ns    250 ns    220 ns 107%
1444 combining_notc_sync - dup       260 ns    254 ns    231 ns 102%
1445 ---------------------------------------
1446 combining_notc_async            266 ns    255 ns    233 ns 102%
1447 combining_notc_async - dup      268 ns    260 ns    252 ns  94%
1448 ---------------------------------------
1449 combining_tc_sync               250 ns    240 ns    215 ns 110%
1450 combining_tc_sync - dup         252 ns    242 ns    217 ns 109%
1451 ---------------------------------------
1452 combining_tc_async              199 ns    190 ns    183 ns 129%
1453 combining_tc_async - dup        199 ns    189 ns    178 ns 133%
1454 ---------------------------------------
1455 ---- no dedicated----------------------
1456 combining_notc_sync             223 ns    211 ns    203 ns 116%
1457 combining_notc_sync - dup       218 ns    211 ns    202 ns 117%
1458 ---------------------------------------
1459 combining_notc_async            222 ns    213 ns    207 ns 114%
1460 combining_notc_async - dup      236 ns    222 ns    215 ns 110%
1461 ---------------------------------------
1462 combining_tc_sync               202 ns    199 ns    197 ns 120%
1463 combining_tc_sync - dup         207 ns    199 ns    194 ns 122%
1464 ---------------------------------------
1465 combining_tc_async              162 ns    157 ns    152 ns 155%
1466 combining_tc_async - dup        188 ns    161 ns    154 ns 154%
1467 ---------------------------------------
1468
1469 ------------------------------------ Number of threads = 6
1470
1471 Test_name, Max time, Avg time, Min time, % base min / min
1472
1473 no_combining - base             298 ns    292 ns    281 ns
1474 no_combining - dup              296 ns    289 ns    270 ns 104%
1475 ---------------------------------------
1476 ---- dedicated-------------------------
1477 combining_notc_sync             221 ns    211 ns    196 ns 143%
1478 combining_notc_sync - dup       247 ns    211 ns    192 ns 146%
1479 ---------------------------------------
1480 combining_notc_async            216 ns    205 ns    194 ns 144%
1481 combining_notc_async - dup      215 ns    206 ns    197 ns 142%
1482 ---------------------------------------
1483 combining_tc_sync               225 ns    204 ns    185 ns 151%
1484 combining_tc_sync - dup         229 ns    210 ns    186 ns 151%
1485 ---------------------------------------
1486 combining_tc_async              165 ns    152 ns    144 ns 194%
1487 combining_tc_async - dup        166 ns    150 ns    143 ns 195%
1488 ---------------------------------------
1489 ---- no dedicated----------------------
1490 combining_notc_sync             184 ns    182 ns    180 ns 155%
1491 combining_notc_sync - dup       176 ns    174 ns    172 ns 163%
1492 ---------------------------------------
1493 combining_notc_async            179 ns    177 ns    174 ns 161%
1494 combining_notc_async - dup      186 ns    181 ns    177 ns 158%
1495 ---------------------------------------
1496 combining_tc_sync               164 ns    163 ns    160 ns 174%
1497 combining_tc_sync - dup         171 ns    168 ns    161 ns 173%
1498 ---------------------------------------
1499 combining_tc_async              142 ns    139 ns    138 ns 202%
1500 combining_tc_async - dup        141 ns    136 ns    119 ns 235%
1501 ---------------------------------------
1502
1503 ------------------------------------ Number of threads = 8
1504
1505 Test_name, Max time, Avg time, Min time, % base min / min
1506
1507 no_combining - base             333 ns    328 ns    315 ns
1508 no_combining - dup              336 ns    330 ns    327 ns  96%
1509 ---------------------------------------
1510 ---- dedicated-------------------------
1511 combining_notc_sync             203 ns    179 ns    172 ns 183%
1512 combining_notc_sync - dup       190 ns    177 ns    171 ns 183%
1513 ---------------------------------------
1514 combining_notc_async            204 ns    183 ns    170 ns 185%
1515 combining_notc_async - dup      201 ns    187 ns    176 ns 179%
1516 ---------------------------------------
1517 combining_tc_sync               177 ns    170 ns    165 ns 190%
1518 combining_tc_sync - dup         178 ns    167 ns    164 ns 192%
1519 ---------------------------------------
1520 combining_tc_async              134 ns    115 ns    105 ns 300%
1521 combining_tc_async - dup        132 ns    115 ns    103 ns 304%
1522 ---------------------------------------
1523 ---- no dedicated----------------------
1524 combining_notc_sync             154 ns    145 ns    143 ns 220%
1525 combining_notc_sync - dup       153 ns    144 ns    142 ns 222%
1526 ---------------------------------------
1527 combining_notc_async            145 ns    144 ns    143 ns 219%
1528 combining_notc_async - dup      157 ns    148 ns    144 ns 218%
1529 ---------------------------------------
1530 combining_tc_sync               142 ns    134 ns    130 ns 241%
1531 combining_tc_sync - dup         144 ns    136 ns    130 ns 241%
1532 ---------------------------------------
1533 combining_tc_async              118 ns     99 ns     91 ns 344%
1534 combining_tc_async - dup        118 ns     95 ns     91 ns 344%
1535 ---------------------------------------
1536
1537 ------------------------------------ Number of threads = 12
1538
1539 Test_name, Max time, Avg time, Min time, % base min / min
1540
1541 no_combining - base             361 ns    357 ns    353 ns
1542 no_combining - dup              361 ns    357 ns    355 ns  99%
1543 ---------------------------------------
1544 ---- dedicated-------------------------
1545 combining_notc_sync             190 ns    157 ns    138 ns 255%
1546 combining_notc_sync - dup       162 ns    149 ns    138 ns 255%
1547 ---------------------------------------
1548 combining_notc_async            163 ns    153 ns    145 ns 242%
1549 combining_notc_async - dup      194 ns    158 ns    152 ns 231%
1550 ---------------------------------------
1551 combining_tc_sync               181 ns    128 ns    111 ns 316%
1552 combining_tc_sync - dup         183 ns    148 ns    121 ns 289%
1553 ---------------------------------------
1554 combining_tc_async               92 ns     89 ns     87 ns 402%
1555 combining_tc_async - dup        152 ns    105 ns     87 ns 405%
1556 ---------------------------------------
1557 ---- no dedicated----------------------
1558 combining_notc_sync             120 ns    119 ns    118 ns 298%
1559 combining_notc_sync - dup       120 ns    119 ns    118 ns 298%
1560 ---------------------------------------
1561 combining_notc_async            122 ns    120 ns    120 ns 294%
1562 combining_notc_async - dup      121 ns    120 ns    118 ns 297%
1563 ---------------------------------------
1564 combining_tc_sync               110 ns    108 ns    106 ns 331%
1565 combining_tc_sync - dup         110 ns    109 ns    107 ns 327%
1566 ---------------------------------------
1567 combining_tc_async               88 ns     87 ns     85 ns 411%
1568 combining_tc_async - dup         90 ns     88 ns     85 ns 411%
1569 ---------------------------------------
1570
1571 ------------------------------------ Number of threads = 16
1572
1573 Test_name, Max time, Avg time, Min time, % base min / min
1574
1575 no_combining - base             363 ns    361 ns    360 ns
1576 no_combining - dup              362 ns    361 ns    358 ns 100%
1577 ---------------------------------------
1578 ---- dedicated-------------------------
1579 combining_notc_sync             177 ns    136 ns    111 ns 323%
1580 combining_notc_sync - dup       185 ns    148 ns    112 ns 320%
1581 ---------------------------------------
1582 combining_notc_async            191 ns    151 ns    122 ns 294%
1583 combining_notc_async - dup      179 ns    157 ns    118 ns 305%
1584 ---------------------------------------
1585 combining_tc_sync               154 ns    125 ns    100 ns 360%
1586 combining_tc_sync - dup         166 ns    130 ns     98 ns 367%
1587 ---------------------------------------
1588 combining_tc_async              143 ns    107 ns     86 ns 418%
1589 combining_tc_async - dup        132 ns    112 ns     88 ns 407%
1590 ---------------------------------------
1591 ---- no dedicated----------------------
1592 combining_notc_sync             121 ns    103 ns     98 ns 367%
1593 combining_notc_sync - dup       117 ns    104 ns     99 ns 362%
1594 ---------------------------------------
1595 combining_notc_async            116 ns    105 ns     99 ns 363%
1596 combining_notc_async - dup      112 ns    104 ns    100 ns 359%
1597 ---------------------------------------
1598 combining_tc_sync               111 ns    101 ns     94 ns 381%
1599 combining_tc_sync - dup         113 ns     98 ns     93 ns 387%
1600 ---------------------------------------
1601 combining_tc_async               97 ns     85 ns     74 ns 484%
1602 combining_tc_async - dup         98 ns     86 ns     78 ns 457%
1603 ---------------------------------------
1604
1605 ------------------------------------ Number of threads = 24
1606
1607 Test_name, Max time, Avg time, Min time, % base min / min
1608
1609 no_combining - base             352 ns    351 ns    349 ns
1610 no_combining - dup              352 ns    351 ns    348 ns 100%
1611 ---------------------------------------
1612 ---- dedicated-------------------------
1613 combining_notc_sync             214 ns    173 ns    149 ns 234%
1614 combining_notc_sync - dup       212 ns    166 ns    137 ns 254%
1615 ---------------------------------------
1616 combining_notc_async            232 ns    198 ns    161 ns 216%
1617 combining_notc_async - dup      225 ns    191 ns    149 ns 234%
1618 ---------------------------------------
1619 combining_tc_sync               192 ns    152 ns    129 ns 270%
1620 combining_tc_sync - dup         176 ns    156 ns    121 ns 286%
1621 ---------------------------------------
1622 combining_tc_async              202 ns    147 ns    118 ns 296%
1623 combining_tc_async - dup        200 ns    158 ns    120 ns 291%
1624 ---------------------------------------
1625 ---- no dedicated----------------------
1626 combining_notc_sync             161 ns    125 ns    115 ns 303%
1627 combining_notc_sync - dup       144 ns    127 ns    116 ns 299%
1628 ---------------------------------------
1629 combining_notc_async            135 ns    122 ns    116 ns 298%
1630 combining_notc_async - dup      341 ns    148 ns    117 ns 298%
1631 ---------------------------------------
1632 combining_tc_sync               130 ns    118 ns    109 ns 319%
1633 combining_tc_sync - dup         116 ns    110 ns    105 ns 332%
1634 ---------------------------------------
1635 combining_tc_async               97 ns     86 ns     79 ns 442%
1636 combining_tc_async - dup         95 ns     86 ns     79 ns 440%
1637 ---------------------------------------
1638
1639 ------------------------------------ Number of threads = 32
1640
1641 Test_name, Max time, Avg time, Min time, % base min / min
1642
1643 no_combining - base             337 ns    336 ns    333 ns
1644 no_combining - dup              338 ns    336 ns    333 ns  99%
1645 ---------------------------------------
1646 ---- dedicated-------------------------
1647 combining_notc_sync             193 ns    177 ns    162 ns 204%
1648 combining_notc_sync - dup       211 ns    181 ns    156 ns 213%
1649 ---------------------------------------
1650 combining_notc_async            245 ns    200 ns    162 ns 205%
1651 combining_notc_async - dup      216 ns    197 ns    149 ns 223%
1652 ---------------------------------------
1653 combining_tc_sync               195 ns    167 ns    121 ns 274%
1654 combining_tc_sync - dup         179 ns    164 ns    143 ns 231%
1655 ---------------------------------------
1656 combining_tc_async              187 ns    152 ns    108 ns 307%
1657 combining_tc_async - dup        182 ns    151 ns    125 ns 266%
1658 ---------------------------------------
1659 ---- no dedicated----------------------
1660 combining_notc_sync             189 ns    127 ns    114 ns 290%
1661 combining_notc_sync - dup       126 ns    118 ns    110 ns 302%
1662 ---------------------------------------
1663 combining_notc_async            233 ns    129 ns    112 ns 297%
1664 combining_notc_async - dup      170 ns    126 ns    113 ns 293%
1665 ---------------------------------------
1666 combining_tc_sync               948 ns    212 ns    107 ns 309%
1667 combining_tc_sync - dup         137 ns    112 ns    104 ns 318%
1668 ---------------------------------------
1669 combining_tc_async               90 ns     86 ns     79 ns 421%
1670 combining_tc_async - dup         94 ns     87 ns     80 ns 414%
1671 ---------------------------------------
1672
1673 ------------------------------------ Number of threads = 48
1674
1675 Test_name, Max time, Avg time, Min time, % base min / min
1676
1677 no_combining - base             340 ns    336 ns    334 ns
1678 no_combining - dup              336 ns    335 ns    334 ns 100%
1679 ---------------------------------------
1680 ---- dedicated-------------------------
1681 combining_notc_sync             214 ns    176 ns    137 ns 243%
1682 combining_notc_sync - dup       210 ns    173 ns    128 ns 260%
1683 ---------------------------------------
1684 combining_notc_async            217 ns    186 ns    162 ns 205%
1685 combining_notc_async - dup      215 ns    186 ns    149 ns 224%
1686 ---------------------------------------
1687 combining_tc_sync               206 ns    171 ns    145 ns 230%
1688 combining_tc_sync - dup         179 ns    149 ns    126 ns 265%
1689 ---------------------------------------
1690 combining_tc_async              175 ns    138 ns    108 ns 309%
1691 combining_tc_async - dup        169 ns    134 ns    110 ns 301%
1692 ---------------------------------------
1693 ---- no dedicated----------------------
1694 combining_notc_sync            1798 ns    293 ns    118 ns 282%
1695 combining_notc_sync - dup       171 ns    122 ns    105 ns 318%
1696 ---------------------------------------
1697 combining_notc_async            227 ns    132 ns    110 ns 302%
1698 combining_notc_async - dup      226 ns    137 ns    111 ns 301%
1699 ---------------------------------------
1700 combining_tc_sync               111 ns    106 ns    102 ns 327%
1701 combining_tc_sync - dup         127 ns    110 ns    104 ns 321%
1702 ---------------------------------------
1703 combining_tc_async              297 ns    117 ns     77 ns 433%
1704 combining_tc_async - dup        742 ns    149 ns     77 ns 432%
1705 ---------------------------------------
1706
1707 ------------------------------------ Number of threads = 64
1708
1709 Test_name, Max time, Avg time, Min time, % base min / min
1710
1711 no_combining - base             338 ns    333 ns    331 ns
1712 no_combining - dup              335 ns    333 ns    331 ns  99%
1713 ---------------------------------------
1714 ---- dedicated-------------------------
1715 combining_notc_sync             198 ns    163 ns    148 ns 223%
1716 combining_notc_sync - dup       172 ns    154 ns    124 ns 266%
1717 ---------------------------------------
1718 combining_notc_async            211 ns    177 ns    158 ns 209%
1719 combining_notc_async - dup      182 ns    166 ns    152 ns 216%
1720 ---------------------------------------
1721 combining_tc_sync               195 ns    133 ns    112 ns 294%
1722 combining_tc_sync - dup         158 ns    135 ns    108 ns 305%
1723 ---------------------------------------
1724 combining_tc_async              145 ns    119 ns     95 ns 347%
1725 combining_tc_async - dup        159 ns    130 ns     95 ns 346%
1726 ---------------------------------------
1727 ---- no dedicated----------------------
1728 combining_notc_sync             188 ns    123 ns    107 ns 308%
1729 combining_notc_sync - dup       546 ns    159 ns    107 ns 307%
1730 ---------------------------------------
1731 combining_notc_async            558 ns    160 ns    108 ns 304%
1732 combining_notc_async - dup      192 ns    127 ns    107 ns 308%
1733 ---------------------------------------
1734 combining_tc_sync               325 ns    130 ns    101 ns 325%
1735 combining_tc_sync - dup        1766 ns    273 ns    101 ns 325%
1736 ---------------------------------------
1737 combining_tc_async              417 ns    118 ns     74 ns 446%
1738 combining_tc_async - dup        838 ns    212 ns     72 ns 455%
1739 ---------------------------------------
1740 [       OK ] FlatCombining.direct_measurement (178622 ms)
1741 [----------] 2 tests from FlatCombining (633891 ms total)
1742
1743 [----------] Global test environment tear-down
1744 [==========] 2 tests from 1 test case ran. (633891 ms total)
1745 [  PASSED  ] 2 tests.
1746
1747 ---
1748
1749 $ lscpu
1750
1751 Architecture:          x86_64
1752 CPU op-mode(s):        32-bit, 64-bit
1753 Byte Order:            Little Endian
1754 CPU(s):                32
1755 On-line CPU(s) list:   0-31
1756 Thread(s) per core:    2
1757 Core(s) per socket:    8
1758 Socket(s):             2
1759 NUMA node(s):          2
1760 Vendor ID:             GenuineIntel
1761 CPU family:            6
1762 Model:                 45
1763 Model name:            Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
1764 Stepping:              6
1765 CPU MHz:               2200.000
1766 CPU max MHz:           2200.0000
1767 CPU min MHz:           1200.0000
1768 BogoMIPS:              4399.87
1769 Virtualization:        VT-x
1770 L1d cache:             32K
1771 L1i cache:             32K
1772 L2 cache:              256K
1773 L3 cache:              20480K
1774 NUMA node0 CPU(s):     0-7,16-23
1775 NUMA node1 CPU(s):     8-15,24-31
1776
1777 Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep
1778 mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
1779 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
1780 rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
1781 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca
1782 sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm
1783 epb tpr_shadow vnmi flexpriority ept vpid xsaveopt dtherm arat pln pts
1784
1785 ---
1786
1787  */