2 * Copyright 2017-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/Benchmark.h>
18 #include <folly/experimental/flat_combining/test/FlatCombiningTestHelpers.h>
20 #include <folly/portability/GTest.h>
21 #include <glog/logging.h>
23 using namespace folly::test;
25 // Pass --benchmark to run the folly::Benchmark suite.
26 // Pass --direct to run the direct measurements (not using folly::Benchmark).
27 DEFINE_bool(direct, false, "run direct measurement");
28 DEFINE_int32(reps, 10, "number of reps");
29 DEFINE_int32(ops, 100000, "number of operations per rep");
30 DEFINE_int32(lines, 5, "number of cache lines accessed per operation");
31 DEFINE_int32(numRecs, 8, "number of records");
32 DEFINE_int32(work, 1000, "amount of unrelated work per operation");
34 static std::vector<int> nthr = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64};
38 static bool dedicated;
42 // baseline - no combining
43 BENCHMARK(no_combining_base, iters) {
61 BENCHMARK_RELATIVE(no_combining_dup, iters) {
80 BENCHMARK_RELATIVE(combining_dedicated_notc_sync, iters) {
98 BENCHMARK_RELATIVE(combining_dedicated_notc_sync_dup, iters) {
112 BENCHMARK_DRAW_LINE()
114 BENCHMARK_RELATIVE(combining_dedicated_notc_async, iters) {
129 BENCHMARK_RELATIVE(combining_dedicated_notc_async_dup, iters) {
143 BENCHMARK_DRAW_LINE()
145 BENCHMARK_RELATIVE(combining_dedicated_tc_sync, iters) {
161 BENCHMARK_RELATIVE(combining_dedicated_tc_sync_dup, iters) {
175 BENCHMARK_DRAW_LINE()
177 BENCHMARK_RELATIVE(combining_dedicated_tc_async, iters) {
193 BENCHMARK_RELATIVE(combining_dedicated_tc_async_dup, iters) {
207 BENCHMARK_DRAW_LINE()
209 // no dedicated combiner
211 BENCHMARK_DRAW_LINE()
213 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync, iters) {
230 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync_dup, iters) {
244 BENCHMARK_DRAW_LINE()
246 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async, iters) {
261 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async_dup, iters) {
275 BENCHMARK_DRAW_LINE()
277 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync, iters) {
293 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync_dup, iters) {
307 BENCHMARK_DRAW_LINE()
309 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async, iters) {
325 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async_dup, iters) {
339 BENCHMARK_DRAW_LINE()
341 void benchmarkSetup() {
342 int numCores = std::thread::hardware_concurrency();
343 std::cout << "\nRunning benchmarks on machine with " << numCores
344 << " logical cores" << std::endl;
347 TEST(FlatCombining, folly_benchmark) {
348 if (FLAGS_benchmark) {
350 for (bool b : {true, false}) {
352 std::string str = simple ? "simple" : "custom";
353 std::cout << "\n------------------------------------ " << str
354 << " interface" << std::endl;
356 std::cout << "\n---------------------------------- Number of threads = "
359 folly::runBenchmarks();
365 // Direct measurement - not using folly::Benchmark
367 static uint64_t test(
374 uint64_t min = UINTMAX_MAX;
378 for (int i = 0; i < FLAGS_reps; ++i) {
379 uint64_t dur = run_test(
391 min = std::min(min, dur);
392 max = std::max(max, dur);
394 uint64_t avg = sum / FLAGS_reps;
398 std::cout << " " << std::setw(4) << max / FLAGS_ops << " ns";
399 std::cout << " " << std::setw(4) << avg / FLAGS_ops << " ns";
400 std::cout << " " << std::setw(4) << res / FLAGS_ops << " ns";
402 std::cout << " " << std::setw(3) << 100 * base / res << "%";
404 std::cout << std::endl;
408 TEST(FlatCombining, direct_measurement) {
414 std::string str = simple ? "simple" : "custom";
415 std::cout << "\n------------------------------------ " << str << " interface"
419 std::cout << "\n------------------------------------ Number of threads = "
422 std::cout << "Test_name, Max time, Avg time, Min time, % base min / min\n"
426 test("no_combining - base ", false, false, false, false, 0);
427 test("no_combining - dup ", false, false, false, false, base);
428 std::cout << "---------------------------------------" << std::endl;
430 std::cout << "---- dedicated-------------------------" << std::endl;
431 test("combining_notc_sync ", true, true, false, true, base);
432 test("combining_notc_sync - dup ", true, true, false, true, base);
433 std::cout << "---------------------------------------" << std::endl;
434 test("combining_notc_async ", true, true, false, false, base);
435 test("combining_notc_async - dup ", true, true, false, false, base);
436 std::cout << "---------------------------------------" << std::endl;
437 test("combining_tc_sync ", true, true, true, true, base);
438 test("combining_tc_sync - dup ", true, true, true, true, base);
439 std::cout << "---------------------------------------" << std::endl;
440 test("combining_tc_async ", true, true, true, false, base);
441 test("combining_tc_async - dup ", true, true, true, false, base);
442 std::cout << "---------------------------------------" << std::endl;
444 std::cout << "---- no dedicated----------------------" << std::endl;
445 test("combining_notc_sync ", true, false, false, true, base);
446 test("combining_notc_sync - dup ", true, false, false, true, base);
447 std::cout << "---------------------------------------" << std::endl;
448 test("combining_notc_async ", true, false, false, false, base);
449 test("combining_notc_async - dup ", true, false, false, false, base);
450 std::cout << "---------------------------------------" << std::endl;
451 test("combining_tc_sync ", true, false, true, true, base);
452 test("combining_tc_sync - dup ", true, false, true, true, base);
453 std::cout << "---------------------------------------" << std::endl;
454 test("combining_tc_async ", true, false, true, false, base);
455 test("combining_tc_async - dup ", true, false, true, false, base);
456 std::cout << "---------------------------------------" << std::endl;
461 See benchmark results in https://phabricator.intern.facebook.com/P57204895
463 The results are from a run using the command
464 $ numactl -N 1 flat_combining_benchmark --benchmark --bm_min_iters=100000 --direct
466 With the benchmark's default parameters, each iteration performs one
467 operation on the shared data structure that updates 5 cache lines,
468 followed by unrelated work (~300ns). The benchmark does not do any
469 smart combining (i.e., saving or dropping some work based on
470 understanding the details of the combined operations).
472 Direct measurements are included to examine the high variance seen in some cases.
473 Each configuration is also run a second time (the "_dup" entries) to help judge whether outliers are meaningful.
476 [==========] Running 2 tests from 1 test case.
477 [----------] Global test environment set-up.
478 [----------] 2 tests from FlatCombining
479 [ RUN ] FlatCombining.folly_benchmark
481 Running benchmarks on machine with 32 logical cores
483 ------------------------------------ simple interface
485 ---------------------------------- Number of threads = 1
486 ============================================================================
487 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
488 ============================================================================
489 no_combining_base 330.43ns 3.03M
490 no_combining_dup 100.09% 330.13ns 3.03M
491 ----------------------------------------------------------------------------
492 ----------------------------------------------------------------------------
493 combining_dedicated_notc_sync 93.17% 354.66ns 2.82M
494 combining_dedicated_notc_sync_dup 93.57% 353.15ns 2.83M
495 ----------------------------------------------------------------------------
496 combining_dedicated_notc_async 99.35% 332.60ns 3.01M
497 combining_dedicated_notc_async_dup 99.07% 333.54ns 3.00M
498 ----------------------------------------------------------------------------
499 combining_dedicated_tc_sync 93.05% 355.13ns 2.82M
500 combining_dedicated_tc_sync_dup 92.87% 355.81ns 2.81M
501 ----------------------------------------------------------------------------
502 combining_dedicated_tc_async 99.17% 333.21ns 3.00M
503 combining_dedicated_tc_async_dup 99.28% 332.84ns 3.00M
504 ----------------------------------------------------------------------------
505 ----------------------------------------------------------------------------
506 combining_no_dedicated_notc_sync 93.51% 353.38ns 2.83M
507 combining_no_dedicated_notc_sync_dup 93.27% 354.26ns 2.82M
508 ----------------------------------------------------------------------------
509 combining_no_dedicated_notc_async 99.40% 332.44ns 3.01M
510 combining_no_dedicated_notc_async_dup 99.13% 333.34ns 3.00M
511 ----------------------------------------------------------------------------
512 combining_no_dedicated_tc_sync 93.38% 353.86ns 2.83M
513 combining_no_dedicated_tc_sync_dup 93.52% 353.31ns 2.83M
514 ----------------------------------------------------------------------------
515 combining_no_dedicated_tc_async 99.29% 332.78ns 3.00M
516 combining_no_dedicated_tc_async_dup 99.19% 333.11ns 3.00M
517 ----------------------------------------------------------------------------
518 ============================================================================
520 ---------------------------------- Number of threads = 2
521 ============================================================================
522 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
523 ============================================================================
524 no_combining_base 213.60ns 4.68M
525 no_combining_dup 100.84% 211.82ns 4.72M
526 ----------------------------------------------------------------------------
527 ----------------------------------------------------------------------------
528 combining_dedicated_notc_sync 89.84% 237.76ns 4.21M
529 combining_dedicated_notc_sync_dup 89.85% 237.73ns 4.21M
530 ----------------------------------------------------------------------------
531 combining_dedicated_notc_async 93.80% 227.72ns 4.39M
532 combining_dedicated_notc_async_dup 87.85% 243.15ns 4.11M
533 ----------------------------------------------------------------------------
534 combining_dedicated_tc_sync 86.81% 246.06ns 4.06M
535 combining_dedicated_tc_sync_dup 87.15% 245.09ns 4.08M
536 ----------------------------------------------------------------------------
537 combining_dedicated_tc_async 92.14% 231.82ns 4.31M
538 combining_dedicated_tc_async_dup 92.04% 232.08ns 4.31M
539 ----------------------------------------------------------------------------
540 ----------------------------------------------------------------------------
541 combining_no_dedicated_notc_sync 95.20% 224.36ns 4.46M
542 combining_no_dedicated_notc_sync_dup 95.40% 223.91ns 4.47M
543 ----------------------------------------------------------------------------
544 combining_no_dedicated_notc_async 95.41% 223.89ns 4.47M
545 combining_no_dedicated_notc_async_dup 95.86% 222.82ns 4.49M
546 ----------------------------------------------------------------------------
547 combining_no_dedicated_tc_sync 94.43% 226.21ns 4.42M
548 combining_no_dedicated_tc_sync_dup 94.28% 226.56ns 4.41M
549 ----------------------------------------------------------------------------
550 combining_no_dedicated_tc_async 96.62% 221.07ns 4.52M
551 combining_no_dedicated_tc_async_dup 97.24% 219.66ns 4.55M
552 ----------------------------------------------------------------------------
553 ============================================================================
555 ---------------------------------- Number of threads = 3
556 ============================================================================
557 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
558 ============================================================================
559 no_combining_base 188.20ns 5.31M
560 no_combining_dup 94.07% 200.07ns 5.00M
561 ----------------------------------------------------------------------------
562 ----------------------------------------------------------------------------
563 combining_dedicated_notc_sync 95.39% 197.30ns 5.07M
564 combining_dedicated_notc_sync_dup 94.50% 199.16ns 5.02M
565 ----------------------------------------------------------------------------
566 combining_dedicated_notc_async 75.29% 249.96ns 4.00M
567 combining_dedicated_notc_async_dup 72.97% 257.91ns 3.88M
568 ----------------------------------------------------------------------------
569 combining_dedicated_tc_sync 91.26% 206.22ns 4.85M
570 combining_dedicated_tc_sync_dup 90.68% 207.54ns 4.82M
571 ----------------------------------------------------------------------------
572 combining_dedicated_tc_async 89.64% 209.95ns 4.76M
573 combining_dedicated_tc_async_dup 88.21% 213.36ns 4.69M
574 ----------------------------------------------------------------------------
575 ----------------------------------------------------------------------------
576 combining_no_dedicated_notc_sync 96.19% 195.66ns 5.11M
577 combining_no_dedicated_notc_sync_dup 93.27% 201.78ns 4.96M
578 ----------------------------------------------------------------------------
579 combining_no_dedicated_notc_async 81.12% 231.99ns 4.31M
580 combining_no_dedicated_notc_async_dup 82.48% 228.19ns 4.38M
581 ----------------------------------------------------------------------------
582 combining_no_dedicated_tc_sync 79.48% 236.78ns 4.22M
583 combining_no_dedicated_tc_sync_dup 79.73% 236.04ns 4.24M
584 ----------------------------------------------------------------------------
585 combining_no_dedicated_tc_async 100.70% 186.90ns 5.35M
586 combining_no_dedicated_tc_async_dup 99.43% 189.27ns 5.28M
587 ----------------------------------------------------------------------------
588 ============================================================================
590 ---------------------------------- Number of threads = 4
591 ============================================================================
592 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
593 ============================================================================
594 no_combining_base 242.84ns 4.12M
595 no_combining_dup 100.78% 240.96ns 4.15M
596 ----------------------------------------------------------------------------
597 ----------------------------------------------------------------------------
598 combining_dedicated_notc_sync 100.91% 240.65ns 4.16M
599 combining_dedicated_notc_sync_dup 99.76% 243.42ns 4.11M
600 ----------------------------------------------------------------------------
601 combining_dedicated_notc_async 102.06% 237.95ns 4.20M
602 combining_dedicated_notc_async_dup 101.63% 238.94ns 4.19M
603 ----------------------------------------------------------------------------
604 combining_dedicated_tc_sync 109.79% 221.18ns 4.52M
605 combining_dedicated_tc_sync_dup 108.94% 222.92ns 4.49M
606 ----------------------------------------------------------------------------
607 combining_dedicated_tc_async 133.01% 182.58ns 5.48M
608 combining_dedicated_tc_async_dup 134.91% 180.00ns 5.56M
609 ----------------------------------------------------------------------------
610 ----------------------------------------------------------------------------
611 combining_no_dedicated_notc_sync 108.77% 223.25ns 4.48M
612 combining_no_dedicated_notc_sync_dup 107.64% 225.61ns 4.43M
613 ----------------------------------------------------------------------------
614 combining_no_dedicated_notc_async 115.14% 210.91ns 4.74M
615 combining_no_dedicated_notc_async_dup 115.06% 211.05ns 4.74M
616 ----------------------------------------------------------------------------
617 combining_no_dedicated_tc_sync 116.36% 208.70ns 4.79M
618 combining_no_dedicated_tc_sync_dup 115.70% 209.89ns 4.76M
619 ----------------------------------------------------------------------------
620 combining_no_dedicated_tc_async 159.69% 152.07ns 6.58M
621 combining_no_dedicated_tc_async_dup 158.27% 153.43ns 6.52M
622 ----------------------------------------------------------------------------
623 ============================================================================
625 ---------------------------------- Number of threads = 6
626 ============================================================================
627 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
628 ============================================================================
629 no_combining_base 281.36ns 3.55M
630 no_combining_dup 98.56% 285.46ns 3.50M
631 ----------------------------------------------------------------------------
632 ----------------------------------------------------------------------------
633 combining_dedicated_notc_sync 132.39% 212.51ns 4.71M
634 combining_dedicated_notc_sync_dup 133.10% 211.38ns 4.73M
635 ----------------------------------------------------------------------------
636 combining_dedicated_notc_async 141.35% 199.05ns 5.02M
637 combining_dedicated_notc_async_dup 143.18% 196.51ns 5.09M
638 ----------------------------------------------------------------------------
639 combining_dedicated_tc_sync 138.94% 202.50ns 4.94M
640 combining_dedicated_tc_sync_dup 138.64% 202.93ns 4.93M
641 ----------------------------------------------------------------------------
642 combining_dedicated_tc_async 199.76% 140.85ns 7.10M
643 combining_dedicated_tc_async_dup 200.28% 140.48ns 7.12M
644 ----------------------------------------------------------------------------
645 ----------------------------------------------------------------------------
646 combining_no_dedicated_notc_sync 155.48% 180.96ns 5.53M
647 combining_no_dedicated_notc_sync_dup 150.82% 186.55ns 5.36M
648 ----------------------------------------------------------------------------
649 combining_no_dedicated_notc_async 162.23% 173.43ns 5.77M
650 combining_no_dedicated_notc_async_dup 161.33% 174.39ns 5.73M
651 ----------------------------------------------------------------------------
652 combining_no_dedicated_tc_sync 167.90% 167.57ns 5.97M
653 combining_no_dedicated_tc_sync_dup 164.84% 170.69ns 5.86M
654 ----------------------------------------------------------------------------
655 combining_no_dedicated_tc_async 242.51% 116.02ns 8.62M
656 combining_no_dedicated_tc_async_dup 245.67% 114.53ns 8.73M
657 ----------------------------------------------------------------------------
658 ============================================================================
660 ---------------------------------- Number of threads = 8
661 ============================================================================
662 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
663 ============================================================================
664 no_combining_base 315.57ns 3.17M
665 no_combining_dup 98.83% 319.32ns 3.13M
666 ----------------------------------------------------------------------------
667 ----------------------------------------------------------------------------
668 combining_dedicated_notc_sync 170.48% 185.11ns 5.40M
669 combining_dedicated_notc_sync_dup 174.57% 180.77ns 5.53M
670 ----------------------------------------------------------------------------
671 combining_dedicated_notc_async 178.57% 176.72ns 5.66M
672 combining_dedicated_notc_async_dup 181.30% 174.06ns 5.75M
673 ----------------------------------------------------------------------------
674 combining_dedicated_tc_sync 195.40% 161.50ns 6.19M
675 combining_dedicated_tc_sync_dup 197.18% 160.05ns 6.25M
676 ----------------------------------------------------------------------------
677 combining_dedicated_tc_async 322.03% 97.99ns 10.20M
678 combining_dedicated_tc_async_dup 324.51% 97.24ns 10.28M
679 ----------------------------------------------------------------------------
680 ----------------------------------------------------------------------------
681 combining_no_dedicated_notc_sync 205.61% 153.48ns 6.52M
682 combining_no_dedicated_notc_sync_dup 204.94% 153.98ns 6.49M
683 ----------------------------------------------------------------------------
684 combining_no_dedicated_notc_async 217.81% 144.88ns 6.90M
685 combining_no_dedicated_notc_async_dup 218.58% 144.37ns 6.93M
686 ----------------------------------------------------------------------------
687 combining_no_dedicated_tc_sync 223.96% 140.91ns 7.10M
688 combining_no_dedicated_tc_sync_dup 224.55% 140.53ns 7.12M
689 ----------------------------------------------------------------------------
690 combining_no_dedicated_tc_async 364.58% 86.56ns 11.55M
691 combining_no_dedicated_tc_async_dup 363.33% 86.86ns 11.51M
692 ----------------------------------------------------------------------------
693 ============================================================================
695 ---------------------------------- Number of threads = 12
696 ============================================================================
697 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
698 ============================================================================
699 no_combining_base 353.59ns 2.83M
700 no_combining_dup 99.91% 353.91ns 2.83M
701 ----------------------------------------------------------------------------
702 ----------------------------------------------------------------------------
703 combining_dedicated_notc_sync 276.36% 127.95ns 7.82M
704 combining_dedicated_notc_sync_dup 278.88% 126.79ns 7.89M
705 ----------------------------------------------------------------------------
706 combining_dedicated_notc_async 249.52% 141.71ns 7.06M
707 combining_dedicated_notc_async_dup 247.26% 143.00ns 6.99M
708 ----------------------------------------------------------------------------
709 combining_dedicated_tc_sync 318.57% 110.99ns 9.01M
710 combining_dedicated_tc_sync_dup 326.27% 108.37ns 9.23M
711 ----------------------------------------------------------------------------
712 combining_dedicated_tc_async 428.50% 82.52ns 12.12M
713 combining_dedicated_tc_async_dup 429.19% 82.39ns 12.14M
714 ----------------------------------------------------------------------------
715 ----------------------------------------------------------------------------
716 combining_no_dedicated_notc_sync 276.54% 127.86ns 7.82M
717 combining_no_dedicated_notc_sync_dup 275.59% 128.31ns 7.79M
718 ----------------------------------------------------------------------------
719 combining_no_dedicated_notc_async 298.92% 118.29ns 8.45M
720 combining_no_dedicated_notc_async_dup 298.93% 118.28ns 8.45M
721 ----------------------------------------------------------------------------
722 combining_no_dedicated_tc_sync 300.56% 117.64ns 8.50M
723 combining_no_dedicated_tc_sync_dup 296.95% 119.07ns 8.40M
724 ----------------------------------------------------------------------------
725 combining_no_dedicated_tc_async 431.06% 82.03ns 12.19M
726 combining_no_dedicated_tc_async_dup 430.40% 82.15ns 12.17M
727 ----------------------------------------------------------------------------
728 ============================================================================
730 ---------------------------------- Number of threads = 16
731 ============================================================================
732 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
733 ============================================================================
734 no_combining_base 358.57ns 2.79M
735 no_combining_dup 99.97% 358.70ns 2.79M
736 ----------------------------------------------------------------------------
737 ----------------------------------------------------------------------------
738 combining_dedicated_notc_sync 319.73% 112.15ns 8.92M
739 combining_dedicated_notc_sync_dup 327.86% 109.37ns 9.14M
740 ----------------------------------------------------------------------------
741 combining_dedicated_notc_async 296.17% 121.07ns 8.26M
742 combining_dedicated_notc_async_dup 306.86% 116.85ns 8.56M
743 ----------------------------------------------------------------------------
744 combining_dedicated_tc_sync 337.53% 106.24ns 9.41M
745 combining_dedicated_tc_sync_dup 347.98% 103.04ns 9.70M
746 ----------------------------------------------------------------------------
747 combining_dedicated_tc_async 423.80% 84.61ns 11.82M
748 combining_dedicated_tc_async_dup 421.07% 85.16ns 11.74M
749 ----------------------------------------------------------------------------
750 ----------------------------------------------------------------------------
751 combining_no_dedicated_notc_sync 321.94% 111.38ns 8.98M
752 combining_no_dedicated_notc_sync_dup 318.54% 112.57ns 8.88M
753 ----------------------------------------------------------------------------
754 combining_no_dedicated_notc_async 364.71% 98.32ns 10.17M
755 combining_no_dedicated_notc_async_dup 364.22% 98.45ns 10.16M
756 ----------------------------------------------------------------------------
757 combining_no_dedicated_tc_sync 322.91% 111.04ns 9.01M
758 combining_no_dedicated_tc_sync_dup 322.42% 111.21ns 8.99M
759 ----------------------------------------------------------------------------
760 combining_no_dedicated_tc_async 466.30% 76.90ns 13.00M
761 combining_no_dedicated_tc_async_dup 462.76% 77.49ns 12.91M
762 ----------------------------------------------------------------------------
763 ============================================================================
765 ---------------------------------- Number of threads = 24
766 ============================================================================
767 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
768 ============================================================================
769 no_combining_base 348.54ns 2.87M
770 no_combining_dup 99.96% 348.69ns 2.87M
771 ----------------------------------------------------------------------------
772 ----------------------------------------------------------------------------
773 combining_dedicated_notc_sync 260.21% 133.95ns 7.47M
774 combining_dedicated_notc_sync_dup 257.84% 135.18ns 7.40M
775 ----------------------------------------------------------------------------
776 combining_dedicated_notc_async 242.25% 143.88ns 6.95M
777 combining_dedicated_notc_async_dup 235.88% 147.76ns 6.77M
778 ----------------------------------------------------------------------------
779 combining_dedicated_tc_sync 262.45% 132.80ns 7.53M
780 combining_dedicated_tc_sync_dup 251.14% 138.78ns 7.21M
781 ----------------------------------------------------------------------------
782 combining_dedicated_tc_async 256.89% 135.68ns 7.37M
783 combining_dedicated_tc_async_dup 304.76% 114.37ns 8.74M
784 ----------------------------------------------------------------------------
785 ----------------------------------------------------------------------------
786 combining_no_dedicated_notc_sync 270.20% 129.00ns 7.75M
787 combining_no_dedicated_notc_sync_dup 271.69% 128.29ns 7.80M
788 ----------------------------------------------------------------------------
789 combining_no_dedicated_notc_async 298.35% 116.82ns 8.56M
790 combining_no_dedicated_notc_async_dup 289.04% 120.59ns 8.29M
791 ----------------------------------------------------------------------------
792 combining_no_dedicated_tc_sync 286.59% 121.62ns 8.22M
793 combining_no_dedicated_tc_sync_dup 292.21% 119.28ns 8.38M
794 ----------------------------------------------------------------------------
795 combining_no_dedicated_tc_async 471.86% 73.87ns 13.54M
796 combining_no_dedicated_tc_async_dup 458.16% 76.08ns 13.14M
797 ----------------------------------------------------------------------------
798 ============================================================================
800 ---------------------------------- Number of threads = 32
801 ============================================================================
802 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
803 ============================================================================
804 no_combining_base 337.61ns 2.96M
805 no_combining_dup 99.41% 339.60ns 2.94M
806 ----------------------------------------------------------------------------
807 ----------------------------------------------------------------------------
808 combining_dedicated_notc_sync 204.50% 165.09ns 6.06M
809 combining_dedicated_notc_sync_dup 233.28% 144.72ns 6.91M
810 ----------------------------------------------------------------------------
811 combining_dedicated_notc_async 187.20% 180.35ns 5.54M
812 combining_dedicated_notc_async_dup 192.76% 175.15ns 5.71M
813 ----------------------------------------------------------------------------
814 combining_dedicated_tc_sync 220.56% 153.07ns 6.53M
815 combining_dedicated_tc_sync_dup 207.62% 162.61ns 6.15M
816 ----------------------------------------------------------------------------
817 combining_dedicated_tc_async 317.11% 106.46ns 9.39M
818 combining_dedicated_tc_async_dup 318.92% 105.86ns 9.45M
819 ----------------------------------------------------------------------------
820 ----------------------------------------------------------------------------
821 combining_no_dedicated_notc_sync 259.29% 130.21ns 7.68M
822 combining_no_dedicated_notc_sync_dup 248.33% 135.95ns 7.36M
823 ----------------------------------------------------------------------------
824 combining_no_dedicated_notc_async 290.40% 116.26ns 8.60M
825 combining_no_dedicated_notc_async_dup 299.92% 112.57ns 8.88M
826 ----------------------------------------------------------------------------
827 combining_no_dedicated_tc_sync 281.91% 119.76ns 8.35M
828 combining_no_dedicated_tc_sync_dup 284.19% 118.80ns 8.42M
829 ----------------------------------------------------------------------------
830 combining_no_dedicated_tc_async 435.16% 77.58ns 12.89M
831 combining_no_dedicated_tc_async_dup 389.67% 86.64ns 11.54M
832 ----------------------------------------------------------------------------
833 ============================================================================
835 ---------------------------------- Number of threads = 48
836 ============================================================================
837 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
838 ============================================================================
839 no_combining_base 334.48ns 2.99M
840 no_combining_dup 100.00% 334.46ns 2.99M
841 ----------------------------------------------------------------------------
842 ----------------------------------------------------------------------------
843 combining_dedicated_notc_sync 257.01% 130.14ns 7.68M
844 combining_dedicated_notc_sync_dup 254.13% 131.62ns 7.60M
845 ----------------------------------------------------------------------------
846 combining_dedicated_notc_async 189.56% 176.45ns 5.67M
847 combining_dedicated_notc_async_dup 247.68% 135.05ns 7.40M
848 ----------------------------------------------------------------------------
849 combining_dedicated_tc_sync 259.47% 128.91ns 7.76M
850 combining_dedicated_tc_sync_dup 281.34% 118.89ns 8.41M
851 ----------------------------------------------------------------------------
852 combining_dedicated_tc_async 301.96% 110.77ns 9.03M
853 combining_dedicated_tc_async_dup 347.65% 96.21ns 10.39M
854 ----------------------------------------------------------------------------
855 ----------------------------------------------------------------------------
856 combining_no_dedicated_notc_sync 268.45% 124.60ns 8.03M
857 combining_no_dedicated_notc_sync_dup 272.54% 122.73ns 8.15M
858 ----------------------------------------------------------------------------
859 combining_no_dedicated_notc_async 306.04% 109.29ns 9.15M
860 combining_no_dedicated_notc_async_dup 294.38% 113.62ns 8.80M
861 ----------------------------------------------------------------------------
862 combining_no_dedicated_tc_sync 280.89% 119.08ns 8.40M
863 combining_no_dedicated_tc_sync_dup 276.01% 121.18ns 8.25M
864 ----------------------------------------------------------------------------
865 combining_no_dedicated_tc_async 466.45% 71.71ns 13.95M
866 combining_no_dedicated_tc_async_dup 465.45% 71.86ns 13.92M
867 ----------------------------------------------------------------------------
868 ============================================================================
870 ---------------------------------- Number of threads = 64
871 ============================================================================
872 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
873 ============================================================================
874 no_combining_base 335.68ns 2.98M
875 no_combining_dup 101.03% 332.25ns 3.01M
876 ----------------------------------------------------------------------------
877 ----------------------------------------------------------------------------
878 combining_dedicated_notc_sync 272.91% 123.00ns 8.13M
879 combining_dedicated_notc_sync_dup 270.56% 124.07ns 8.06M
880 ----------------------------------------------------------------------------
881 combining_dedicated_notc_async 200.44% 167.47ns 5.97M
882 combining_dedicated_notc_async_dup 208.36% 161.10ns 6.21M
883 ----------------------------------------------------------------------------
884 combining_dedicated_tc_sync 258.40% 129.91ns 7.70M
885 combining_dedicated_tc_sync_dup 249.16% 134.72ns 7.42M
886 ----------------------------------------------------------------------------
887 combining_dedicated_tc_async 378.86% 88.60ns 11.29M
888 combining_dedicated_tc_async_dup 299.32% 112.15ns 8.92M
889 ----------------------------------------------------------------------------
890 ----------------------------------------------------------------------------
891 combining_no_dedicated_notc_sync 272.18% 123.33ns 8.11M
892 combining_no_dedicated_notc_sync_dup 275.26% 121.95ns 8.20M
893 ----------------------------------------------------------------------------
894 combining_no_dedicated_notc_async 296.23% 113.32ns 8.82M
895 combining_no_dedicated_notc_async_dup 311.17% 107.88ns 9.27M
896 ----------------------------------------------------------------------------
897 combining_no_dedicated_tc_sync 283.30% 118.49ns 8.44M
898 combining_no_dedicated_tc_sync_dup 263.86% 127.22ns 7.86M
899 ----------------------------------------------------------------------------
900 combining_no_dedicated_tc_async 426.62% 78.68ns 12.71M
901 combining_no_dedicated_tc_async_dup 445.17% 75.40ns 13.26M
902 ----------------------------------------------------------------------------
903 ============================================================================
905 ------------------------------------ custom interface
907 ---------------------------------- Number of threads = 1
908 ============================================================================
909 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
910 ============================================================================
911 no_combining_base 329.49ns 3.03M
912 no_combining_dup 99.91% 329.79ns 3.03M
913 ----------------------------------------------------------------------------
914 ----------------------------------------------------------------------------
915 combining_dedicated_notc_sync 98.69% 333.88ns 3.00M
916 combining_dedicated_notc_sync_dup 98.70% 333.83ns 3.00M
917 ----------------------------------------------------------------------------
918 combining_dedicated_notc_async 98.22% 335.47ns 2.98M
919 combining_dedicated_notc_async_dup 98.16% 335.66ns 2.98M
920 ----------------------------------------------------------------------------
921 combining_dedicated_tc_sync 98.70% 333.85ns 3.00M
922 combining_dedicated_tc_sync_dup 98.78% 333.58ns 3.00M
923 ----------------------------------------------------------------------------
924 combining_dedicated_tc_async 98.14% 335.73ns 2.98M
925 combining_dedicated_tc_async_dup 97.92% 336.49ns 2.97M
926 ----------------------------------------------------------------------------
927 ----------------------------------------------------------------------------
928 combining_no_dedicated_notc_sync 98.94% 333.00ns 3.00M
929 combining_no_dedicated_notc_sync_dup 98.86% 333.29ns 3.00M
930 ----------------------------------------------------------------------------
931 combining_no_dedicated_notc_async 98.36% 334.99ns 2.99M
932 combining_no_dedicated_notc_async_dup 98.61% 334.15ns 2.99M
933 ----------------------------------------------------------------------------
934 combining_no_dedicated_tc_sync 99.07% 332.58ns 3.01M
935 combining_no_dedicated_tc_sync_dup 99.12% 332.41ns 3.01M
936 ----------------------------------------------------------------------------
937 combining_no_dedicated_tc_async 97.08% 339.38ns 2.95M
938 combining_no_dedicated_tc_async_dup 97.54% 337.81ns 2.96M
939 ----------------------------------------------------------------------------
940 ============================================================================
942 ---------------------------------- Number of threads = 2
943 ============================================================================
944 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
945 ============================================================================
946 no_combining_base 216.71ns 4.61M
947 no_combining_dup 100.34% 215.97ns 4.63M
948 ----------------------------------------------------------------------------
949 ----------------------------------------------------------------------------
950 combining_dedicated_notc_sync 95.42% 227.11ns 4.40M
951 combining_dedicated_notc_sync_dup 94.16% 230.15ns 4.34M
952 ----------------------------------------------------------------------------
953 combining_dedicated_notc_async 91.84% 235.97ns 4.24M
954 combining_dedicated_notc_async_dup 91.41% 237.08ns 4.22M
955 ----------------------------------------------------------------------------
956 combining_dedicated_tc_sync 96.79% 223.90ns 4.47M
957 combining_dedicated_tc_sync_dup 96.54% 224.47ns 4.45M
958 ----------------------------------------------------------------------------
959 combining_dedicated_tc_async 90.90% 238.41ns 4.19M
960 combining_dedicated_tc_async_dup 95.45% 227.03ns 4.40M
961 ----------------------------------------------------------------------------
962 ----------------------------------------------------------------------------
963 combining_no_dedicated_notc_sync 101.13% 214.28ns 4.67M
964 combining_no_dedicated_notc_sync_dup 100.11% 216.48ns 4.62M
965 ----------------------------------------------------------------------------
966 combining_no_dedicated_notc_async 96.40% 224.80ns 4.45M
967 combining_no_dedicated_notc_async_dup 96.36% 224.90ns 4.45M
968 ----------------------------------------------------------------------------
969 combining_no_dedicated_tc_sync 100.86% 214.85ns 4.65M
970 combining_no_dedicated_tc_sync_dup 101.91% 212.65ns 4.70M
971 ----------------------------------------------------------------------------
972 combining_no_dedicated_tc_async 95.66% 226.54ns 4.41M
973 combining_no_dedicated_tc_async_dup 95.88% 226.03ns 4.42M
974 ----------------------------------------------------------------------------
975 ============================================================================
977 ---------------------------------- Number of threads = 3
978 ============================================================================
979 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
980 ============================================================================
981 no_combining_base 189.61ns 5.27M
982 no_combining_dup 100.22% 189.20ns 5.29M
983 ----------------------------------------------------------------------------
984 ----------------------------------------------------------------------------
985 combining_dedicated_notc_sync 103.18% 183.76ns 5.44M
986 combining_dedicated_notc_sync_dup 103.66% 182.92ns 5.47M
987 ----------------------------------------------------------------------------
988 combining_dedicated_notc_async 77.14% 245.81ns 4.07M
989 combining_dedicated_notc_async_dup 90.25% 210.10ns 4.76M
990 ----------------------------------------------------------------------------
991 combining_dedicated_tc_sync 89.88% 210.95ns 4.74M
992 combining_dedicated_tc_sync_dup 87.83% 215.90ns 4.63M
993 ----------------------------------------------------------------------------
994 combining_dedicated_tc_async 89.33% 212.26ns 4.71M
995 combining_dedicated_tc_async_dup 85.19% 222.56ns 4.49M
996 ----------------------------------------------------------------------------
997 ----------------------------------------------------------------------------
998 combining_no_dedicated_notc_sync 98.43% 192.64ns 5.19M
999 combining_no_dedicated_notc_sync_dup 101.15% 187.46ns 5.33M
1000 ----------------------------------------------------------------------------
1001 combining_no_dedicated_notc_async 83.77% 226.36ns 4.42M
1002 combining_no_dedicated_notc_async_dup 84.69% 223.89ns 4.47M
1003 ----------------------------------------------------------------------------
1004 combining_no_dedicated_tc_sync 85.47% 221.85ns 4.51M
1005 combining_no_dedicated_tc_sync_dup 86.32% 219.65ns 4.55M
1006 ----------------------------------------------------------------------------
1007 combining_no_dedicated_tc_async 105.62% 179.52ns 5.57M
1008 combining_no_dedicated_tc_async_dup 105.26% 180.14ns 5.55M
1009 ----------------------------------------------------------------------------
1010 ============================================================================
1012 ---------------------------------- Number of threads = 4
1013 ============================================================================
1014 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1015 ============================================================================
1016 no_combining_base 237.50ns 4.21M
1017 no_combining_dup 99.80% 237.97ns 4.20M
1018 ----------------------------------------------------------------------------
1019 ----------------------------------------------------------------------------
1020 combining_dedicated_notc_sync 112.56% 210.99ns 4.74M
1021 combining_dedicated_notc_sync_dup 104.08% 228.20ns 4.38M
1022 ----------------------------------------------------------------------------
1023 combining_dedicated_notc_async 101.44% 234.12ns 4.27M
1024 combining_dedicated_notc_async_dup 100.73% 235.77ns 4.24M
1025 ----------------------------------------------------------------------------
1026 combining_dedicated_tc_sync 111.70% 212.62ns 4.70M
1027 combining_dedicated_tc_sync_dup 113.00% 210.18ns 4.76M
1028 ----------------------------------------------------------------------------
1029 combining_dedicated_tc_async 131.11% 181.15ns 5.52M
1030 combining_dedicated_tc_async_dup 132.65% 179.04ns 5.59M
1031 ----------------------------------------------------------------------------
1032 ----------------------------------------------------------------------------
1033 combining_no_dedicated_notc_sync 115.76% 205.17ns 4.87M
1034 combining_no_dedicated_notc_sync_dup 114.70% 207.06ns 4.83M
1035 ----------------------------------------------------------------------------
1036 combining_no_dedicated_notc_async 111.63% 212.76ns 4.70M
1037 combining_no_dedicated_notc_async_dup 111.91% 212.22ns 4.71M
1038 ----------------------------------------------------------------------------
1039 combining_no_dedicated_tc_sync 120.07% 197.80ns 5.06M
1040 combining_no_dedicated_tc_sync_dup 118.25% 200.85ns 4.98M
1041 ----------------------------------------------------------------------------
1042 combining_no_dedicated_tc_async 153.73% 154.49ns 6.47M
1043 combining_no_dedicated_tc_async_dup 153.08% 155.15ns 6.45M
1044 ----------------------------------------------------------------------------
1045 ============================================================================
1047 ---------------------------------- Number of threads = 6
1048 ============================================================================
1049 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1050 ============================================================================
1051 no_combining_base 281.56ns 3.55M
1052 no_combining_dup 99.97% 281.65ns 3.55M
1053 ----------------------------------------------------------------------------
1054 ----------------------------------------------------------------------------
1055 combining_dedicated_notc_sync 144.76% 194.50ns 5.14M
1056 combining_dedicated_notc_sync_dup 149.96% 187.76ns 5.33M
1057 ----------------------------------------------------------------------------
1058 combining_dedicated_notc_async 147.72% 190.61ns 5.25M
1059 combining_dedicated_notc_async_dup 140.86% 199.89ns 5.00M
1060 ----------------------------------------------------------------------------
1061 combining_dedicated_tc_sync 154.17% 182.63ns 5.48M
1062 combining_dedicated_tc_sync_dup 156.60% 179.80ns 5.56M
1063 ----------------------------------------------------------------------------
1064 combining_dedicated_tc_async 202.42% 139.10ns 7.19M
1065 combining_dedicated_tc_async_dup 203.44% 138.40ns 7.23M
1066 ----------------------------------------------------------------------------
1067 ----------------------------------------------------------------------------
1068 combining_no_dedicated_notc_sync 168.33% 167.27ns 5.98M
1069 combining_no_dedicated_notc_sync_dup 166.02% 169.59ns 5.90M
1070 ----------------------------------------------------------------------------
1071 combining_no_dedicated_notc_async 166.44% 169.16ns 5.91M
1072 combining_no_dedicated_notc_async_dup 160.14% 175.82ns 5.69M
1073 ----------------------------------------------------------------------------
1074 combining_no_dedicated_tc_sync 181.79% 154.88ns 6.46M
1075 combining_no_dedicated_tc_sync_dup 180.25% 156.20ns 6.40M
1076 ----------------------------------------------------------------------------
1077 combining_no_dedicated_tc_async 240.56% 117.04ns 8.54M
1078 combining_no_dedicated_tc_async_dup 240.74% 116.96ns 8.55M
1079 ----------------------------------------------------------------------------
1080 ============================================================================
1082 ---------------------------------- Number of threads = 8
1083 ============================================================================
1084 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1085 ============================================================================
1086 no_combining_base 312.99ns 3.19M
1087 no_combining_dup 98.93% 316.37ns 3.16M
1088 ----------------------------------------------------------------------------
1089 ----------------------------------------------------------------------------
1090 combining_dedicated_notc_sync 182.71% 171.30ns 5.84M
1091 combining_dedicated_notc_sync_dup 183.23% 170.82ns 5.85M
1092 ----------------------------------------------------------------------------
1093 combining_dedicated_notc_async 183.16% 170.88ns 5.85M
1094 combining_dedicated_notc_async_dup 181.29% 172.64ns 5.79M
1095 ----------------------------------------------------------------------------
1096 combining_dedicated_tc_sync 191.49% 163.45ns 6.12M
1097 combining_dedicated_tc_sync_dup 191.04% 163.84ns 6.10M
1098 ----------------------------------------------------------------------------
1099 combining_dedicated_tc_async 302.89% 103.34ns 9.68M
1100 combining_dedicated_tc_async_dup 304.07% 102.94ns 9.71M
1101 ----------------------------------------------------------------------------
1102 ----------------------------------------------------------------------------
1103 combining_no_dedicated_notc_sync 220.41% 142.00ns 7.04M
1104 combining_no_dedicated_notc_sync_dup 219.90% 142.34ns 7.03M
1105 ----------------------------------------------------------------------------
1106 combining_no_dedicated_notc_async 218.66% 143.14ns 6.99M
1107 combining_no_dedicated_notc_async_dup 218.74% 143.09ns 6.99M
1108 ----------------------------------------------------------------------------
1109 combining_no_dedicated_tc_sync 241.82% 129.43ns 7.73M
1110 combining_no_dedicated_tc_sync_dup 241.72% 129.48ns 7.72M
1111 ----------------------------------------------------------------------------
1112 combining_no_dedicated_tc_async 352.39% 88.82ns 11.26M
1113 combining_no_dedicated_tc_async_dup 350.17% 89.38ns 11.19M
1114 ----------------------------------------------------------------------------
1115 ============================================================================
1117 ---------------------------------- Number of threads = 12
1118 ============================================================================
1119 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1120 ============================================================================
1121 no_combining_base 350.05ns 2.86M
1122 no_combining_dup 99.06% 353.37ns 2.83M
1123 ----------------------------------------------------------------------------
1124 ----------------------------------------------------------------------------
1125 combining_dedicated_notc_sync 266.87% 131.17ns 7.62M
1126 combining_dedicated_notc_sync_dup 245.79% 142.42ns 7.02M
1127 ----------------------------------------------------------------------------
1128 combining_dedicated_notc_async 238.57% 146.73ns 6.82M
1129 combining_dedicated_notc_async_dup 240.02% 145.84ns 6.86M
1130 ----------------------------------------------------------------------------
1131 combining_dedicated_tc_sync 316.70% 110.53ns 9.05M
1132 combining_dedicated_tc_sync_dup 321.05% 109.03ns 9.17M
1133 ----------------------------------------------------------------------------
1134 combining_dedicated_tc_async 403.10% 86.84ns 11.52M
1135 combining_dedicated_tc_async_dup 409.94% 85.39ns 11.71M
1136 ----------------------------------------------------------------------------
1137 ----------------------------------------------------------------------------
1138 combining_no_dedicated_notc_sync 300.23% 116.59ns 8.58M
1139 combining_no_dedicated_notc_sync_dup 299.07% 117.04ns 8.54M
1140 ----------------------------------------------------------------------------
1141 combining_no_dedicated_notc_async 297.79% 117.55ns 8.51M
1142 combining_no_dedicated_notc_async_dup 296.66% 118.00ns 8.47M
1143 ----------------------------------------------------------------------------
1144 combining_no_dedicated_tc_sync 328.07% 106.70ns 9.37M
1145 combining_no_dedicated_tc_sync_dup 331.52% 105.59ns 9.47M
1146 ----------------------------------------------------------------------------
1147 combining_no_dedicated_tc_async 424.57% 82.45ns 12.13M
1148 combining_no_dedicated_tc_async_dup 409.47% 85.49ns 11.70M
1149 ----------------------------------------------------------------------------
1150 ============================================================================
1152 ---------------------------------- Number of threads = 16
1153 ============================================================================
1154 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1155 ============================================================================
1156 no_combining_base 360.47ns 2.77M
1157 no_combining_dup 100.11% 360.07ns 2.78M
1158 ----------------------------------------------------------------------------
1159 ----------------------------------------------------------------------------
1160 combining_dedicated_notc_sync 320.54% 112.46ns 8.89M
1161 combining_dedicated_notc_sync_dup 313.31% 115.05ns 8.69M
1162 ----------------------------------------------------------------------------
1163 combining_dedicated_notc_async 296.83% 121.44ns 8.23M
1164 combining_dedicated_notc_async_dup 289.91% 124.34ns 8.04M
1165 ----------------------------------------------------------------------------
1166 combining_dedicated_tc_sync 364.27% 98.96ns 10.11M
1167 combining_dedicated_tc_sync_dup 361.10% 99.82ns 10.02M
1168 ----------------------------------------------------------------------------
1169 combining_dedicated_tc_async 424.43% 84.93ns 11.77M
1170 combining_dedicated_tc_async_dup 418.07% 86.22ns 11.60M
1171 ----------------------------------------------------------------------------
1172 ----------------------------------------------------------------------------
1173 combining_no_dedicated_notc_sync 373.13% 96.60ns 10.35M
1174 combining_no_dedicated_notc_sync_dup 364.35% 98.93ns 10.11M
1175 ----------------------------------------------------------------------------
1176 combining_no_dedicated_notc_async 361.40% 99.74ns 10.03M
1177 combining_no_dedicated_notc_async_dup 366.49% 98.36ns 10.17M
1178 ----------------------------------------------------------------------------
1179 combining_no_dedicated_tc_sync 382.22% 94.31ns 10.60M
1180 combining_no_dedicated_tc_sync_dup 380.64% 94.70ns 10.56M
1181 ----------------------------------------------------------------------------
1182 combining_no_dedicated_tc_async 461.14% 78.17ns 12.79M
1183 combining_no_dedicated_tc_async_dup 481.50% 74.86ns 13.36M
1184 ----------------------------------------------------------------------------
1185 ============================================================================
1187 ---------------------------------- Number of threads = 24
1188 ============================================================================
1189 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1190 ============================================================================
1191 no_combining_base 348.97ns 2.87M
1192 no_combining_dup 100.12% 348.54ns 2.87M
1193 ----------------------------------------------------------------------------
1194 ----------------------------------------------------------------------------
1195 combining_dedicated_notc_sync 234.17% 149.02ns 6.71M
1196 combining_dedicated_notc_sync_dup 205.54% 169.78ns 5.89M
1197 ----------------------------------------------------------------------------
1198 combining_dedicated_notc_async 248.28% 140.55ns 7.11M
1199 combining_dedicated_notc_async_dup 239.71% 145.58ns 6.87M
1200 ----------------------------------------------------------------------------
1201 combining_dedicated_tc_sync 272.87% 127.89ns 7.82M
1202 combining_dedicated_tc_sync_dup 235.76% 148.02ns 6.76M
1203 ----------------------------------------------------------------------------
1204 combining_dedicated_tc_async 295.71% 118.01ns 8.47M
1205 combining_dedicated_tc_async_dup 265.87% 131.25ns 7.62M
1206 ----------------------------------------------------------------------------
1207 ----------------------------------------------------------------------------
1208 combining_no_dedicated_notc_sync 298.96% 116.73ns 8.57M
1209 combining_no_dedicated_notc_sync_dup 297.67% 117.23ns 8.53M
1210 ----------------------------------------------------------------------------
1211 combining_no_dedicated_notc_async 298.44% 116.93ns 8.55M
1212 combining_no_dedicated_notc_async_dup 292.80% 119.18ns 8.39M
1213 ----------------------------------------------------------------------------
1214 combining_no_dedicated_tc_sync 316.44% 110.28ns 9.07M
1215 combining_no_dedicated_tc_sync_dup 317.52% 109.90ns 9.10M
1216 ----------------------------------------------------------------------------
1217 combining_no_dedicated_tc_async 432.64% 80.66ns 12.40M
1218 combining_no_dedicated_tc_async_dup 441.55% 79.03ns 12.65M
1219 ----------------------------------------------------------------------------
1220 ============================================================================
1222 ---------------------------------- Number of threads = 32
1223 ============================================================================
1224 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1225 ============================================================================
1226 no_combining_base 338.90ns 2.95M
1227 no_combining_dup 100.01% 338.87ns 2.95M
1228 ----------------------------------------------------------------------------
1229 ----------------------------------------------------------------------------
1230 combining_dedicated_notc_sync 204.34% 165.85ns 6.03M
1231 combining_dedicated_notc_sync_dup 202.84% 167.07ns 5.99M
1232 ----------------------------------------------------------------------------
1233 combining_dedicated_notc_async 192.27% 176.26ns 5.67M
1234 combining_dedicated_notc_async_dup 188.61% 179.68ns 5.57M
1235 ----------------------------------------------------------------------------
1236 combining_dedicated_tc_sync 247.57% 136.89ns 7.31M
1237 combining_dedicated_tc_sync_dup 285.53% 118.69ns 8.43M
1238 ----------------------------------------------------------------------------
1239 combining_dedicated_tc_async 277.97% 121.92ns 8.20M
1240 combining_dedicated_tc_async_dup 231.11% 146.64ns 6.82M
1241 ----------------------------------------------------------------------------
1242 ----------------------------------------------------------------------------
1243 combining_no_dedicated_notc_sync 299.20% 113.27ns 8.83M
1244 combining_no_dedicated_notc_sync_dup 289.53% 117.05ns 8.54M
1245 ----------------------------------------------------------------------------
1246 combining_no_dedicated_notc_async 282.29% 120.05ns 8.33M
1247 combining_no_dedicated_notc_async_dup 305.09% 111.08ns 9.00M
1248 ----------------------------------------------------------------------------
1249 combining_no_dedicated_tc_sync 312.52% 108.44ns 9.22M
1250 combining_no_dedicated_tc_sync_dup 324.88% 104.31ns 9.59M
1251 ----------------------------------------------------------------------------
1252 combining_no_dedicated_tc_async 420.99% 80.50ns 12.42M
1253 combining_no_dedicated_tc_async_dup 406.58% 83.35ns 12.00M
1254 ----------------------------------------------------------------------------
1255 ============================================================================
1257 ---------------------------------- Number of threads = 48
1258 ============================================================================
1259 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1260 ============================================================================
1261 no_combining_base 334.84ns 2.99M
1262 no_combining_dup 99.57% 336.29ns 2.97M
1263 ----------------------------------------------------------------------------
1264 ----------------------------------------------------------------------------
1265 combining_dedicated_notc_sync 212.82% 157.34ns 6.36M
1266 combining_dedicated_notc_sync_dup 198.39% 168.78ns 5.93M
1267 ----------------------------------------------------------------------------
1268 combining_dedicated_notc_async 166.74% 200.82ns 4.98M
1269 combining_dedicated_notc_async_dup 197.07% 169.91ns 5.89M
1270 ----------------------------------------------------------------------------
1271 combining_dedicated_tc_sync 246.35% 135.92ns 7.36M
1272 combining_dedicated_tc_sync_dup 209.52% 159.81ns 6.26M
1273 ----------------------------------------------------------------------------
1274 combining_dedicated_tc_async 293.94% 113.91ns 8.78M
1275 combining_dedicated_tc_async_dup 280.74% 119.27ns 8.38M
1276 ----------------------------------------------------------------------------
1277 ----------------------------------------------------------------------------
1278 combining_no_dedicated_notc_sync 301.60% 111.02ns 9.01M
1279 combining_no_dedicated_notc_sync_dup 296.10% 113.09ns 8.84M
1280 ----------------------------------------------------------------------------
1281 combining_no_dedicated_notc_async 308.91% 108.40ns 9.23M
1282 combining_no_dedicated_notc_async_dup 298.48% 112.18ns 8.91M
1283 ----------------------------------------------------------------------------
1284 combining_no_dedicated_tc_sync 331.11% 101.13ns 9.89M
1285 combining_no_dedicated_tc_sync_dup 329.37% 101.66ns 9.84M
1286 ----------------------------------------------------------------------------
1287 combining_no_dedicated_tc_async 451.58% 74.15ns 13.49M
1288 combining_no_dedicated_tc_async_dup 431.37% 77.62ns 12.88M
1289 ----------------------------------------------------------------------------
1290 ============================================================================
1292 ---------------------------------- Number of threads = 64
1293 ============================================================================
1294 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1295 ============================================================================
1296 no_combining_base 336.22ns 2.97M
1297 no_combining_dup 100.69% 333.92ns 2.99M
1298 ----------------------------------------------------------------------------
1299 ----------------------------------------------------------------------------
1300 combining_dedicated_notc_sync 230.57% 145.82ns 6.86M
1301 combining_dedicated_notc_sync_dup 221.08% 152.08ns 6.58M
1302 ----------------------------------------------------------------------------
1303 combining_dedicated_notc_async 232.38% 144.69ns 6.91M
1304 combining_dedicated_notc_async_dup 192.77% 174.41ns 5.73M
1305 ----------------------------------------------------------------------------
1306 combining_dedicated_tc_sync 284.07% 118.36ns 8.45M
1307 combining_dedicated_tc_sync_dup 298.03% 112.81ns 8.86M
1308 ----------------------------------------------------------------------------
1309 combining_dedicated_tc_async 361.07% 93.12ns 10.74M
1310 combining_dedicated_tc_async_dup 324.11% 103.74ns 9.64M
1311 ----------------------------------------------------------------------------
1312 ----------------------------------------------------------------------------
1313 combining_no_dedicated_notc_sync 284.58% 118.15ns 8.46M
1314 combining_no_dedicated_notc_sync_dup 301.73% 111.43ns 8.97M
1315 ----------------------------------------------------------------------------
1316 combining_no_dedicated_notc_async 294.87% 114.02ns 8.77M
1317 combining_no_dedicated_notc_async_dup 287.51% 116.94ns 8.55M
1318 ----------------------------------------------------------------------------
1319 combining_no_dedicated_tc_sync 317.96% 105.74ns 9.46M
1320 combining_no_dedicated_tc_sync_dup 332.45% 101.13ns 9.89M
1321 ----------------------------------------------------------------------------
1322 combining_no_dedicated_tc_async 441.96% 76.07ns 13.15M
1323 combining_no_dedicated_tc_async_dup 393.82% 85.37ns 11.71M
1324 ----------------------------------------------------------------------------
1325 ============================================================================
1326 [ OK ] FlatCombining.folly_benchmark (455269 ms)
1327 [ RUN ] FlatCombining.direct_measurement
1329 Running benchmarks on machine with 32 logical cores
1331 ------------------------------------ custom interface
1333 ------------------------------------ Number of threads = 1
1335 Test_name, Max time, Avg time, Min time, % base min / min
1337 no_combining - base 334 ns 331 ns 329 ns
1338 no_combining - dup 335 ns 332 ns 331 ns 99%
1339 ---------------------------------------
1340 ---- dedicated-------------------------
1341 combining_notc_sync 340 ns 335 ns 332 ns 99%
1342 combining_notc_sync - dup 337 ns 335 ns 333 ns 98%
1343 ---------------------------------------
1344 combining_notc_async 360 ns 343 ns 338 ns 97%
1345 combining_notc_async - dup 339 ns 337 ns 336 ns 98%
1346 ---------------------------------------
1347 combining_tc_sync 337 ns 335 ns 333 ns 98%
1348 combining_tc_sync - dup 346 ns 336 ns 332 ns 99%
1349 ---------------------------------------
1350 combining_tc_async 338 ns 336 ns 335 ns 98%
1351 combining_tc_async - dup 338 ns 336 ns 335 ns 98%
1352 ---------------------------------------
1353 ---- no dedicated----------------------
1354 combining_notc_sync 338 ns 335 ns 333 ns 98%
1355 combining_notc_sync - dup 337 ns 334 ns 333 ns 98%
1356 ---------------------------------------
1357 combining_notc_async 339 ns 336 ns 335 ns 98%
1358 combining_notc_async - dup 347 ns 340 ns 336 ns 98%
1359 ---------------------------------------
1360 combining_tc_sync 337 ns 335 ns 333 ns 98%
1361 combining_tc_sync - dup 436 ns 386 ns 333 ns 98%
1362 ---------------------------------------
1363 combining_tc_async 340 ns 337 ns 335 ns 98%
1364 combining_tc_async - dup 338 ns 336 ns 335 ns 98%
1365 ---------------------------------------
1367 ------------------------------------ Number of threads = 2
1369 Test_name, Max time, Avg time, Min time, % base min / min
1371 no_combining - base 315 ns 226 ns 211 ns
1372 no_combining - dup 217 ns 216 ns 213 ns 98%
1373 ---------------------------------------
1374 ---- dedicated-------------------------
1375 combining_notc_sync 251 ns 237 ns 229 ns 92%
1376 combining_notc_sync - dup 250 ns 241 ns 226 ns 93%
1377 ---------------------------------------
1378 combining_notc_async 278 ns 268 ns 252 ns 83%
1379 combining_notc_async - dup 297 ns 263 ns 245 ns 86%
1380 ---------------------------------------
1381 combining_tc_sync 254 ns 246 ns 234 ns 90%
1382 combining_tc_sync - dup 335 ns 252 ns 230 ns 91%
1383 ---------------------------------------
1384 combining_tc_async 305 ns 282 ns 245 ns 86%
1385 combining_tc_async - dup 284 ns 256 ns 239 ns 88%
1386 ---------------------------------------
1387 ---- no dedicated----------------------
1388 combining_notc_sync 230 ns 222 ns 217 ns 97%
1389 combining_notc_sync - dup 231 ns 225 ns 218 ns 96%
1390 ---------------------------------------
1391 combining_notc_async 244 ns 238 ns 233 ns 90%
1392 combining_notc_async - dup 241 ns 236 ns 231 ns 91%
1393 ---------------------------------------
1394 combining_tc_sync 283 ns 239 ns 221 ns 95%
1395 combining_tc_sync - dup 299 ns 247 ns 225 ns 93%
1396 ---------------------------------------
1397 combining_tc_async 290 ns 270 ns 244 ns 86%
1398 combining_tc_async - dup 290 ns 251 ns 238 ns 88%
1399 ---------------------------------------
1401 ------------------------------------ Number of threads = 3
1403 Test_name, Max time, Avg time, Min time, % base min / min
1405 no_combining - base 211 ns 197 ns 190 ns
1406 no_combining - dup 209 ns 201 ns 195 ns 97%
1407 ---------------------------------------
1408 ---- dedicated-------------------------
1409 combining_notc_sync 258 ns 197 ns 168 ns 112%
1410 combining_notc_sync - dup 274 ns 200 ns 162 ns 117%
1411 ---------------------------------------
1412 combining_notc_async 307 ns 281 ns 260 ns 73%
1413 combining_notc_async - dup 284 ns 258 ns 216 ns 88%
1414 ---------------------------------------
1415 combining_tc_sync 228 ns 215 ns 192 ns 98%
1416 combining_tc_sync - dup 216 ns 203 ns 178 ns 107%
1417 ---------------------------------------
1418 combining_tc_async 246 ns 233 ns 220 ns 86%
1419 combining_tc_async - dup 236 ns 221 ns 208 ns 91%
1420 ---------------------------------------
1421 ---- no dedicated----------------------
1422 combining_notc_sync 204 ns 198 ns 184 ns 103%
1423 combining_notc_sync - dup 203 ns 198 ns 193 ns 98%
1424 ---------------------------------------
1425 combining_notc_async 238 ns 225 ns 218 ns 87%
1426 combining_notc_async - dup 231 ns 227 ns 223 ns 85%
1427 ---------------------------------------
1428 combining_tc_sync 220 ns 216 ns 211 ns 90%
1429 combining_tc_sync - dup 227 ns 223 ns 219 ns 87%
1430 ---------------------------------------
1431 combining_tc_async 182 ns 181 ns 179 ns 106%
1432 combining_tc_async - dup 186 ns 181 ns 180 ns 105%
1433 ---------------------------------------
1435 ------------------------------------ Number of threads = 4
1437 Test_name, Max time, Avg time, Min time, % base min / min
1439 no_combining - base 258 ns 245 ns 238 ns
1440 no_combining - dup 262 ns 249 ns 245 ns 97%
1441 ---------------------------------------
1442 ---- dedicated-------------------------
1443 combining_notc_sync 264 ns 250 ns 220 ns 107%
1444 combining_notc_sync - dup 260 ns 254 ns 231 ns 102%
1445 ---------------------------------------
1446 combining_notc_async 266 ns 255 ns 233 ns 102%
1447 combining_notc_async - dup 268 ns 260 ns 252 ns 94%
1448 ---------------------------------------
1449 combining_tc_sync 250 ns 240 ns 215 ns 110%
1450 combining_tc_sync - dup 252 ns 242 ns 217 ns 109%
1451 ---------------------------------------
1452 combining_tc_async 199 ns 190 ns 183 ns 129%
1453 combining_tc_async - dup 199 ns 189 ns 178 ns 133%
1454 ---------------------------------------
1455 ---- no dedicated----------------------
1456 combining_notc_sync 223 ns 211 ns 203 ns 116%
1457 combining_notc_sync - dup 218 ns 211 ns 202 ns 117%
1458 ---------------------------------------
1459 combining_notc_async 222 ns 213 ns 207 ns 114%
1460 combining_notc_async - dup 236 ns 222 ns 215 ns 110%
1461 ---------------------------------------
1462 combining_tc_sync 202 ns 199 ns 197 ns 120%
1463 combining_tc_sync - dup 207 ns 199 ns 194 ns 122%
1464 ---------------------------------------
1465 combining_tc_async 162 ns 157 ns 152 ns 155%
1466 combining_tc_async - dup 188 ns 161 ns 154 ns 154%
1467 ---------------------------------------
1469 ------------------------------------ Number of threads = 6
1471 Test_name, Max time, Avg time, Min time, % base min / min
1473 no_combining - base 298 ns 292 ns 281 ns
1474 no_combining - dup 296 ns 289 ns 270 ns 104%
1475 ---------------------------------------
1476 ---- dedicated-------------------------
1477 combining_notc_sync 221 ns 211 ns 196 ns 143%
1478 combining_notc_sync - dup 247 ns 211 ns 192 ns 146%
1479 ---------------------------------------
1480 combining_notc_async 216 ns 205 ns 194 ns 144%
1481 combining_notc_async - dup 215 ns 206 ns 197 ns 142%
1482 ---------------------------------------
1483 combining_tc_sync 225 ns 204 ns 185 ns 151%
1484 combining_tc_sync - dup 229 ns 210 ns 186 ns 151%
1485 ---------------------------------------
1486 combining_tc_async 165 ns 152 ns 144 ns 194%
1487 combining_tc_async - dup 166 ns 150 ns 143 ns 195%
1488 ---------------------------------------
1489 ---- no dedicated----------------------
1490 combining_notc_sync 184 ns 182 ns 180 ns 155%
1491 combining_notc_sync - dup 176 ns 174 ns 172 ns 163%
1492 ---------------------------------------
1493 combining_notc_async 179 ns 177 ns 174 ns 161%
1494 combining_notc_async - dup 186 ns 181 ns 177 ns 158%
1495 ---------------------------------------
1496 combining_tc_sync 164 ns 163 ns 160 ns 174%
1497 combining_tc_sync - dup 171 ns 168 ns 161 ns 173%
1498 ---------------------------------------
1499 combining_tc_async 142 ns 139 ns 138 ns 202%
1500 combining_tc_async - dup 141 ns 136 ns 119 ns 235%
1501 ---------------------------------------
1503 ------------------------------------ Number of threads = 8
1505 Test_name, Max time, Avg time, Min time, % base min / min
1507 no_combining - base 333 ns 328 ns 315 ns
1508 no_combining - dup 336 ns 330 ns 327 ns 96%
1509 ---------------------------------------
1510 ---- dedicated-------------------------
1511 combining_notc_sync 203 ns 179 ns 172 ns 183%
1512 combining_notc_sync - dup 190 ns 177 ns 171 ns 183%
1513 ---------------------------------------
1514 combining_notc_async 204 ns 183 ns 170 ns 185%
1515 combining_notc_async - dup 201 ns 187 ns 176 ns 179%
1516 ---------------------------------------
1517 combining_tc_sync 177 ns 170 ns 165 ns 190%
1518 combining_tc_sync - dup 178 ns 167 ns 164 ns 192%
1519 ---------------------------------------
1520 combining_tc_async 134 ns 115 ns 105 ns 300%
1521 combining_tc_async - dup 132 ns 115 ns 103 ns 304%
1522 ---------------------------------------
1523 ---- no dedicated----------------------
1524 combining_notc_sync 154 ns 145 ns 143 ns 220%
1525 combining_notc_sync - dup 153 ns 144 ns 142 ns 222%
1526 ---------------------------------------
1527 combining_notc_async 145 ns 144 ns 143 ns 219%
1528 combining_notc_async - dup 157 ns 148 ns 144 ns 218%
1529 ---------------------------------------
1530 combining_tc_sync 142 ns 134 ns 130 ns 241%
1531 combining_tc_sync - dup 144 ns 136 ns 130 ns 241%
1532 ---------------------------------------
1533 combining_tc_async 118 ns 99 ns 91 ns 344%
1534 combining_tc_async - dup 118 ns 95 ns 91 ns 344%
1535 ---------------------------------------
1537 ------------------------------------ Number of threads = 12
1539 Test_name, Max time, Avg time, Min time, % base min / min
1541 no_combining - base 361 ns 357 ns 353 ns
1542 no_combining - dup 361 ns 357 ns 355 ns 99%
1543 ---------------------------------------
1544 ---- dedicated-------------------------
1545 combining_notc_sync 190 ns 157 ns 138 ns 255%
1546 combining_notc_sync - dup 162 ns 149 ns 138 ns 255%
1547 ---------------------------------------
1548 combining_notc_async 163 ns 153 ns 145 ns 242%
1549 combining_notc_async - dup 194 ns 158 ns 152 ns 231%
1550 ---------------------------------------
1551 combining_tc_sync 181 ns 128 ns 111 ns 316%
1552 combining_tc_sync - dup 183 ns 148 ns 121 ns 289%
1553 ---------------------------------------
1554 combining_tc_async 92 ns 89 ns 87 ns 402%
1555 combining_tc_async - dup 152 ns 105 ns 87 ns 405%
1556 ---------------------------------------
1557 ---- no dedicated----------------------
1558 combining_notc_sync 120 ns 119 ns 118 ns 298%
1559 combining_notc_sync - dup 120 ns 119 ns 118 ns 298%
1560 ---------------------------------------
1561 combining_notc_async 122 ns 120 ns 120 ns 294%
1562 combining_notc_async - dup 121 ns 120 ns 118 ns 297%
1563 ---------------------------------------
1564 combining_tc_sync 110 ns 108 ns 106 ns 331%
1565 combining_tc_sync - dup 110 ns 109 ns 107 ns 327%
1566 ---------------------------------------
1567 combining_tc_async 88 ns 87 ns 85 ns 411%
1568 combining_tc_async - dup 90 ns 88 ns 85 ns 411%
1569 ---------------------------------------
1571 ------------------------------------ Number of threads = 16
1573 Test_name, Max time, Avg time, Min time, % base min / min
1575 no_combining - base 363 ns 361 ns 360 ns
1576 no_combining - dup 362 ns 361 ns 358 ns 100%
1577 ---------------------------------------
1578 ---- dedicated-------------------------
1579 combining_notc_sync 177 ns 136 ns 111 ns 323%
1580 combining_notc_sync - dup 185 ns 148 ns 112 ns 320%
1581 ---------------------------------------
1582 combining_notc_async 191 ns 151 ns 122 ns 294%
1583 combining_notc_async - dup 179 ns 157 ns 118 ns 305%
1584 ---------------------------------------
1585 combining_tc_sync 154 ns 125 ns 100 ns 360%
1586 combining_tc_sync - dup 166 ns 130 ns 98 ns 367%
1587 ---------------------------------------
1588 combining_tc_async 143 ns 107 ns 86 ns 418%
1589 combining_tc_async - dup 132 ns 112 ns 88 ns 407%
1590 ---------------------------------------
1591 ---- no dedicated----------------------
1592 combining_notc_sync 121 ns 103 ns 98 ns 367%
1593 combining_notc_sync - dup 117 ns 104 ns 99 ns 362%
1594 ---------------------------------------
1595 combining_notc_async 116 ns 105 ns 99 ns 363%
1596 combining_notc_async - dup 112 ns 104 ns 100 ns 359%
1597 ---------------------------------------
1598 combining_tc_sync 111 ns 101 ns 94 ns 381%
1599 combining_tc_sync - dup 113 ns 98 ns 93 ns 387%
1600 ---------------------------------------
1601 combining_tc_async 97 ns 85 ns 74 ns 484%
1602 combining_tc_async - dup 98 ns 86 ns 78 ns 457%
1603 ---------------------------------------
1605 ------------------------------------ Number of threads = 24
1607 Test_name, Max time, Avg time, Min time, % base min / min
1609 no_combining - base 352 ns 351 ns 349 ns
1610 no_combining - dup 352 ns 351 ns 348 ns 100%
1611 ---------------------------------------
1612 ---- dedicated-------------------------
1613 combining_notc_sync 214 ns 173 ns 149 ns 234%
1614 combining_notc_sync - dup 212 ns 166 ns 137 ns 254%
1615 ---------------------------------------
1616 combining_notc_async 232 ns 198 ns 161 ns 216%
1617 combining_notc_async - dup 225 ns 191 ns 149 ns 234%
1618 ---------------------------------------
1619 combining_tc_sync 192 ns 152 ns 129 ns 270%
1620 combining_tc_sync - dup 176 ns 156 ns 121 ns 286%
1621 ---------------------------------------
1622 combining_tc_async 202 ns 147 ns 118 ns 296%
1623 combining_tc_async - dup 200 ns 158 ns 120 ns 291%
1624 ---------------------------------------
1625 ---- no dedicated----------------------
1626 combining_notc_sync 161 ns 125 ns 115 ns 303%
1627 combining_notc_sync - dup 144 ns 127 ns 116 ns 299%
1628 ---------------------------------------
1629 combining_notc_async 135 ns 122 ns 116 ns 298%
1630 combining_notc_async - dup 341 ns 148 ns 117 ns 298%
1631 ---------------------------------------
1632 combining_tc_sync 130 ns 118 ns 109 ns 319%
1633 combining_tc_sync - dup 116 ns 110 ns 105 ns 332%
1634 ---------------------------------------
1635 combining_tc_async 97 ns 86 ns 79 ns 442%
1636 combining_tc_async - dup 95 ns 86 ns 79 ns 440%
1637 ---------------------------------------
1639 ------------------------------------ Number of threads = 32
1641 Test_name, Max time, Avg time, Min time, % base min / min
1643 no_combining - base 337 ns 336 ns 333 ns
1644 no_combining - dup 338 ns 336 ns 333 ns 99%
1645 ---------------------------------------
1646 ---- dedicated-------------------------
1647 combining_notc_sync 193 ns 177 ns 162 ns 204%
1648 combining_notc_sync - dup 211 ns 181 ns 156 ns 213%
1649 ---------------------------------------
1650 combining_notc_async 245 ns 200 ns 162 ns 205%
1651 combining_notc_async - dup 216 ns 197 ns 149 ns 223%
1652 ---------------------------------------
1653 combining_tc_sync 195 ns 167 ns 121 ns 274%
1654 combining_tc_sync - dup 179 ns 164 ns 143 ns 231%
1655 ---------------------------------------
1656 combining_tc_async 187 ns 152 ns 108 ns 307%
1657 combining_tc_async - dup 182 ns 151 ns 125 ns 266%
1658 ---------------------------------------
1659 ---- no dedicated----------------------
1660 combining_notc_sync 189 ns 127 ns 114 ns 290%
1661 combining_notc_sync - dup 126 ns 118 ns 110 ns 302%
1662 ---------------------------------------
1663 combining_notc_async 233 ns 129 ns 112 ns 297%
1664 combining_notc_async - dup 170 ns 126 ns 113 ns 293%
1665 ---------------------------------------
1666 combining_tc_sync 948 ns 212 ns 107 ns 309%
1667 combining_tc_sync - dup 137 ns 112 ns 104 ns 318%
1668 ---------------------------------------
1669 combining_tc_async 90 ns 86 ns 79 ns 421%
1670 combining_tc_async - dup 94 ns 87 ns 80 ns 414%
1671 ---------------------------------------
1673 ------------------------------------ Number of threads = 48
1675 Test_name, Max time, Avg time, Min time, % base min / min
1677 no_combining - base 340 ns 336 ns 334 ns
1678 no_combining - dup 336 ns 335 ns 334 ns 100%
1679 ---------------------------------------
1680 ---- dedicated-------------------------
1681 combining_notc_sync 214 ns 176 ns 137 ns 243%
1682 combining_notc_sync - dup 210 ns 173 ns 128 ns 260%
1683 ---------------------------------------
1684 combining_notc_async 217 ns 186 ns 162 ns 205%
1685 combining_notc_async - dup 215 ns 186 ns 149 ns 224%
1686 ---------------------------------------
1687 combining_tc_sync 206 ns 171 ns 145 ns 230%
1688 combining_tc_sync - dup 179 ns 149 ns 126 ns 265%
1689 ---------------------------------------
1690 combining_tc_async 175 ns 138 ns 108 ns 309%
1691 combining_tc_async - dup 169 ns 134 ns 110 ns 301%
1692 ---------------------------------------
1693 ---- no dedicated----------------------
1694 combining_notc_sync 1798 ns 293 ns 118 ns 282%
1695 combining_notc_sync - dup 171 ns 122 ns 105 ns 318%
1696 ---------------------------------------
1697 combining_notc_async 227 ns 132 ns 110 ns 302%
1698 combining_notc_async - dup 226 ns 137 ns 111 ns 301%
1699 ---------------------------------------
1700 combining_tc_sync 111 ns 106 ns 102 ns 327%
1701 combining_tc_sync - dup 127 ns 110 ns 104 ns 321%
1702 ---------------------------------------
1703 combining_tc_async 297 ns 117 ns 77 ns 433%
1704 combining_tc_async - dup 742 ns 149 ns 77 ns 432%
1705 ---------------------------------------
1707 ------------------------------------ Number of threads = 64
1709 Test_name, Max time, Avg time, Min time, % base min / min
1711 no_combining - base 338 ns 333 ns 331 ns
1712 no_combining - dup 335 ns 333 ns 331 ns 99%
1713 ---------------------------------------
1714 ---- dedicated-------------------------
1715 combining_notc_sync 198 ns 163 ns 148 ns 223%
1716 combining_notc_sync - dup 172 ns 154 ns 124 ns 266%
1717 ---------------------------------------
1718 combining_notc_async 211 ns 177 ns 158 ns 209%
1719 combining_notc_async - dup 182 ns 166 ns 152 ns 216%
1720 ---------------------------------------
1721 combining_tc_sync 195 ns 133 ns 112 ns 294%
1722 combining_tc_sync - dup 158 ns 135 ns 108 ns 305%
1723 ---------------------------------------
1724 combining_tc_async 145 ns 119 ns 95 ns 347%
1725 combining_tc_async - dup 159 ns 130 ns 95 ns 346%
1726 ---------------------------------------
1727 ---- no dedicated----------------------
1728 combining_notc_sync 188 ns 123 ns 107 ns 308%
1729 combining_notc_sync - dup 546 ns 159 ns 107 ns 307%
1730 ---------------------------------------
1731 combining_notc_async 558 ns 160 ns 108 ns 304%
1732 combining_notc_async - dup 192 ns 127 ns 107 ns 308%
1733 ---------------------------------------
1734 combining_tc_sync 325 ns 130 ns 101 ns 325%
1735 combining_tc_sync - dup 1766 ns 273 ns 101 ns 325%
1736 ---------------------------------------
1737 combining_tc_async 417 ns 118 ns 74 ns 446%
1738 combining_tc_async - dup 838 ns 212 ns 72 ns 455%
1739 ---------------------------------------
1740 [ OK ] FlatCombining.direct_measurement (178622 ms)
1741 [----------] 2 tests from FlatCombining (633891 ms total)
1743 [----------] Global test environment tear-down
1744 [==========] 2 tests from 1 test case ran. (633891 ms total)
1751 Architecture: x86_64
1752 CPU op-mode(s): 32-bit, 64-bit
1753 Byte Order: Little Endian
1755 On-line CPU(s) list: 0-31
1756 Thread(s) per core: 2
1757 Core(s) per socket: 8
1760 Vendor ID: GenuineIntel
1763 Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
1766 CPU max MHz: 2200.0000
1767 CPU min MHz: 1200.0000
1769 Virtualization: VT-x
1774 NUMA node0 CPU(s): 0-7,16-23
1775 NUMA node1 CPU(s): 8-15,24-31
1777 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep
1778 mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
1779 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
1780 rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
1781 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca
1782 sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm
1783 epb tpr_shadow vnmi flexpriority ept vpid xsaveopt dtherm arat pln pts