2 * Copyright 2017-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/concurrency/DynamicBoundedQueue.h>
18 #include <folly/MPMCQueue.h>
19 #include <folly/ProducerConsumerQueue.h>
20 #include <folly/portability/GTest.h>
22 #include <glog/logging.h>
27 DEFINE_bool(bench, false, "run benchmark");
28 DEFINE_int32(reps, 10, "number of reps");
29 DEFINE_int32(ops, 1000000, "number of operations per rep");
30 DEFINE_int64(capacity, 1000000, "capacity");
32 template <typename T, bool MayBlock, typename WeightFn>
33 using DSPSC = folly::DSPSCQueue<T, MayBlock, 8, 7, WeightFn>;
35 template <typename T, bool MayBlock, typename WeightFn>
36 using DMPSC = folly::DMPSCQueue<T, MayBlock, 8, 7, WeightFn>;
38 template <typename T, bool MayBlock, typename WeightFn>
39 using DSPMC = folly::DSPMCQueue<T, MayBlock, 8, 7, WeightFn>;
41 template <typename T, bool MayBlock, typename WeightFn>
42 using DMPMC = folly::DMPMCQueue<T, MayBlock, 8, 7, WeightFn>;
44 template <template <typename, bool, typename> class Q, bool MayBlock>
46 auto dur = std::chrono::microseconds(100);
47 auto deadline = std::chrono::steady_clock::now() + dur;
49 struct CustomWeightFn {
50 uint64_t operator()(int val) {
55 Q<int, MayBlock, CustomWeightFn> q(10000);
57 ASSERT_TRUE(q.empty());
58 ASSERT_EQ(q.size(), 0);
60 ASSERT_FALSE(q.try_dequeue(v));
63 ASSERT_TRUE(q.try_enqueue(2));
64 ASSERT_TRUE(q.try_enqueue_until(3, deadline));
65 ASSERT_TRUE(q.try_enqueue_for(4, dur));
67 ASSERT_EQ(q.size(), 4);
68 ASSERT_EQ(q.weight(), 1000);
69 ASSERT_FALSE(q.empty());
73 ASSERT_TRUE(q.try_dequeue(v));
75 ASSERT_TRUE(q.try_dequeue_until(v, deadline));
77 ASSERT_TRUE(q.try_dequeue_for(v, dur));
80 ASSERT_TRUE(q.empty());
81 ASSERT_EQ(q.size(), 0);
82 ASSERT_EQ(q.weight(), 0);
85 TEST(DynamicBoundedQueue, basic) {
86 basic_test<DSPSC, false>();
87 basic_test<DMPSC, false>();
88 basic_test<DSPMC, false>();
89 basic_test<DMPMC, false>();
90 basic_test<DSPSC, true>();
91 basic_test<DMPSC, true>();
92 basic_test<DSPMC, true>();
93 basic_test<DMPMC, true>();
96 TEST(DynamicBoundedQueue, size) {
98 folly::DynamicBoundedQueue<int, true, true, true> q(10);
99 ASSERT_EQ(sizeof(q), 640);
102 folly::DynamicBoundedQueue<uint64_t, false, false, false, 7, 4> q(10);
103 ASSERT_EQ(sizeof(q), 80);
107 template <template <typename, bool, typename> class Q, bool MayBlock>
111 explicit Foo(int v) noexcept : v_(v) {}
112 Foo(const Foo&) = delete;
113 Foo& operator=(const Foo&) = delete;
114 Foo(Foo&& other) noexcept : v_(other.v_) {}
115 Foo& operator=(Foo&& other) noexcept {
121 struct CustomWeightFn {
122 uint64_t operator()(Foo&&) {
127 auto dur = std::chrono::microseconds(100);
128 auto deadline = std::chrono::steady_clock::now() + dur;
130 Q<Foo, MayBlock, CustomWeightFn> q(100);
132 q.enqueue(std::move(v));
133 ASSERT_TRUE(q.try_enqueue(std::move(v)));
134 ASSERT_TRUE(q.try_enqueue_until(std::move(v), deadline));
135 ASSERT_TRUE(q.try_enqueue_for(std::move(v), dur));
137 ASSERT_EQ(q.size(), 4);
138 ASSERT_EQ(q.weight(), 40);
141 TEST(DynamicBoundedQueue, move) {
142 move_test<DSPSC, false>();
143 move_test<DMPSC, false>();
144 move_test<DSPMC, false>();
145 move_test<DMPMC, false>();
146 move_test<DSPSC, true>();
147 move_test<DMPSC, true>();
148 move_test<DSPMC, true>();
149 move_test<DMPMC, true>();
152 template <template <typename, bool, typename> class Q, bool MayBlock>
153 void capacity_test() {
154 struct CustomWeightFn {
155 uint64_t operator()(int val) {
160 Q<int, MayBlock, CustomWeightFn> q(1000);
161 ASSERT_EQ(q.weight(), 0);
164 ASSERT_EQ(q.weight(), 100);
166 ASSERT_EQ(q.weight(), 400);
167 ASSERT_FALSE(q.try_enqueue(1200));
168 q.reset_capacity(2000); // reset capacityy to 2000
169 ASSERT_TRUE(q.try_enqueue(1200));
170 ASSERT_EQ(q.weight(), 1600);
171 ASSERT_EQ(q.size(), 3);
172 q.reset_capacity(1000); // reset capacity back to 1000
173 ASSERT_FALSE(q.try_enqueue(100));
176 ASSERT_EQ(q.weight(), 1500);
179 ASSERT_EQ(q.weight(), 1200);
182 TEST(DynamicBoundedQueue, capacity) {
183 capacity_test<DSPSC, false>();
184 capacity_test<DMPSC, false>();
185 capacity_test<DSPMC, false>();
186 capacity_test<DMPMC, false>();
187 capacity_test<DSPSC, true>();
188 capacity_test<DMPSC, true>();
189 capacity_test<DSPMC, true>();
190 capacity_test<DMPMC, true>();
193 template <typename ProdFunc, typename ConsFunc, typename EndFunc>
194 inline uint64_t run_once(
197 const ProdFunc& prodFn,
198 const ConsFunc& consFn,
199 const EndFunc& endFn) {
200 std::atomic<bool> start{false};
201 std::atomic<int> ready{0};
204 std::vector<std::thread> prodThr(nprod);
205 for (int tid = 0; tid < nprod; ++tid) {
206 prodThr[tid] = std::thread([&, tid] {
208 while (!start.load()) {
216 std::vector<std::thread> consThr(ncons);
217 for (int tid = 0; tid < ncons; ++tid) {
218 consThr[tid] = std::thread([&, tid] {
220 while (!start.load()) {
227 /* wait for all producers and consumers to be ready */
228 while (ready.load() < (nprod + ncons)) {
232 /* begin time measurement */
233 auto tbegin = std::chrono::steady_clock::now();
236 /* wait for completion */
237 for (int i = 0; i < nprod; ++i) {
240 for (int i = 0; i < ncons; ++i) {
244 /* end time measurement */
245 auto tend = std::chrono::steady_clock::now();
247 return std::chrono::duration_cast<std::chrono::nanoseconds>(tend - tbegin)
251 template <bool SingleProducer, bool SingleConsumer, bool MayBlock>
252 void enq_deq_test(const int nprod, const int ncons) {
253 if (SingleProducer) {
256 if (SingleConsumer) {
261 folly::DynamicBoundedQueue<int, SingleProducer, SingleConsumer, MayBlock, 2>
263 std::atomic<uint64_t> sum(0);
265 auto prod = [&](int tid) {
266 for (int i = tid; i < ops; i += nprod) {
268 while (!q.try_enqueue(i)) {
271 } else if ((i % 3) == 1) {
272 auto dur = std::chrono::microseconds(100);
273 while (!q.try_enqueue_for(i, dur)) {
282 auto cons = [&](int tid) {
284 for (int i = tid; i < ops; i += ncons) {
287 while (!q.try_dequeue(v)) {
290 } else if ((i % 3) == 1) {
291 auto dur = std::chrono::microseconds(100);
292 while (!q.try_dequeue_for(v, dur)) {
298 if (nprod == 1 && ncons == 1) {
303 sum.fetch_add(mysum);
307 uint64_t expected = ops;
310 ASSERT_EQ(sum.load(), expected);
312 run_once(nprod, ncons, prod, cons, endfn);
315 TEST(DynamicBoundedQueue, enq_deq) {
317 enq_deq_test<true, true, false>(1, 1);
318 enq_deq_test<true, true, true>(1, 1);
320 enq_deq_test<false, true, false>(1, 1);
321 enq_deq_test<false, true, true>(1, 1);
322 enq_deq_test<false, true, false>(2, 1);
323 enq_deq_test<false, true, true>(2, 1);
324 enq_deq_test<false, true, false>(10, 1);
325 enq_deq_test<false, true, true>(10, 1);
327 enq_deq_test<true, false, false>(1, 1);
328 enq_deq_test<true, false, true>(1, 1);
329 enq_deq_test<true, false, false>(1, 2);
330 enq_deq_test<true, false, true>(1, 2);
331 enq_deq_test<true, false, false>(1, 10);
332 enq_deq_test<true, false, true>(1, 10);
334 enq_deq_test<false, false, false>(1, 1);
335 enq_deq_test<false, false, true>(1, 1);
336 enq_deq_test<false, false, false>(2, 1);
337 enq_deq_test<false, false, true>(2, 1);
338 enq_deq_test<false, false, false>(10, 1);
339 enq_deq_test<false, false, true>(10, 1);
340 enq_deq_test<false, false, false>(1, 2);
341 enq_deq_test<false, false, true>(1, 2);
342 enq_deq_test<false, false, false>(1, 10);
343 enq_deq_test<false, false, true>(1, 10);
344 enq_deq_test<false, false, false>(2, 2);
345 enq_deq_test<false, false, true>(2, 2);
346 enq_deq_test<false, false, false>(10, 10);
347 enq_deq_test<false, false, true>(10, 10);
350 template <typename RepFunc>
351 uint64_t runBench(const std::string& name, int ops, const RepFunc& repFn) {
352 int reps = FLAGS_reps;
353 uint64_t min = UINTMAX_MAX;
357 repFn(); // sometimes first run is outlier
358 for (int r = 0; r < reps; ++r) {
359 uint64_t dur = repFn();
361 min = std::min(min, dur);
362 max = std::max(max, dur);
363 // if each rep takes too long run at least 2 reps
364 const uint64_t minute = 60000000000ULL;
365 if (sum > minute && r >= 1) {
371 const std::string unit = " ns";
372 uint64_t avg = sum / reps;
375 std::cout << " " << std::setw(4) << max / ops << unit;
376 std::cout << " " << std::setw(4) << avg / ops << unit;
377 std::cout << " " << std::setw(4) << res / ops << unit;
378 std::cout << std::endl;
382 template <template <typename, bool, typename> class Q, typename T, int Op>
383 uint64_t bench(const int nprod, const int ncons, const std::string& name) {
386 Q<T, Op == 3 || Op == 4 || Op == 5, folly::DefaultWeightFn<T>> q(
388 std::atomic<uint64_t> sum(0);
389 auto prod = [&](int tid) {
390 for (int i = tid; i < ops; i += nprod) {
391 if (Op == 0 || Op == 3) {
392 while (!q.try_enqueue(i)) {
395 } else if (Op == 1 || Op == 4) {
396 while (!q.try_enqueue_for(i, std::chrono::microseconds(1000))) {
404 auto cons = [&](int tid) {
407 for (int i = tid; i < ops; i += ncons) {
408 if (Op == 0 || Op == 3) {
409 while (!q.try_dequeue(v)) {
412 } else if (Op == 1 || Op == 4) {
413 while (!q.try_dequeue_for(v, std::chrono::microseconds(1000))) {
419 if (nprod == 1 && ncons == 1) {
420 DCHECK_EQ(int(v), i);
424 sum.fetch_add(mysum);
427 uint64_t expected = ops;
430 ASSERT_EQ(sum.load(), expected);
432 return run_once(nprod, ncons, prod, cons, endfn);
434 return runBench(name, ops, repFn);
437 /* For performance comparison */
438 template <typename T>
440 folly::MPMCQueue<T> q_;
443 explicit MPMC(uint64_t capacity) : q_(capacity) {}
445 void enqueue(const T& v) {
449 void enqueue(T&& v) {
450 q_.blockingWrite(std::move(v));
453 bool try_enqueue(const T& v) {
457 bool try_enqueue(const T&& v) {
458 return q_.write(std::move(v));
461 template <typename Rep, typename Period>
462 bool try_enqueue_for(
464 const std::chrono::duration<Rep, Period>& duration) {
465 return q_.tryWriteUntil(std::chrono::steady_clock::now() + duration, v);
468 void dequeue(T& item) {
469 q_.blockingRead(item);
472 bool try_dequeue(T& item) {
473 return q_.read(item);
476 template <typename Rep, typename Period>
477 bool try_dequeue_for(
479 const std::chrono::duration<Rep, Period>& duration) {
480 return q_.tryReadUntil(std::chrono::steady_clock::now() + duration, item);
484 template <typename T, bool, typename>
485 using FMPMC = MPMC<T>;
487 template <typename T>
489 folly::ProducerConsumerQueue<T> q_;
492 explicit PCQ(uint64_t capacity) : q_(capacity) {}
494 void enqueue(const T&) {
498 bool try_enqueue(const T& v) {
502 bool try_enqueue(T&& v) {
503 return q_.write(std::move(v));
506 template <typename Rep, typename Period>
507 bool try_enqueue_for(const T&, const std::chrono::duration<Rep, Period>&) {
515 bool try_dequeue(T& item) {
516 return q_.read(item);
519 template <typename Rep, typename Period>
520 bool try_dequeue_for(T&, const std::chrono::duration<Rep, Period>&) {
525 template <typename T, bool, typename>
532 /* implicit */ IntArray(int v) {
533 for (size_t i = 0; i < M; ++i) {
543 std::cout << ".............................................................."
547 template <typename T>
548 void type_benches(const int np, const int nc, const std::string& name) {
550 << "===========================================" << std::endl;
551 if (np == 1 && nc == 1) {
552 bench<DSPSC, T, 0>(1, 1, "DSPSC try spin only ");
553 bench<DSPSC, T, 1>(1, 1, "DSPSC timed spin only ");
554 bench<DSPSC, T, 2>(1, 1, "DSPSC wait spin only ");
555 bench<DSPSC, T, 3>(1, 1, "DSPSC try may block ");
556 bench<DSPSC, T, 4>(1, 1, "DSPSC timed may block ");
557 bench<DSPSC, T, 5>(1, 1, "DSPSC wait may block ");
561 bench<DMPSC, T, 0>(np, 1, "DMPSC try spin only ");
562 bench<DMPSC, T, 1>(np, 1, "DMPSC timed spin only ");
563 bench<DMPSC, T, 2>(np, 1, "DMPSC wait spin only ");
564 bench<DMPSC, T, 3>(np, 1, "DMPSC try may block ");
565 bench<DMPSC, T, 4>(np, 1, "DMPSC timed may block ");
566 bench<DMPSC, T, 5>(np, 1, "DMPSC wait may block ");
570 bench<DSPMC, T, 0>(1, nc, "DSPMC try spin only ");
571 bench<DSPMC, T, 1>(1, nc, "DSPMC timed spin only ");
572 bench<DSPMC, T, 2>(1, nc, "DSPMC wait spin only ");
573 bench<DSPMC, T, 3>(1, nc, "DSPMC try may block ");
574 bench<DSPMC, T, 4>(1, nc, "DSPMC timed may block ");
575 bench<DSPMC, T, 5>(1, nc, "DSPMC wait may block ");
578 bench<DMPMC, T, 0>(np, nc, "DMPMC try spin only ");
579 bench<DMPMC, T, 1>(np, nc, "DMPMC timed spin only ");
580 bench<DMPMC, T, 2>(np, nc, "DMPMC wait spin only ");
581 bench<DMPMC, T, 3>(np, nc, "DMPMC try may block ");
582 bench<DMPMC, T, 4>(np, nc, "DMPMC timed may block ");
583 bench<DMPMC, T, 5>(np, nc, "DMPMC wait may block ");
585 if (np == 1 && nc == 1) {
586 bench<FPCQ, T, 0>(1, 1, "folly::PCQ read ");
589 bench<FMPMC, T, 3>(np, nc, "folly::MPMC read ");
590 bench<FMPMC, T, 4>(np, nc, "folly::MPMC tryReadUntil ");
591 bench<FMPMC, T, 5>(np, nc, "folly::MPMC blockingRead ");
592 std::cout << "=============================================================="
596 void benches(const int np, const int nc) {
597 std::cout << "====================== " << std::setw(2) << np << " prod"
598 << " " << std::setw(2) << nc << " cons"
599 << " ======================" << std::endl;
600 type_benches<uint32_t>(np, nc, "=== uint32_t ======");
601 // Benchmarks for other element sizes can be added as follows:
602 // type_benches<IntArray<4>>(np, nc, "=== IntArray<4> ===");
605 TEST(DynamicBoundedQueue, bench) {
609 std::cout << "=============================================================="
611 std::cout << std::setw(2) << FLAGS_reps << " reps of " << std::setw(8)
612 << FLAGS_ops << " handoffs\n";
614 std::cout << "Using capacity " << FLAGS_capacity << " for all queues\n";
615 std::cout << "=============================================================="
617 std::cout << "Test name Max time Avg time Min time"
620 for (int np : {1, 8, 32}) {
621 for (int nc : {1, 8, 32}) {
628 $ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000000
629 ==============================================================
630 10 reps of 1000000 handoffs
631 ..............................................................
632 Using capacity 1000000 for all queues
633 ==============================================================
634 Test name Max time Avg time Min time
635 ====================== 1 prod 1 cons ======================
636 === uint32_t =================================================
637 DSPSC try spin only 7 ns 7 ns 7 ns
638 DSPSC timed spin only 9 ns 9 ns 9 ns
639 DSPSC wait spin only 7 ns 7 ns 7 ns
640 DSPSC try may block 39 ns 36 ns 33 ns
641 DSPSC timed may block 39 ns 38 ns 37 ns
642 DSPSC wait may block 37 ns 34 ns 33 ns
643 ..............................................................
644 DMPSC try spin only 54 ns 53 ns 52 ns
645 DMPSC timed spin only 53 ns 52 ns 51 ns
646 DMPSC wait spin only 53 ns 52 ns 51 ns
647 DMPSC try may block 67 ns 65 ns 64 ns
648 DMPSC timed may block 64 ns 62 ns 60 ns
649 DMPSC wait may block 64 ns 62 ns 60 ns
650 ..............................................................
651 DSPMC try spin only 25 ns 24 ns 23 ns
652 DSPMC timed spin only 24 ns 23 ns 23 ns
653 DSPMC wait spin only 22 ns 21 ns 21 ns
654 DSPMC try may block 30 ns 26 ns 21 ns
655 DSPMC timed may block 25 ns 24 ns 24 ns
656 DSPMC wait may block 22 ns 22 ns 21 ns
657 ..............................................................
658 DMPMC try spin only 48 ns 45 ns 39 ns
659 DMPMC timed spin only 31 ns 30 ns 24 ns
660 DMPMC wait spin only 49 ns 47 ns 43 ns
661 DMPMC try may block 63 ns 62 ns 61 ns
662 DMPMC timed may block 64 ns 60 ns 46 ns
663 DMPMC wait may block 61 ns 60 ns 58 ns
664 ..............................................................
665 folly::PCQ read 8 ns 7 ns 7 ns
666 ..............................................................
667 folly::MPMC read 53 ns 51 ns 49 ns
668 folly::MPMC tryReadUntil 112 ns 106 ns 103 ns
669 folly::MPMC blockingRead 50 ns 47 ns 46 ns
670 ==============================================================
671 ====================== 1 prod 8 cons ======================
672 === uint32_t =================================================
673 DSPMC try spin only 166 ns 159 ns 153 ns
674 DSPMC timed spin only 169 ns 163 ns 156 ns
675 DSPMC wait spin only 60 ns 57 ns 54 ns
676 DSPMC try may block 170 ns 163 ns 153 ns
677 DSPMC timed may block 165 ns 157 ns 150 ns
678 DSPMC wait may block 94 ns 78 ns 52 ns
679 ..............................................................
680 DMPMC try spin only 170 ns 161 ns 149 ns
681 DMPMC timed spin only 167 ns 158 ns 149 ns
682 DMPMC wait spin only 93 ns 80 ns 51 ns
683 DMPMC try may block 164 ns 161 ns 154 ns
684 DMPMC timed may block 163 ns 156 ns 145 ns
685 DMPMC wait may block 117 ns 102 ns 87 ns
686 ..............................................................
687 folly::MPMC read 176 ns 168 ns 159 ns
688 folly::MPMC tryReadUntil 1846 ns 900 ns 521 ns
689 folly::MPMC blockingRead 219 ns 193 ns 178 ns
690 ==============================================================
691 ====================== 1 prod 32 cons ======================
692 === uint32_t =================================================
693 DSPMC try spin only 224 ns 213 ns 204 ns
694 DSPMC timed spin only 215 ns 209 ns 199 ns
695 DSPMC wait spin only 334 ns 114 ns 52 ns
696 DSPMC try may block 240 ns 215 ns 202 ns
697 DSPMC timed may block 245 ns 221 ns 200 ns
698 DSPMC wait may block 215 ns 151 ns 98 ns
699 ..............................................................
700 DMPMC try spin only 348 ns 252 ns 204 ns
701 DMPMC timed spin only 379 ns 244 ns 198 ns
702 DMPMC wait spin only 173 ns 116 ns 89 ns
703 DMPMC try may block 362 ns 231 ns 173 ns
704 DMPMC timed may block 282 ns 236 ns 206 ns
705 DMPMC wait may block 252 ns 172 ns 134 ns
706 ..............................................................
707 folly::MPMC read 540 ns 290 ns 186 ns
708 folly::MPMC tryReadUntil 24946 ns 24280 ns 23113 ns
709 folly::MPMC blockingRead 1345 ns 1297 ns 1265 ns
710 ==============================================================
711 ====================== 8 prod 1 cons ======================
712 === uint32_t =================================================
713 DMPSC try spin only 68 ns 64 ns 60 ns
714 DMPSC timed spin only 69 ns 66 ns 61 ns
715 DMPSC wait spin only 67 ns 65 ns 62 ns
716 DMPSC try may block 77 ns 73 ns 67 ns
717 DMPSC timed may block 75 ns 74 ns 68 ns
718 DMPSC wait may block 76 ns 73 ns 69 ns
719 ..............................................................
720 DMPMC try spin only 76 ns 66 ns 60 ns
721 DMPMC timed spin only 77 ns 68 ns 63 ns
722 DMPMC wait spin only 68 ns 65 ns 63 ns
723 DMPMC try may block 76 ns 72 ns 64 ns
724 DMPMC timed may block 82 ns 74 ns 67 ns
725 DMPMC wait may block 77 ns 72 ns 68 ns
726 ..............................................................
727 folly::MPMC read 170 ns 166 ns 161 ns
728 folly::MPMC tryReadUntil 184 ns 179 ns 173 ns
729 folly::MPMC blockingRead 79 ns 73 ns 53 ns
730 ==============================================================
731 ====================== 8 prod 8 cons ======================
732 === uint32_t =================================================
733 DMPMC try spin only 181 ns 169 ns 133 ns
734 DMPMC timed spin only 179 ns 168 ns 148 ns
735 DMPMC wait spin only 77 ns 76 ns 71 ns
736 DMPMC try may block 180 ns 179 ns 176 ns
737 DMPMC timed may block 174 ns 166 ns 153 ns
738 DMPMC wait may block 79 ns 78 ns 75 ns
739 ..............................................................
740 folly::MPMC read 219 ns 206 ns 183 ns
741 folly::MPMC tryReadUntil 262 ns 244 ns 213 ns
742 folly::MPMC blockingRead 61 ns 58 ns 54 ns
743 ==============================================================
744 ====================== 8 prod 32 cons ======================
745 === uint32_t =================================================
746 DMPMC try spin only 265 ns 217 ns 203 ns
747 DMPMC timed spin only 236 ns 215 ns 202 ns
748 DMPMC wait spin only 93 ns 83 ns 77 ns
749 DMPMC try may block 325 ns 234 ns 200 ns
750 DMPMC timed may block 206 ns 202 ns 193 ns
751 DMPMC wait may block 139 ns 93 ns 76 ns
752 ..............................................................
753 folly::MPMC read 259 ns 214 ns 201 ns
754 folly::MPMC tryReadUntil 281 ns 274 ns 267 ns
755 folly::MPMC blockingRead 62 ns 59 ns 57 ns
756 ==============================================================
757 ====================== 32 prod 1 cons ======================
758 === uint32_t =================================================
759 DMPSC try spin only 95 ns 57 ns 45 ns
760 DMPSC timed spin only 94 ns 52 ns 46 ns
761 DMPSC wait spin only 104 ns 54 ns 43 ns
762 DMPSC try may block 59 ns 54 ns 51 ns
763 DMPSC timed may block 86 ns 58 ns 52 ns
764 DMPSC wait may block 76 ns 57 ns 53 ns
765 ..............................................................
766 DMPMC try spin only 68 ns 64 ns 60 ns
767 DMPMC timed spin only 137 ns 73 ns 61 ns
768 DMPMC wait spin only 86 ns 65 ns 58 ns
769 DMPMC try may block 89 ns 71 ns 65 ns
770 DMPMC timed may block 82 ns 69 ns 65 ns
771 DMPMC wait may block 84 ns 71 ns 66 ns
772 ..............................................................
773 folly::MPMC read 222 ns 203 ns 192 ns
774 folly::MPMC tryReadUntil 239 ns 232 ns 191 ns
775 folly::MPMC blockingRead 78 ns 68 ns 64 ns
776 ==============================================================
777 ====================== 32 prod 8 cons ======================
778 === uint32_t =================================================
779 DMPMC try spin only 183 ns 138 ns 107 ns
780 DMPMC timed spin only 237 ns 158 ns 98 ns
781 DMPMC wait spin only 87 ns 70 ns 58 ns
782 DMPMC try may block 169 ns 132 ns 92 ns
783 DMPMC timed may block 172 ns 133 ns 79 ns
784 DMPMC wait may block 166 ns 89 ns 66 ns
785 ..............................................................
786 folly::MPMC read 221 ns 194 ns 183 ns
787 folly::MPMC tryReadUntil 258 ns 244 ns 230 ns
788 folly::MPMC blockingRead 60 ns 54 ns 47 ns
789 ==============================================================
790 ====================== 32 prod 32 cons ======================
791 === uint32_t =================================================
792 DMPMC try spin only 419 ns 252 ns 181 ns
793 DMPMC timed spin only 252 ns 212 ns 179 ns
794 DMPMC wait spin only 153 ns 79 ns 62 ns
795 DMPMC try may block 302 ns 236 ns 182 ns
796 DMPMC timed may block 266 ns 213 ns 170 ns
797 DMPMC wait may block 262 ns 120 ns 64 ns
798 ..............................................................
799 folly::MPMC read 269 ns 245 ns 199 ns
800 folly::MPMC tryReadUntil 257 ns 245 ns 235 ns
801 folly::MPMC blockingRead 53 ns 48 ns 45 ns
802 ==============================================================
804 $ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000
805 ==============================================================
806 10 reps of 1000000 handoffs
807 ..............................................................
808 Using capacity 1000 for all queues
809 ==============================================================
810 Test name Max time Avg time Min time
811 ====================== 1 prod 1 cons ======================
812 === uint32_t =================================================
813 DSPSC try spin only 7 ns 7 ns 7 ns
814 DSPSC timed spin only 9 ns 9 ns 9 ns
815 DSPSC wait spin only 7 ns 7 ns 7 ns
816 DSPSC try may block 34 ns 33 ns 31 ns
817 DSPSC timed may block 34 ns 34 ns 33 ns
818 DSPSC wait may block 30 ns 30 ns 29 ns
819 ..............................................................
820 DMPSC try spin only 60 ns 57 ns 55 ns
821 DMPSC timed spin only 55 ns 52 ns 51 ns
822 DMPSC wait spin only 57 ns 54 ns 52 ns
823 DMPSC try may block 66 ns 62 ns 39 ns
824 DMPSC timed may block 67 ns 64 ns 62 ns
825 DMPSC wait may block 67 ns 65 ns 64 ns
826 ..............................................................
827 DSPMC try spin only 27 ns 25 ns 24 ns
828 DSPMC timed spin only 25 ns 25 ns 24 ns
829 DSPMC wait spin only 23 ns 23 ns 22 ns
830 DSPMC try may block 31 ns 26 ns 24 ns
831 DSPMC timed may block 33 ns 30 ns 30 ns
832 DSPMC wait may block 37 ns 29 ns 28 ns
833 ..............................................................
834 DMPMC try spin only 55 ns 53 ns 51 ns
835 DMPMC timed spin only 36 ns 31 ns 26 ns
836 DMPMC wait spin only 54 ns 53 ns 51 ns
837 DMPMC try may block 68 ns 64 ns 51 ns
838 DMPMC timed may block 66 ns 63 ns 60 ns
839 DMPMC wait may block 68 ns 63 ns 60 ns
840 ..............................................................
841 folly::PCQ read 15 ns 13 ns 11 ns
842 ..............................................................
843 folly::MPMC read 60 ns 56 ns 51 ns
844 folly::MPMC tryReadUntil 134 ns 112 ns 102 ns
845 folly::MPMC blockingRead 57 ns 51 ns 48 ns
846 ==============================================================
847 ====================== 1 prod 8 cons ======================
848 === uint32_t =================================================
849 DSPMC try spin only 169 ns 162 ns 151 ns
850 DSPMC timed spin only 178 ns 166 ns 149 ns
851 DSPMC wait spin only 59 ns 55 ns 54 ns
852 DSPMC try may block 173 ns 163 ns 153 ns
853 DSPMC timed may block 171 ns 166 ns 156 ns
854 DSPMC wait may block 71 ns 57 ns 51 ns
855 ..............................................................
856 DMPMC try spin only 172 ns 164 ns 158 ns
857 DMPMC timed spin only 173 ns 164 ns 156 ns
858 DMPMC wait spin only 77 ns 62 ns 53 ns
859 DMPMC try may block 181 ns 163 ns 152 ns
860 DMPMC timed may block 174 ns 165 ns 151 ns
861 DMPMC wait may block 91 ns 72 ns 52 ns
862 ..............................................................
863 folly::MPMC read 178 ns 167 ns 161 ns
864 folly::MPMC tryReadUntil 991 ns 676 ns 423 ns
865 folly::MPMC blockingRead 154 ns 129 ns 96 ns
866 ==============================================================
867 ====================== 1 prod 32 cons ======================
868 === uint32_t =================================================
869 DSPMC try spin only 462 ns 288 ns 201 ns
870 DSPMC timed spin only 514 ns 283 ns 201 ns
871 DSPMC wait spin only 100 ns 60 ns 45 ns
872 DSPMC try may block 531 ns 318 ns 203 ns
873 DSPMC timed may block 1379 ns 891 ns 460 ns
874 DSPMC wait may block 148 ns 111 ns 82 ns
875 ..............................................................
876 DMPMC try spin only 404 ns 312 ns 205 ns
877 DMPMC timed spin only 337 ns 253 ns 219 ns
878 DMPMC wait spin only 130 ns 97 ns 72 ns
879 DMPMC try may block 532 ns 265 ns 201 ns
880 DMPMC timed may block 846 ns 606 ns 412 ns
881 DMPMC wait may block 158 ns 112 ns 87 ns
882 ..............................................................
883 folly::MPMC read 880 ns 419 ns 284 ns
884 folly::MPMC tryReadUntil 23432 ns 23184 ns 23007 ns
885 folly::MPMC blockingRead 1353 ns 1308 ns 1279 ns
886 ==============================================================
887 ====================== 8 prod 1 cons ======================
888 === uint32_t =================================================
889 DMPSC try spin only 67 ns 63 ns 51 ns
890 DMPSC timed spin only 69 ns 65 ns 63 ns
891 DMPSC wait spin only 67 ns 65 ns 61 ns
892 DMPSC try may block 73 ns 69 ns 63 ns
893 DMPSC timed may block 72 ns 69 ns 64 ns
894 DMPSC wait may block 71 ns 70 ns 68 ns
895 ..............................................................
896 DMPMC try spin only 70 ns 64 ns 59 ns
897 DMPMC timed spin only 76 ns 66 ns 53 ns
898 DMPMC wait spin only 68 ns 66 ns 64 ns
899 DMPMC try may block 71 ns 68 ns 66 ns
900 DMPMC timed may block 72 ns 70 ns 67 ns
901 DMPMC wait may block 73 ns 70 ns 67 ns
902 ..............................................................
903 folly::MPMC read 193 ns 167 ns 153 ns
904 folly::MPMC tryReadUntil 497 ns 415 ns 348 ns
905 folly::MPMC blockingRead 163 ns 134 ns 115 ns
906 ==============================================================
907 ====================== 8 prod 8 cons ======================
908 === uint32_t =================================================
909 DMPMC try spin only 216 ns 203 ns 196 ns
910 DMPMC timed spin only 199 ns 186 ns 178 ns
911 DMPMC wait spin only 63 ns 60 ns 58 ns
912 DMPMC try may block 212 ns 198 ns 183 ns
913 DMPMC timed may block 180 ns 170 ns 162 ns
914 DMPMC wait may block 72 ns 68 ns 65 ns
915 ..............................................................
916 folly::MPMC read 225 ns 201 ns 188 ns
917 folly::MPMC tryReadUntil 255 ns 248 ns 232 ns
918 folly::MPMC blockingRead 52 ns 48 ns 42 ns
919 ==============================================================
920 ====================== 8 prod 32 cons ======================
921 === uint32_t =================================================
922 DMPMC try spin only 360 ns 302 ns 195 ns
923 DMPMC timed spin only 350 ns 272 ns 218 ns
924 DMPMC wait spin only 92 ns 72 ns 61 ns
925 DMPMC try may block 352 ns 263 ns 223 ns
926 DMPMC timed may block 218 ns 213 ns 209 ns
927 DMPMC wait may block 98 ns 77 ns 70 ns
928 ..............................................................
929 folly::MPMC read 611 ns 461 ns 339 ns
930 folly::MPMC tryReadUntil 270 ns 260 ns 253 ns
931 folly::MPMC blockingRead 89 ns 84 ns 80 ns
932 ==============================================================
933 ====================== 32 prod 1 cons ======================
934 === uint32_t =================================================
935 DMPSC try spin only 389 ns 248 ns 149 ns
936 DMPSC timed spin only 356 ns 235 ns 120 ns
937 DMPSC wait spin only 343 ns 242 ns 125 ns
938 DMPSC try may block 412 ns 294 ns 168 ns
939 DMPSC timed may block 332 ns 271 ns 189 ns
940 DMPSC wait may block 280 ns 252 ns 199 ns
941 ..............................................................
942 DMPMC try spin only 393 ns 269 ns 105 ns
943 DMPMC timed spin only 328 ns 240 ns 112 ns
944 DMPMC wait spin only 502 ns 266 ns 107 ns
945 DMPMC try may block 514 ns 346 ns 192 ns
946 DMPMC timed may block 339 ns 318 ns 278 ns
947 DMPMC wait may block 319 ns 307 ns 292 ns
948 ..............................................................
949 folly::MPMC read 948 ns 517 ns 232 ns
950 folly::MPMC tryReadUntil 9649 ns 7567 ns 4140 ns
951 folly::MPMC blockingRead 1365 ns 1316 ns 1131 ns
952 ==============================================================
953 ====================== 32 prod 8 cons ======================
954 === uint32_t =================================================
955 DMPMC try spin only 436 ns 257 ns 115 ns
956 DMPMC timed spin only 402 ns 272 ns 121 ns
957 DMPMC wait spin only 136 ns 78 ns 55 ns
958 DMPMC try may block 454 ns 227 ns 78 ns
959 DMPMC timed may block 155 ns 137 ns 116 ns
960 DMPMC wait may block 62 ns 59 ns 57 ns
961 ..............................................................
962 folly::MPMC read 677 ns 497 ns 336 ns
963 folly::MPMC tryReadUntil 268 ns 262 ns 258 ns
964 folly::MPMC blockingRead 87 ns 85 ns 82 ns
965 ==============================================================
966 ====================== 32 prod 32 cons ======================
967 === uint32_t =================================================
968 DMPMC try spin only 786 ns 381 ns 142 ns
969 DMPMC timed spin only 795 ns 346 ns 126 ns
970 DMPMC wait spin only 334 ns 107 ns 55 ns
971 DMPMC try may block 535 ns 317 ns 144 ns
972 DMPMC timed may block 197 ns 192 ns 183 ns
973 DMPMC wait may block 189 ns 75 ns 60 ns
974 ..............................................................
975 folly::MPMC read 1110 ns 919 ns 732 ns
976 folly::MPMC tryReadUntil 214 ns 210 ns 206 ns
977 folly::MPMC blockingRead 53 ns 52 ns 51 ns
978 ==============================================================
982 CPU op-mode(s): 32-bit, 64-bit
983 Byte Order: Little Endian
985 On-line CPU(s) list: 0-31
986 Thread(s) per core: 2
987 Core(s) per socket: 8
990 Vendor ID: GenuineIntel
993 Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
996 CPU max MHz: 2200.0000
997 CPU min MHz: 1200.0000
1004 NUMA node0 CPU(s): 0-7,16-23
1005 NUMA node1 CPU(s): 8-15,24-31
1007 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr
1008 pge mca cmov pat pse36 clflush dts acpi mmx fxsr
1009 sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp
1010 lm constant_tsc arch_perfmon pebs bts rep_good
1011 nopl xtopology nonstop_tsc aperfmperf eagerfpu
1012 pni pclmulqdq dtes64 monitor ds_cpl vmx smx est
1013 tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2
1014 x2apic popcnt tsc_deadline_timer aes xsave avx
1015 lahf_lm epb tpr_shadow vnmi flexpriority ept vpid
1016 xsaveopt dtherm arat pln pts