From: Nathan Bronson Date: Mon, 7 Mar 2016 21:36:59 +0000 (-0800) Subject: minor tweak to MicroLock slow path X-Git-Tag: 2016.07.26~457 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e756d07cd35714d7528444321ea5145b41f5ae0f;p=folly.git minor tweak to MicroLock slow path Summary: This diff uses the x86 "pause" instruction or its equivalent when spinning in the MicroLock slow loop. This gives a hint to the processor that it should devote more resources to the other execution contexts sharing the same core. This diff also removes an mfence on x86 by using a slightly stronger memory model on the preceding compare_exchange_weak, and switches to a more portable way of invoking sched_yield(). Reviewed By: dcolascione Differential Revision: D3018568 fb-gh-sync-id: 02e0ab3a9d9bb9901eddf54e45b71cbb7758a227 shipit-source-id: 02e0ab3a9d9bb9901eddf54e45b71cbb7758a227 --- diff --git a/folly/MicroLock.cpp b/folly/MicroLock.cpp index d6656dce..dd649da1 100644 --- a/folly/MicroLock.cpp +++ b/folly/MicroLock.cpp @@ -15,6 +15,7 @@ */ #include <folly/MicroLock.h> +#include <thread> namespace folly { @@ -45,7 +46,10 @@ retry: } (void)wordPtr->futexWait(newWord, slotHeldBit); } else if (spins > maxSpins) { - sched_yield(); + // sched_yield(), but more portable + std::this_thread::yield(); + } else { + folly::asm_pause(); } oldWord = wordPtr->load(std::memory_order_relaxed); goto retry; @@ -54,14 +58,9 @@ retry: newWord = oldWord | slotHeldBit; if (!wordPtr->compare_exchange_weak(oldWord, newWord, - std::memory_order_relaxed, + std::memory_order_acquire, std::memory_order_relaxed)) { goto retry; } - - // Locks are traditionally memory barriers, so we emit a full fence - // even though we were happy using relaxed atomics for the - // lock itself.
- std::atomic_thread_fence(std::memory_order_seq_cst); } } diff --git a/folly/MicroLock.h b/folly/MicroLock.h index 3ff158b8..07aa40b8 100644 --- a/folly/MicroLock.h +++ b/folly/MicroLock.h @@ -193,7 +193,7 @@ void MicroLockBase::lock(unsigned slot) { oldWord | heldBit(slot), std::memory_order_acquire, std::memory_order_relaxed)) { - // Fast uncontended case: seq_cst above is our memory barrier + // Fast uncontended case: memory_order_acquire above is our barrier } else { // lockSlowPath doesn't have any slot-dependent computation; it // just shifts the input bit. Make sure its shifting produces the