From: Peizhao Ou Date: Fri, 13 Feb 2015 21:45:07 +0000 (-0800) Subject: add notes to mcs-lock X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;ds=sidebyside;h=49ad748941ac5aec2723380ffe2d2719e760d42e;p=model-checker-benchmarks.git add notes to mcs-lock --- diff --git a/mcs-lock/mcs-lock-wildcard.h b/mcs-lock/mcs-lock-wildcard.h index b8d92b2..824fcc8 100644 --- a/mcs-lock/mcs-lock-wildcard.h +++ b/mcs-lock/mcs-lock-wildcard.h @@ -74,7 +74,7 @@ public: { mcs_node * tail_was_me = me; bool success; - // FIXME: SCFence infers acq_rel / release + // SCFence infers acq_rel / release !!! // Could be release if wildcard(8) is acquire if ( (success = m_tail.compare_exchange_strong( tail_was_me,NULL,wildcard(7))) ) { // acq_rel @@ -85,7 +85,7 @@ public: // (*1) catch the race : rl::linear_backoff bo; for(;;) { - // FIXME: SCFence infers relaxed / acquire + // SCFence infers relaxed / acquire!!! // Could be relaxed if wildcard(7) is acq_rel next = me->next.load(wildcard(8)); // acquire if ( next != NULL ) diff --git a/mcs-lock/mcs-lock.h b/mcs-lock/mcs-lock.h index 4b808f7..70e4019 100644 --- a/mcs-lock/mcs-lock.h +++ b/mcs-lock/mcs-lock.h @@ -69,7 +69,8 @@ public: if ( next == NULL ) { mcs_node * tail_was_me = me; - if ( m_tail.compare_exchange_strong( tail_was_me,NULL,std::mo_acq_rel) ) { + if ( m_tail.compare_exchange_strong( + tail_was_me,NULL,std::mo_release) ) { // got null in tail, mutex is unlocked return; } diff --git a/mcs-lock/note.txt b/mcs-lock/note.txt new file mode 100644 index 0000000..ef56cdc --- /dev/null +++ b/mcs-lock/note.txt @@ -0,0 +1,14 @@ +When lock() publishes the 'me' to the previous lock-holder + // prev->next.store(me, std::mo_release); +, it must synchronize with the unlock()'s + // me->next.load(std::mo_acquire); +because otherwise when it's the lock()'s turn to lock, it is allowed to always +read 1 in + // me->gate.load(std::mo_acquire) +such that it will end up with an infinite loop. + +To make this happen, we establish synchronization between the initialization of +the gate and the later update of gate. From the inference analysis, we actually +got two results, where wildcard(7) and wildcard(8) only need one acquire. That's +actually the place to synchronize the update of gate. We can achieve it at the +Tail or at the next field. diff --git a/mcs-lock/result1.txt b/mcs-lock/result1.txt new file mode 100644 index 0000000..6ba17d5 --- /dev/null +++ b/mcs-lock/result1.txt @@ -0,0 +1,10 @@ +Result 0: +wildcard 1 -> memory_order_relaxed +wildcard 2 -> memory_order_relaxed +wildcard 3 -> memory_order_acq_rel +wildcard 4 -> memory_order_release +wildcard 5 -> memory_order_acquire +wildcard 6 -> memory_order_acquire +wildcard 7 -> memory_order_acq_rel +wildcard 8 -> memory_order_relaxed +wildcard 9 -> memory_order_release diff --git a/mcs-lock/result2.txt b/mcs-lock/result2.txt new file mode 100644 index 0000000..6c708dc --- /dev/null +++ b/mcs-lock/result2.txt @@ -0,0 +1,10 @@ +Result 1: +wildcard 1 -> memory_order_relaxed +wildcard 2 -> memory_order_relaxed +wildcard 3 -> memory_order_acq_rel +wildcard 4 -> memory_order_release +wildcard 5 -> memory_order_acquire +wildcard 6 -> memory_order_acquire +wildcard 7 -> memory_order_release +wildcard 8 -> memory_order_acquire +wildcard 9 -> memory_order_release diff --git a/mcs-lock/testcase1.cc b/mcs-lock/testcase1.cc index 971bf1d..461f5b1 100644 --- a/mcs-lock/testcase1.cc +++ b/mcs-lock/testcase1.cc @@ -7,25 +7,29 @@ #include "librace.h" struct mcs_mutex *mutex; -static uint32_t shared; +static atomic_int shared; void threadA(void *arg) { mcs_mutex::guard g(mutex); printf("store: %d\n", 17); - store_32(&shared, 17); + //store_32(&shared, 17); + atomic_store_explicit(&shared, 17, relaxed); mutex->unlock(&g); mutex->lock(&g); - printf("load: %u\n", load_32(&shared)); + //printf("load: %u\n", load_32(&shared)); + atomic_load_explicit(&shared, relaxed); } void threadB(void *arg) { mcs_mutex::guard g(mutex); - printf("load: %u\n", load_32(&shared)); + //printf("load: %u\n", load_32(&shared)); + atomic_load_explicit(&shared, relaxed); mutex->unlock(&g); mutex->lock(&g); - printf("store: %d\n", 17); + atomic_store_explicit(&shared, 17, relaxed); + //printf("store: %d\n", 17); store_32(&shared, 17); } diff --git a/ms-queue/queue_wildcard.c b/ms-queue/queue_wildcard.c index df1702a..43a2de2 100644 --- a/ms-queue/queue_wildcard.c +++ b/ms-queue/queue_wildcard.c @@ -134,7 +134,7 @@ bool dequeue(queue_t *q, unsigned int *retVal) while (!success) { head = atomic_load_explicit(&q->head, wildcard(13)); // acquire - // FIXME: SCFence makes this acquire + // SCFence makes this acquire, and we actually need an acquire here!!! tail = atomic_load_explicit(&q->tail, wildcard(14)); // relaxed next = atomic_load_explicit(&q->nodes[get_ptr(head)].next, wildcard(15)); // acquire if (atomic_load_explicit(&q->head, wildcard(16)) == head) { // relaxed diff --git a/ms-queue/testcase1.c b/ms-queue/testcase1.c index 9bf12fb..6548366 100644 --- a/ms-queue/testcase1.c +++ b/ms-queue/testcase1.c @@ -5,7 +5,7 @@ #include "queue.h" #include "model-assert.h" -static int procs = 2; +static int procs = 4; static queue_t *queue; static thrd_t *threads; static unsigned int *input; @@ -29,15 +29,19 @@ static void main_task(void *param) { unsigned int val; int pid = *((int *)param); - if (!pid) { + if (pid % 4 == 0) { input[0] = 17; enqueue(queue, input[0]); succ1 = dequeue(queue, &output[0]); //printf("Dequeue: %d\n", output[0]); - } else { + } else if (pid % 4 == 1) { input[1] = 37; enqueue(queue, input[1]); succ2 = dequeue(queue, &output[1]); + } else if (pid % 4 == 2) { + + } else if (pid % 4 == 3) { + } } @@ -63,7 +67,7 @@ int user_main(int argc, char **argv) } for (i = 0; i < num_threads; i++) thrd_join(threads[i]); - +/* for (i = 0; i < num_threads; i++) { in_sum += input[i]; out_sum += output[i]; @@ -76,7 +80,7 @@ int user_main(int argc, char **argv) MODEL_ASSERT(in_sum == out_sum); else MODEL_ASSERT (false); - +*/ free(param); free(threads); free(queue); diff --git a/treiber-stack/stack.c b/treiber-stack/stack.c deleted file mode 100644 index 9fc501d..0000000 --- a/treiber-stack/stack.c +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include "librace.h" -#include "model-assert.h" - -#include "stack.h" - -#define MAX_FREELIST 4 /* Each thread can own up to MAX_FREELIST free nodes */ -#define INITIAL_FREE 2 /* Each thread starts with INITIAL_FREE free nodes */ - -#define POISON_IDX 0x666 - -static unsigned int (*free_lists)[MAX_FREELIST]; - -/* Search this thread's free list for a "new" node */ -static unsigned int new_node() -{ - int i; - int t = get_thread_num(); - for (i = 0; i < MAX_FREELIST; i++) { - //unsigned int node = load_32(&free_lists[t][i]); - unsigned int node = free_lists[t][i]; - if (node) { - //store_32(&free_lists[t][i], 0); - free_lists[t][i] = 0; - return node; - } - } - /* free_list is empty? */ - MODEL_ASSERT(0); - return 0; -} - -/* Place this node index back on this thread's free list */ -static void reclaim(unsigned int node) -{ - int i; - int t = get_thread_num(); - - /* Don't reclaim NULL node */ - //MODEL_ASSERT(node); - - for (i = 0; i < MAX_FREELIST; i++) { - /* Should never race with our own thread here */ - //unsigned int idx = load_32(&free_lists[t][i]); - unsigned int idx = free_lists[t][i]; - - /* Found empty spot in free list */ - if (idx == 0) { - //store_32(&free_lists[t][i], node); - free_lists[t][i] = node; - return; - } - } - /* free list is full? */ - MODEL_ASSERT(0); -} - -void init_stack(stack_t *s, int num_threads) -{ - int i, j; - - /* Initialize each thread's free list with INITIAL_FREE pointers */ - /* The actual nodes are initialized with poison indexes */ - free_lists = malloc(num_threads * sizeof(*free_lists)); - for (i = 0; i < num_threads; i++) { - for (j = 0; j < INITIAL_FREE; j++) { - free_lists[i][j] = 1 + i * MAX_FREELIST + j; - atomic_init(&s->nodes[free_lists[i][j]].next, MAKE_POINTER(POISON_IDX, 0)); - } - } - - /* initialize stack */ - atomic_init(&s->top, MAKE_POINTER(0, 0)); -} - -void push(stack_t *s, unsigned int val) { - unsigned int nodeIdx = new_node(); - node_t *node = &s->nodes[nodeIdx]; - node->value = val; - pointer oldTop, newTop; - bool success; - while (true) { - // acquire - oldTop = atomic_load_explicit(&s->top, acquire); - newTop = MAKE_POINTER(nodeIdx, get_count(oldTop) + 1); - // relaxed - atomic_store_explicit(&node->next, oldTop, relaxed); - - // release & relaxed - success = atomic_compare_exchange_strong_explicit(&s->top, &oldTop, - newTop, release, relaxed); - if (success) - break; - } -} - -unsigned int pop(stack_t *s) -{ - pointer oldTop, newTop, next; - node_t *node; - bool success; - int val; - while (true) { - // acquire - oldTop = atomic_load_explicit(&s->top, acquire); - if (get_ptr(oldTop) == 0) - return 0; - node = &s->nodes[get_ptr(oldTop)]; - // relaxed - next = atomic_load_explicit(&node->next, relaxed); - newTop = MAKE_POINTER(get_ptr(next), get_count(oldTop) + 1); - // release & relaxed - success = atomic_compare_exchange_strong_explicit(&s->top, &oldTop, - newTop, release, relaxed); - if (success) - break; - } - val = node->value; - /* Reclaim the used slot */ - reclaim(get_ptr(oldTop)); - return val; -} diff --git a/treiber-stack/testcase1.c b/treiber-stack/testcase1.c deleted file mode 100644 index b97af0f..0000000 --- a/treiber-stack/testcase1.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include - -#include "model-assert.h" - -#include "stack.h" - -static int procs = 4; -static stack_t *stack; -static thrd_t *threads; -static int num_threads; - -unsigned int idx1, idx2; -unsigned int a, b; - - -atomic_int x[3]; - -int get_thread_num() -{ - thrd_t curr = thrd_current(); - int i; - for (i = 0; i < num_threads; i++) - if (curr.priv == threads[i].priv) - return i; - MODEL_ASSERT(0); - return -1; -} - -static void main_task(void *param) -{ - unsigned int val; - int pid = *((int *)param); - - if (pid % 4 == 0) { - atomic_store_explicit(&x[1], 17, relaxed); - push(stack, 1); - } else if (pid % 4 == 1) { - atomic_store_explicit(&x[2], 37, relaxed); - push(stack, 2); - } else if (pid % 4 == 2) {/* - idx1 = pop(stack); - if (idx1 != 0) { - a = atomic_load_explicit(&x[idx1], relaxed); - printf("a: %d\n", a); - }*/ - } else { - idx2 = pop(stack); - if (idx2 != 0) { - b = atomic_load_explicit(&x[idx2], relaxed); - printf("b: %d\n", b); - } - } -} - -int user_main(int argc, char **argv) -{ - int i; - int *param; - unsigned int in_sum = 0, out_sum = 0; - - atomic_init(&x[1], 0); - atomic_init(&x[2], 0); - - stack = calloc(1, sizeof(*stack)); - - num_threads = procs; - threads = malloc(num_threads * sizeof(thrd_t)); - param = malloc(num_threads * sizeof(*param)); - - init_stack(stack, num_threads); - - for (i = 0; i < num_threads; i++) { - param[i] = i; - thrd_create(&threads[i], main_task, ¶m[i]); - } - for (i = 0; i < num_threads; i++) - thrd_join(threads[i]); - - bool correct = false; - //correct |= (a == 17 || a == 37 || a == 0); - //MODEL_ASSERT(correct); - - free(param); - free(threads); - free(stack); - - return 0; -}