{
mcs_node * tail_was_me = me;
bool success;
- // FIXME: SCFence infers acq_rel / release
+ // SCFence infers acq_rel / release !!!
// Could be release if wildcard(8) is acquire
if ( (success = m_tail.compare_exchange_strong(
tail_was_me,NULL,wildcard(7))) ) { // acq_rel
// (*1) catch the race :
rl::linear_backoff bo;
for(;;) {
- // FIXME: SCFence infers relaxed / acquire
+ // SCFence infers relaxed / acquire!!!
// Could be relaxed if wildcard(7) is acq_rel
next = me->next.load(wildcard(8)); // acquire
if ( next != NULL )
if ( next == NULL )
{
mcs_node * tail_was_me = me;
- if ( m_tail.compare_exchange_strong( tail_was_me,NULL,std::mo_acq_rel) ) {
+ if ( m_tail.compare_exchange_strong(
+ tail_was_me,NULL,std::mo_release) ) {
// got null in tail, mutex is unlocked
return;
}
--- /dev/null
+When lock() publishes the 'me' to the previous lock-holder
+ // prev->next.store(me, std::mo_release);
+, it must synchronize with the unlock()'s
+ // me->next.load(std::mo_acquire);
+because otherwise when it's the lock()'s turn to lock, it is allowed to always
+read 1 in
+ // me->gate.load(std::mo_acquire)
+such that it will end up with an infinite loop.
+
+To make this happen, we establish synchronization between the initialization of
+the gate and the later update of gate. From the inference analysis, we actually
+got two results, where wildcard(7) and wildcard(8) only need one acquire. That's
+actually the place to synchronize the update of gate. We can achieve it at the
+Tail or at the next field.
--- /dev/null
+Result 0:
+wildcard 1 -> memory_order_relaxed
+wildcard 2 -> memory_order_relaxed
+wildcard 3 -> memory_order_acq_rel
+wildcard 4 -> memory_order_release
+wildcard 5 -> memory_order_acquire
+wildcard 6 -> memory_order_acquire
+wildcard 7 -> memory_order_acq_rel
+wildcard 8 -> memory_order_relaxed
+wildcard 9 -> memory_order_release
--- /dev/null
+Result 1:
+wildcard 1 -> memory_order_relaxed
+wildcard 2 -> memory_order_relaxed
+wildcard 3 -> memory_order_acq_rel
+wildcard 4 -> memory_order_release
+wildcard 5 -> memory_order_acquire
+wildcard 6 -> memory_order_acquire
+wildcard 7 -> memory_order_release
+wildcard 8 -> memory_order_acquire
+wildcard 9 -> memory_order_release
#include "librace.h"
struct mcs_mutex *mutex;
-static uint32_t shared;
+static atomic_int shared;
void threadA(void *arg)
{
mcs_mutex::guard g(mutex);
printf("store: %d\n", 17);
- store_32(&shared, 17);
+ //store_32(&shared, 17);
+ atomic_store_explicit(&shared, 17, relaxed);
mutex->unlock(&g);
mutex->lock(&g);
- printf("load: %u\n", load_32(&shared));
+ //printf("load: %u\n", load_32(&shared));
+ atomic_load_explicit(&shared, relaxed);
}
void threadB(void *arg)
{
mcs_mutex::guard g(mutex);
- printf("load: %u\n", load_32(&shared));
+ //printf("load: %u\n", load_32(&shared));
+ atomic_load_explicit(&shared, relaxed);
mutex->unlock(&g);
mutex->lock(&g);
- printf("store: %d\n", 17);
+ atomic_store_explicit(&shared, 17, relaxed);
+ //printf("store: %d\n", 17);
store_32(&shared, 17);
}
while (!success) {
head = atomic_load_explicit(&q->head, wildcard(13)); // acquire
- // FIXME: SCFence makes this acquire
+ // SCFence makes this acquire, and we actually need an acquire here!!!
tail = atomic_load_explicit(&q->tail, wildcard(14)); // relaxed
next = atomic_load_explicit(&q->nodes[get_ptr(head)].next, wildcard(15)); // acquire
if (atomic_load_explicit(&q->head, wildcard(16)) == head) { // relaxed
#include "queue.h"
#include "model-assert.h"
-static int procs = 2;
+static int procs = 4;
static queue_t *queue;
static thrd_t *threads;
static unsigned int *input;
{
unsigned int val;
int pid = *((int *)param);
- if (!pid) {
+ if (pid % 4 == 0) {
input[0] = 17;
enqueue(queue, input[0]);
succ1 = dequeue(queue, &output[0]);
//printf("Dequeue: %d\n", output[0]);
- } else {
+ } else if (pid % 4 == 1) {
input[1] = 37;
enqueue(queue, input[1]);
succ2 = dequeue(queue, &output[1]);
+ } else if (pid % 4 == 2) {
+
+ } else if (pid % 4 == 3) {
+
}
}
}
for (i = 0; i < num_threads; i++)
thrd_join(threads[i]);
-
+/*
for (i = 0; i < num_threads; i++) {
in_sum += input[i];
out_sum += output[i];
MODEL_ASSERT(in_sum == out_sum);
else
MODEL_ASSERT (false);
-
+*/
free(param);
free(threads);
free(queue);
+++ /dev/null
-#include <threads.h>
-#include <stdlib.h>
-#include "librace.h"
-#include "model-assert.h"
-
-#include "stack.h"
-
-#define MAX_FREELIST 4 /* Each thread can own up to MAX_FREELIST free nodes */
-#define INITIAL_FREE 2 /* Each thread starts with INITIAL_FREE free nodes */
-
-#define POISON_IDX 0x666
-
-static unsigned int (*free_lists)[MAX_FREELIST];
-
-/* Search this thread's free list for a "new" node */
-static unsigned int new_node()
-{
- int i;
- int t = get_thread_num();
- for (i = 0; i < MAX_FREELIST; i++) {
- //unsigned int node = load_32(&free_lists[t][i]);
- unsigned int node = free_lists[t][i];
- if (node) {
- //store_32(&free_lists[t][i], 0);
- free_lists[t][i] = 0;
- return node;
- }
- }
- /* free_list is empty? */
- MODEL_ASSERT(0);
- return 0;
-}
-
-/* Place this node index back on this thread's free list */
-static void reclaim(unsigned int node)
-{
- int i;
- int t = get_thread_num();
-
- /* Don't reclaim NULL node */
- //MODEL_ASSERT(node);
-
- for (i = 0; i < MAX_FREELIST; i++) {
- /* Should never race with our own thread here */
- //unsigned int idx = load_32(&free_lists[t][i]);
- unsigned int idx = free_lists[t][i];
-
- /* Found empty spot in free list */
- if (idx == 0) {
- //store_32(&free_lists[t][i], node);
- free_lists[t][i] = node;
- return;
- }
- }
- /* free list is full? */
- MODEL_ASSERT(0);
-}
-
-void init_stack(stack_t *s, int num_threads)
-{
- int i, j;
-
- /* Initialize each thread's free list with INITIAL_FREE pointers */
- /* The actual nodes are initialized with poison indexes */
- free_lists = malloc(num_threads * sizeof(*free_lists));
- for (i = 0; i < num_threads; i++) {
- for (j = 0; j < INITIAL_FREE; j++) {
- free_lists[i][j] = 1 + i * MAX_FREELIST + j;
- atomic_init(&s->nodes[free_lists[i][j]].next, MAKE_POINTER(POISON_IDX, 0));
- }
- }
-
- /* initialize stack */
- atomic_init(&s->top, MAKE_POINTER(0, 0));
-}
-
-void push(stack_t *s, unsigned int val) {
- unsigned int nodeIdx = new_node();
- node_t *node = &s->nodes[nodeIdx];
- node->value = val;
- pointer oldTop, newTop;
- bool success;
- while (true) {
- // acquire
- oldTop = atomic_load_explicit(&s->top, acquire);
- newTop = MAKE_POINTER(nodeIdx, get_count(oldTop) + 1);
- // relaxed
- atomic_store_explicit(&node->next, oldTop, relaxed);
-
- // release & relaxed
- success = atomic_compare_exchange_strong_explicit(&s->top, &oldTop,
- newTop, release, relaxed);
- if (success)
- break;
- }
-}
-
-unsigned int pop(stack_t *s)
-{
- pointer oldTop, newTop, next;
- node_t *node;
- bool success;
- int val;
- while (true) {
- // acquire
- oldTop = atomic_load_explicit(&s->top, acquire);
- if (get_ptr(oldTop) == 0)
- return 0;
- node = &s->nodes[get_ptr(oldTop)];
- // relaxed
- next = atomic_load_explicit(&node->next, relaxed);
- newTop = MAKE_POINTER(get_ptr(next), get_count(oldTop) + 1);
- // release & relaxed
- success = atomic_compare_exchange_strong_explicit(&s->top, &oldTop,
- newTop, release, relaxed);
- if (success)
- break;
- }
- val = node->value;
- /* Reclaim the used slot */
- reclaim(get_ptr(oldTop));
- return val;
-}
+++ /dev/null
-#include <stdlib.h>
-#include <stdio.h>
-#include <threads.h>
-
-#include "model-assert.h"
-
-#include "stack.h"
-
-static int procs = 4;
-static stack_t *stack;
-static thrd_t *threads;
-static int num_threads;
-
-unsigned int idx1, idx2;
-unsigned int a, b;
-
-
-atomic_int x[3];
-
-int get_thread_num()
-{
- thrd_t curr = thrd_current();
- int i;
- for (i = 0; i < num_threads; i++)
- if (curr.priv == threads[i].priv)
- return i;
- MODEL_ASSERT(0);
- return -1;
-}
-
-static void main_task(void *param)
-{
- unsigned int val;
- int pid = *((int *)param);
-
- if (pid % 4 == 0) {
- atomic_store_explicit(&x[1], 17, relaxed);
- push(stack, 1);
- } else if (pid % 4 == 1) {
- atomic_store_explicit(&x[2], 37, relaxed);
- push(stack, 2);
- } else if (pid % 4 == 2) {/*
- idx1 = pop(stack);
- if (idx1 != 0) {
- a = atomic_load_explicit(&x[idx1], relaxed);
- printf("a: %d\n", a);
- }*/
- } else {
- idx2 = pop(stack);
- if (idx2 != 0) {
- b = atomic_load_explicit(&x[idx2], relaxed);
- printf("b: %d\n", b);
- }
- }
-}
-
-int user_main(int argc, char **argv)
-{
- int i;
- int *param;
- unsigned int in_sum = 0, out_sum = 0;
-
- atomic_init(&x[1], 0);
- atomic_init(&x[2], 0);
-
- stack = calloc(1, sizeof(*stack));
-
- num_threads = procs;
- threads = malloc(num_threads * sizeof(thrd_t));
- param = malloc(num_threads * sizeof(*param));
-
- init_stack(stack, num_threads);
-
- for (i = 0; i < num_threads; i++) {
- param[i] = i;
- thrd_create(&threads[i], main_task, ¶m[i]);
- }
- for (i = 0; i < num_threads; i++)
- thrd_join(threads[i]);
-
- bool correct = false;
- //correct |= (a == 17 || a == 37 || a == 0);
- //MODEL_ASSERT(correct);
-
- free(param);
- free(threads);
- free(stack);
-
- return 0;
-}