From: Brian Norris Date: Wed, 6 Mar 2013 00:29:51 +0000 (-0800) Subject: ms-queue: cleanups, convert to C11 atomics X-Git-Tag: oopsla2013-final~36 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=509ce2ac5c3372c0527e4b66d1088bcdd7b94cdf;p=model-checker-benchmarks.git ms-queue: cleanups, convert to C11 atomics This converts the "pointer" to a 64-bit type (unsigned long long, or 'pointer') divided into two 32-bit halves (ptr and count), instead of using a union of a struct and an 'unsigned long'. All atomics are seq_cst for now. Once the code all looks a little better, I'll relax these to reasonable memory orderings. --- diff --git a/ms-queue/args.c b/ms-queue/args.c index 89101e5..6fe19de 100644 --- a/ms-queue/args.c +++ b/ms-queue/args.c @@ -1,25 +1,17 @@ #include "main.h" extern unsigned iterations; -extern unsigned multi; -extern unsigned initial_nodes; extern unsigned procs; -extern unsigned repetitions; -extern unsigned work; void parse_args(int argc, char **argv) { - extern char * optarg; + extern char *optarg; int c; - while ((c = getopt(argc, argv, "i:m:n:p:r:w:")) != EOF) + while ((c = getopt(argc, argv, "i:p:")) != EOF) switch(c) { case 'i': iterations = atoi(optarg); break; - case 'm': multi = atoi(optarg); break; - case 'n': initial_nodes = atoi(optarg); break; case 'p': procs = atoi(optarg); break; - case 'r': repetitions = atoi(optarg); break; - case 'w': work = atoi(optarg); break; default: assert(0); } diff --git a/ms-queue/main.c b/ms-queue/main.c index 015fbd4..30d9da9 100644 --- a/ms-queue/main.c +++ b/ms-queue/main.c @@ -1,78 +1,52 @@ #include "main.h" #include -#define NUM_PROCESSORS 12 - -struct tms tim; -struct tms tim1; - -int shmid; - -unsigned pid; -char* name = ""; -unsigned procs = 1; -unsigned multi = 1; +unsigned procs = 2; unsigned iterations = 1; -unsigned initial_nodes = 0; -unsigned repetitions = 1; -unsigned work = 0; private_t private; shared_mem_t *smp; -void time_test() +static void main_task(void *param) { - 
unsigned i,j; - struct tms time_val; - clock_t t1, t2; + unsigned i, j; unsigned val; + int pid = *((int *)param); - if(pid==0) { - init_queue(); - } init_memory(); - init_private(); - for(i=0;i>1))/(procs*multi); + iterations = (iterations + (procs >> 1)) / procs; smp = (shared_mem_t *)calloc(1, sizeof(shared_mem_t)); assert(smp); - num_threads = procs * multi; + num_threads = procs; t = malloc(num_threads * sizeof(thrd_t)); + param = malloc(num_threads * sizeof(*param)); - for (i = 0; i < num_threads; i++) - thrd_create(&t[i], main_task, NULL); + init_queue(); + for (i = 0; i < num_threads; i++) { + param[i] = i; + thrd_create(&t[i], main_task, &param[i]); + } for (i = 0; i < num_threads; i++) thrd_join(t[i]); + free(param); free(t); free(smp); diff --git a/ms-queue/my_queue.c b/ms-queue/my_queue.c index 1f8a446..8fedd9c 100644 --- a/ms-queue/my_queue.c +++ b/ms-queue/my_queue.c @@ -1,15 +1,13 @@ #include "main.h" -extern unsigned pid; -extern unsigned iterations; -extern unsigned initial_nodes; +extern unsigned int iterations; extern private_t private; -extern shared_mem_t* smp; +extern shared_mem_t *smp; -void init_private() +void init_private(int pid) { - private.node = 2 + initial_nodes + pid; - private.value = 1 + initial_nodes + (pid * iterations); + private.node = 2 + pid; + private.value = 1 + (pid * iterations); } @@ -17,114 +15,116 @@ void init_memory() { } -static unsigned new_node() +static unsigned int new_node() { return private.node; } -static void reclaim(unsigned node) +static void reclaim(unsigned int node) { private.node = node; } void init_queue() { - unsigned i; + unsigned int i; + pointer head; + pointer tail; + pointer next; /* initialize queue */ - smp->head.sep.ptr = 1; - smp->head.sep.count = 0; - smp->tail.sep.ptr = 1; - smp->tail.sep.count = 0; - smp->nodes[1].next.sep.ptr = NULL; - smp->nodes[1].next.sep.count = 0; + head = MAKE_POINTER(1, 0); + tail = MAKE_POINTER(1, 0); + next = MAKE_POINTER(0, 0); // (NULL, 0) + 
atomic_init(&smp->nodes[0].next, 0); // assumed initialized in original example + + atomic_store(&smp->head, head); + atomic_store(&smp->tail, tail); + atomic_store(&smp->nodes[1].next, next); /* initialize avail list */ - for (i=2; i<MAX_NODES; i++) { - smp->nodes[i].next.sep.ptr = i+1; - smp->nodes[i].next.sep.count = 0; - } - smp->nodes[MAX_NODES].next.sep.ptr = NULL; - smp->nodes[MAX_NODES].next.sep.count = 0; - - /* initialize queue contents */ - if (initial_nodes > 0) { - for (i=2; inodes[i].value = i; - smp->nodes[i-1].next.sep.ptr = i; - smp->nodes[i].next.sep.ptr = NULL; - } - smp->head.sep.ptr = 1; - smp->tail.sep.ptr = 1 + initial_nodes; + for (i = 2; i < MAX_NODES; i++) { + next = MAKE_POINTER(i + 1, 0); + atomic_store(&smp->nodes[i].next, next); } + + next = MAKE_POINTER(0, 0); // (NULL, 0) + atomic_store(&smp->nodes[MAX_NODES].next, next); } -void enqueue(unsigned val) +void enqueue(unsigned int val) { - unsigned success; - unsigned node; - pointer_t tail; - pointer_t next; + unsigned int success = 0; + unsigned int node; + pointer tail; + pointer next; + pointer tmp; node = new_node(); smp->nodes[node].value = val; - smp->nodes[node].next.sep.ptr = NULL; + tmp = atomic_load(&smp->nodes[node].next); + set_ptr(&tmp, 0); // NULL + atomic_store(&smp->nodes[node].next, tmp); - for (success = FALSE; success == FALSE; ) { - tail.con = smp->tail.con; - next.con = smp->nodes[tail.sep.ptr].next.con; - if (tail.con == smp->tail.con) { - if (next.sep.ptr == NULL) { - success = cas(&smp->nodes[tail.sep.ptr].next, - next.con, - MAKE_LONG(node, next.sep.count+1)); + while (!success) { + tail = atomic_load(&smp->tail); + next = atomic_load(&smp->nodes[get_ptr(tail)].next); + if (tail == atomic_load(&smp->tail)) { + if (get_ptr(next) == 0) { // == NULL + pointer val = MAKE_POINTER(node, get_count(next) + 1); + success = atomic_compare_exchange_weak(&smp->nodes[get_ptr(tail)].next, + &next, + val); } - if (success == FALSE) { - cas(&smp->tail, - tail.con, - 
MAKE_LONG(smp->nodes[tail.sep.ptr].next.sep.ptr, - tail.sep.count+1)); + if (!success) { + unsigned int ptr = get_ptr(atomic_load(&smp->nodes[get_ptr(tail)].next)); + pointer val = MAKE_POINTER(ptr, + get_count(tail) + 1); + atomic_compare_exchange_strong(&smp->tail, + &tail, + val); thrd_yield(); } } } - cas(&smp->tail, - tail.con, - MAKE_LONG(node, tail.sep.count+1)); + atomic_compare_exchange_strong(&smp->tail, + &tail, + MAKE_POINTER(node, get_count(tail) + 1)); } -unsigned dequeue() +unsigned int dequeue() { - unsigned value; - unsigned success; - pointer_t head; - pointer_t tail; - pointer_t next; + unsigned int value; + unsigned int success; + pointer head; + pointer tail; + pointer next; for (success = FALSE; success == FALSE; ) { - head.con = smp->head.con; - tail.con = smp->tail.con; - next.con = smp->nodes[head.sep.ptr].next.con; - if (smp->head.con == head.con) { - if (head.sep.ptr == tail.sep.ptr) { - if (next.sep.ptr == NULL) { - return NULL; + head = atomic_load(&smp->head); + tail = atomic_load(&smp->tail); + next = atomic_load(&smp->nodes[get_ptr(head)].next); + if (atomic_load(&smp->head) == head) { + if (get_ptr(head) == get_ptr(tail)) { + if (get_ptr(next) == 0) { // NULL + return 0; // NULL } - cas(&smp->tail, - tail.con, - MAKE_LONG(next.sep.ptr, tail.sep.count+1)); + atomic_compare_exchange_weak(&smp->tail, + &tail, + MAKE_POINTER(get_ptr(next), get_count(tail) + 1)); thrd_yield(); } else { - value = smp->nodes[next.sep.ptr].value; - success = cas(&smp->head, - head.con, - MAKE_LONG(next.sep.ptr, head.sep.count+1)); + value = smp->nodes[get_ptr(next)].value; + success = atomic_compare_exchange_weak(&smp->head, + &head, + MAKE_POINTER(get_ptr(next), get_count(head) + 1)); if (success == FALSE) { thrd_yield(); } } } } - reclaim(head.sep.ptr); + reclaim(get_ptr(head)); return value; } diff --git a/ms-queue/my_queue.h b/ms-queue/my_queue.h index 3e3f435..519e9e3 100644 --- a/ms-queue/my_queue.h +++ b/ms-queue/my_queue.h @@ -3,41 +3,43 @@ #define 
TRUE 1 #define FALSE 0 -#define MAX_NODES 0xff +#define MAX_NODES 0xf #define MAX_SERIAL 10000 -#define MAKE_LONG(lo, hi) ((hi)<<16)+(lo) +typedef unsigned long long pointer; +typedef atomic_ullong pointer_t; -typedef union pointer { - struct { - volatile unsigned short count; - volatile unsigned short ptr; - } sep; - atomic_ulong con; -} pointer_t; +#define MAKE_POINTER(ptr, count) ((((pointer)count) << 32) | ptr) +#define PTR_MASK 0xffffffffLL +#define COUNT_MASK (0xffffffffLL << 32) + +static inline void set_count(pointer *p, unsigned int val) { *p = (*p & ~COUNT_MASK) | ((pointer)val << 32); } +static inline void set_ptr(pointer *p, unsigned int val) { *p = (*p & ~PTR_MASK) | val; } +static inline unsigned int get_count(pointer p) { return p & PTR_MASK; } +static inline unsigned int get_ptr(pointer p) { return (p & COUNT_MASK) >> 32; } typedef struct node { - unsigned value; + unsigned int value; pointer_t next; - unsigned foo[30]; + unsigned int foo[30]; } node_t; typedef struct private { - unsigned node; - unsigned value; - unsigned serial[MAX_SERIAL]; + unsigned int node; + unsigned int value; + unsigned int serial[MAX_SERIAL]; } private_t; typedef struct shared_mem { pointer_t head; - unsigned foo1[31]; + unsigned int foo1[31]; pointer_t tail; - unsigned foo2[31]; + unsigned int foo2[31]; node_t nodes[MAX_NODES+1]; - unsigned serial; + unsigned int serial; } shared_mem_t; -void init_private(); +void init_private(int pid); void init_memory(); void init_queue(); -unsigned dequeue(); +unsigned int dequeue();