1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
3 #include "structdefs.h"
// INLINE marks a function `inline` and adds the GCC-style always_inline
// attribute; it qualifies the runtime prototypes declared in this header.
6 #define INLINE inline __attribute__((always_inline))
15 ////////////////////////////////////////////////////////////////
16 // global variables //
17 ///////////////////////////////////////////////////////////////
19 // record the starting time
// NOTE(review): this is a definition without `extern` inside a header;
// presumably the build relies on common-symbol linking or a single including
// translation unit -- confirm.
20 unsigned long long bamboo_start_time;
28 #ifdef GC_SMALLPAGESIZE
29 #define BAMBOO_GLOBAL_DEFS_SIZE (1024 * 1024)
30 #define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (1024 * 512)
32 #define BAMBOO_GLOBAL_DEFS_SIZE (BAMBOO_SMEM_SIZE)
33 #define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (BAMBOO_SMEM_SIZE/2)
34 #endif // GC_SMALLPAGESIZE
38 // shared memory pointer for global thread queue
39 // In MGC version, this block of memory is located at the very bottom of the
40 // shared memory with the base address as BAMBOO_BASE_VA.
41 // The bottom of the shared memory = global thread queue + sbstart tbl
42 // + smemtbl + NUMCORES4GC bamboo_rmsp
43 // This queue always resides at the bottom of the shared memory. It is
44 // considered a runtime structure: during gc it is scanned in the mark and
45 // flush phases but is never compacted.
47 // This is a loop array and the first 4 int fields of the queue are:
48 // mutex + thread counter + start pointer + end pointer
49 #ifdef GC_SMALLPAGESIZE
50 #define BAMBOO_THREAD_QUEUE_SIZE (1024 * 1024)
52 #define BAMBOO_THREAD_QUEUE_SIZE (BAMBOO_SMEM_SIZE) // (45 * 16 * 1024)
54 // data structures for threads
// presumably the base pointer of the global thread queue (a loop array whose
// first 4 int fields are mutex, thread counter, start pointer, end pointer)
// -- TODO confirm
55 INTPTR * bamboo_thread_queue;
// presumably a bit mask derived from the maximum thread count -- TODO confirm
56 unsigned int bamboo_max_thread_num_mask;
// presumably identifies the thread currently running on this core -- TODO confirm
57 INTPTR bamboo_current_thread;
62 // data structures for msgs
// Both buffer lengths are 2048 and each *_MASK is length - 1 (0x7FF), so an
// index can be wrapped around the buffer with a bitwise AND.
63 #define BAMBOO_OUT_BUF_LENGTH 2048
64 #define BAMBOO_OUT_BUF_MASK (0x7FF)
65 #define BAMBOO_MSG_BUF_LENGTH 2048
66 #define BAMBOO_MSG_BUF_MASK (0x7FF)
// message word buffer; MSG_CACHE_I stores into msgdata[msgdatalast]
67 int msgdata[BAMBOO_MSG_BUF_LENGTH];
// index advanced by MSG_INDEXINC_I()
68 volatile int msgdataindex;
// index advanced by MSG_LASTINDEXINC_I(); the slot MSG_CACHE_I writes to
69 volatile int msgdatalast;
// tested by MSG_REMAINSIZE_I when msgdataindex == msgdatalast
71 volatile bool msgdatafull;
// buffer written by OUTMSG_CACHE at index outmsglast
72 int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
// presumably set while a cached message is still waiting to be sent
// (see send_hanging_msg) -- TODO confirm
76 volatile bool isMsgHanging;
// Advance msgdataindex by one slot, wrapping with BAMBOO_MSG_BUF_MASK.
// The expansion has no trailing semicolon; the caller supplies it.
// NOTE(review): the _I suffix presumably means "call with interrupts
// disabled" -- confirm.
78 #define MSG_INDEXINC_I() \
79 msgdataindex = (msgdataindex + 1) & (BAMBOO_MSG_BUF_MASK)
// Advance msgdatalast by one slot, wrapping with BAMBOO_MSG_BUF_MASK.
// The expansion has no trailing semicolon; the caller supplies it.
81 #define MSG_LASTINDEXINC_I() \
82 msgdatalast = (msgdatalast + 1) & (BAMBOO_MSG_BUF_MASK)
84 #define MSG_CACHE_I(n) \
85 msgdata[msgdatalast] = (n); \
88 // NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
89 // full. In the case that the buffer is empty, should never call this
91 #define MSG_REMAINSIZE_I(s) \
92 if(msgdataindex < msgdatalast) { \
93 (*(int*)s) = msgdatalast - msgdataindex; \
94 } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
97 (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
// Advance outmsgindex by one slot, wrapping with BAMBOO_OUT_BUF_MASK.
// The expansion has no trailing semicolon; the caller supplies it.
100 #define OUTMSG_INDEXINC() \
101 outmsgindex = (outmsgindex + 1) & (BAMBOO_OUT_BUF_MASK)
103 #define OUTMSG_LASTINDEXINC() \
104 outmsglast = (outmsglast + 1) & (BAMBOO_OUT_BUF_MASK); \
105 if(outmsglast == outmsgindex) { \
106 BAMBOO_EXIT(0xd101); \
// Store word n into outmsgdata[outmsglast], then advance outmsglast through
// OUTMSG_LASTINDEXINC().
// NOTE(review): this expands to several statements, is not wrapped in
// do { } while (0), and already ends with `;` -- take care when using it as
// the body of an unbraced if/else.
109 #define OUTMSG_CACHE(n) \
110 outmsgdata[outmsglast] = (n); \
111 OUTMSG_LASTINDEXINC();
// presumably the maximum number of words carried by one packet -- TODO confirm
113 #define MAX_PACKET_WORDS 5
117 * type: 1 -- transfer object
118 * 2 -- transfer stall msg
123 * // add for profile info
124 * 7 -- transfer profile output msg
125 * 8 -- transfer profile output finish msg
126 * // add for alias lock strategy
127 * 9 -- redirect lock request
128 * a -- lock grant with redirect info
129 * b -- lock deny with redirect info
130 * c -- lock release with redirect info
131 * d -- status confirm request
132 * e -- status report msg
134 * 10 -- requiring for new memory
135 * 11 -- response for new memory request
136 * 12 -- GC init phase start
138 * 14 -- compact phase start
139 * 15 -- flush phase start
140 * 16 -- init phase finish
141 * 17 -- mark phase finish
142 * 18 -- compact phase finish
143 * 19 -- flush phase finish
145 * 1b -- marked phase finish confirm request
146 * 1c -- marked phase finish confirm response
147 * 1d -- markedObj msg
148 * 1e -- start moving objs msg
149 * 1f -- ask for mapping info of a markedObj
150 * 20 -- mapping info of a markedObj
151 * 21 -- large objs info request
152 * 22 -- large objs info response
153 * 23 -- large objs mapping info
155 * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
156 * StallMsg: 2 + corenum + sendobjs + receiveobjs
157 * (size is always 4 * sizeof(int))
158 * LockMsg: 3 + lock type + obj pointer + lock + request core
159 * (size is always 5 * sizeof(int))
160 * 4/5/6 + lock type + obj pointer + lock
161 * (size is always 4 * sizeof(int))
162 * 9 + lock type + obj pointer + redirect lock + root request core
164 * (size is always 6 * sizeof(int))
165 * a/b + lock type + obj pointer + redirect lock
166 * (size is always 4 * sizeof(int))
167 * c + lock type + lock + redirect lock
168 * (size is always 4 * sizeof(int))
169 * lock type: 0 -- read; 1 -- write
170 * ProfileMsg: 7 + totalexetime
171 * (size is always 2 * sizeof(int))
173 * (size is always 2 * sizeof(int))
174 * StatusMsg: d (size is always 1 * sizeof(int))
175 * e + status + corenum + sendobjs + receiveobjs
176 * (size is always 5 * sizeof(int))
177 * status: 0 -- stall; 1 -- busy
178 * TerminateMsg: f (size is always 1 * sizeof(int))
179 * MemoryMsg: 10 + size + corenum
180 * (size is always 3 * sizeof(int))
181 * 11 + base_va + size
182 * (size is always 3 * sizeof(int))
183 * GCMsg: 12/13 (size is always 1 * sizeof(int))
184 * 14 + size of msg + (num of objs to move + (start address
185 * + end address + dst core + start dst)+)?
186 * + (num of incoming objs + (start dst + orig core)+)?
187 * + (num of large obj lists + (start address + length
189 * 15 (size is always 1 * sizeof(int))
191 * (size is always 2 * sizeof(int))
192 * 17 + corenum + gcsendobjs + gcreceiveobjs
193 * (size is always 4 * sizeof(int))
194 * 18 + corenum + fulfilled blocks num + (finish compact(1) + current
195 * heap top)/(need mem(0) + mem need)
196 * (size is always 5 * sizeof(int))
198 * (size is always 2 * sizeof(int))
199 * 1a (size is always 1 * sizeof(int))
200 * 1b (size is always 1 * sizeof(int))
201 * 1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
202 * (size is always 5 * sizeof(int))
203 * 1d + obj's address + request core
204 * (size is always 3 * sizeof(int))
205 * 1e + corenum + start addr + end addr
206 * (size is always 4 * sizeof(int))
207 * 1f + obj's address + corenum
208 * (size is always 3 * sizeof(int))
209 * 20 + obj's address + dst address
210 * (size is always 3 * sizeof(int))
211 * 21 (size is always 1 * sizeof(int))
212 * 22 + size of msg + corenum + current heap size
213 * + (num of large obj lists + (start address + length)+)?
214 * 23 + orig large obj ptr + new large obj ptr
215 * (size is always 3 * sizeof(int))
218 MSGSTART = 0xD0, // 0xD0
225 PROFILEOUTPUT, // 0xD7
226 PROFILEFINISH, // 0xD8
227 REDIRECTLOCK, // 0xD9
228 REDIRECTGROUNT, // 0xDa
229 REDIRECTDENY, // 0xDb
230 REDIRECTRELEASE, // 0xDc
231 STATUSCONFIRM, // 0xDd
232 STATUSREPORT, // 0xDe
240 GCSTARTCOMPACT, // 0xE5
241 GCSTARTMAPINFO, // 0xE6
242 GCSTARTFLUSH, // 0xE7
244 GCFINISHINIT, // 0xE9
245 GCFINISHMARK, // 0xEa
246 GCFINISHCOMPACT, // 0xEb
247 GCFINISHMAPINFO, // 0xEc
248 GCFINISHFLUSH, // 0xEd
250 GCMARKCONFIRM, // 0xEf
251 GCMARKREPORT, // 0xF0
254 GCMAPREQUEST, // 0xF3
257 GCLOBJREQUEST, // 0xF6
259 GCLOBJMAPPING, // 0xF8
263 #ifdef GC_CACHE_ADAPT
264 GCSTARTPOSTINIT, // 0xFa
266 GCFINISHPOSTINIT, // 0xFc
267 GCFINISHPREF, // 0xFd
268 #endif // GC_CACHE_ADAPT
269 #endif // MULTICORE_GC
273 /////////////////////////////////////////////////////////////////////////////////
274 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
275 // No greater than the number of all the cores in
277 // NUMCORES -- number of cores chosen to deploy the application. It can
278 // be greater than that required to fully parallelize the
279 // application. The same as NUMCORES.
280 // NUMCORESACTIVE -- number of cores that really execute the
281 // application. No greater than NUMCORES
282 // NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
283 // NOTE: currently only supports continuous cores as gc
284 // cores, i.e. 0~NUMCORES4GC-1
285 ////////////////////////////////////////////////////////////////////////////////
286 // data structures of status for termination
287 // only check working cores
// The three arrays below hold one slot per active core (NUMCORESACTIVE).
288 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
291 volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core has sent
293 volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a core has received
// presumably used with the status confirm request / status report messages
// -- TODO confirm
295 volatile int numconfirm;
296 volatile bool waitconfirm;
// presumably this core's own send / receive counters -- TODO confirm
298 int self_numsendobjs;
299 int self_numreceiveobjs;
301 // TASK specific data structures
303 // get rid of lock msgs for GC version
305 // data structures for locking
// locktable is the hash-table object itself; locktbl is a static pointer
// initialised to its address (static: one copy per including translation unit).
306 struct RuntimeHash locktable;
307 static struct RuntimeHash* locktbl = &locktable;
// redirect tables: only the pointers are defined here
308 struct RuntimeHash * lockRedirectTbl;
309 struct RuntimeHash * objRedirectLockTbl;
310 #endif // ifndef MULTICORE_GC
320 // data structures for waiting objs
321 struct Queue objqueue;
322 struct Queue * totransobjqueue; // queue to hold objs to be transferred
323 // should be cleared whenever enter a task
328 // data structures for profile mode
// number of entries in taskInfoArray
330 #define TASKINFOLENGTH 3000 // 0
331 #ifdef PROFILE_INTERRUPT
// number of entries in interruptInfoArray
332 #define INTERRUPTINFOLENGTH 50 //0
333 #endif // PROFILE_INTERRUPT
335 typedef struct task_info {
337 unsigned long long startTime;
338 unsigned long long endTime;
339 unsigned long long exitIndex;
340 struct Queue * newObjs;
343 TaskInfo * taskInfoArray[TASKINFOLENGTH];
345 bool taskInfoOverflow;
// interrupt profiling data, compiled only when PROFILE_INTERRUPT is defined
346 #ifdef PROFILE_INTERRUPT
347 typedef struct interrupt_info {
348 unsigned long long startTime;
349 unsigned long long endTime;
352 InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH]; // INTERRUPTINFOLENGTH record pointers
353 int interruptInfoIndex;
354 bool interruptInfoOverflow;
355 #endif // PROFILE_INTERRUPT
356 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
359 #endif // #ifdef PROFILE
362 #include "multicoremem.h"
364 /////////////////////////////////////////////////////////////
366 ////////////////////////////////////////////////////////////
367 // these functions should be implemented in the //
368 // multicore runtime for any multicore processor //
369 ////////////////////////////////////////////////////////////
// Runtime life-cycle entry points. All are declared with INLINE and take no
// arguments except initlock.
371 INLINE void initialization(void);
372 INLINE void initCommunication(void);
373 INLINE void fakeExecution(void);
374 INLINE void terminate(void);
// v: presumably the object whose lock is to be initialised -- TODO confirm
375 INLINE void initlock(struct ___Object___ * v);
// declared only when BAMBOO_MEMPROF is defined
376 #ifdef BAMBOO_MEMPROF
377 INLINE void terminatememprof(void);
378 #endif // BAMBOO_MEMPROF
380 // msg related functions
381 INLINE void send_hanging_msg(bool isInterrupt);
382 INLINE void send_msg_1(int targetcore,
385 INLINE void send_msg_2(int targetcore,
389 INLINE void send_msg_3(int targetcore,
394 INLINE void send_msg_4(int targetcore,
400 INLINE void send_msg_5(int targetcore,
407 INLINE void send_msg_6(int targetcore,
415 INLINE void cache_msg_1(int targetcore,
417 INLINE void cache_msg_2(int targetcore,
420 INLINE void cache_msg_3(int targetcore,
424 INLINE void cache_msg_4(int targetcore,
429 INLINE void cache_msg_5(int targetcore,
435 INLINE void cache_msg_6(int targetcore,
442 INLINE int receiveMsg(unsigned int send_port_pending);
445 INLINE void transferMarkResults();
446 #endif // MULTICORE_GC
449 // lock related functions
// ptr is an untyped pointer, presumably to the object being locked; the bool
// result presumably reports whether the lock was obtained -- TODO confirm.
450 bool getreadlock(void* ptr);
451 void releasereadlock(void* ptr);
452 bool getwritelock(void* ptr);
453 void releasewritelock(void* ptr);
// _I variants: presumably for callers that already run with interrupts
// disabled -- TODO confirm
454 bool getwritelock_I(void* ptr);
455 void releasewritelock_I(void * ptr);
457 void releasewritelock_r(void * lock, void * redirectlock);
458 #endif // ifndef MULTICORE_GC
459 /* this function is to process lock requests.
460 * can only be invoked in receiveObject() */
461 // if return -1: the lock request is redirected
462 // 0: the lock request is approved
463 // 1: the lock request is denied
464 INLINE int processlockrequest(int locktype,
470 INLINE void processlockrelease(int locktype,
475 // msg related functions
476 INLINE void transferObject(struct transObjInfo * transObj);
// profiling hooks; the #endif below is labelled as closing an #ifdef PROFILE
479 INLINE void profileTaskStart(char * taskname);
480 INLINE void profileTaskEnd(void);
// NOTE(review): empty parameter list; `(void)` would make this a full prototype
481 void outputProfileData();
482 #endif // #ifdef PROFILE
483 ///////////////////////////////////////////////////////////
485 /////////////////////////////////////////////////////////////////////////////
486 // For each version of BAMBOO runtime, there should be a header file named //
487 // runtime_arch.h defining the following MACROS: //
488 // BAMBOO_NUM_OF_CORE: the # of current residing core //
489 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core //
490 // BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y) //
491 // BAMBOO_DEBUGPRINT(x): print out integer x //
492 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x //
493 // BAMBOO_EXIT_APP(x): exit the whole application //
494 // BAMBOO_EXIT(x): error exit routine with error # //
495 // BAMBOO_DIE(x): error exit routine with error msg //
496 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number //
497 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in //
498 // BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(): change to runtime mode from //
500 // BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(): change to client mode from //
502 // BAMBOO_ENTER_SEND_MODE_FROM_CLIENT(): change to send mode from //
504 // BAMBOO_ENTER_CLIENT_MODE_FROM_SEND(): change to client mode from //
506 // BAMBOO_ENTER_RUNTIME_MODE_FROM_SEND(): change to runtime mode from //
508 // BAMBOO_ENTER_SEND_MODE_FROM_RUNTIME(): change to send mode from //
510 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock //
511 // request response //
512 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
513 // whose size in bytes is y on local memory //
514 // which is given by the hypervisor //
515 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
516 // BAMBOO_LOCAL_MEM_CLOSE(): close the local heap //
517 // BAMBOO_LOCAL_MEM_CALLOC_S(x, y): allocate an array of x elements each of//
518 // whose size in bytes is y on local //
519 // memory which is not from the hypervisor//
520 // but is allocated from the free memory //
521 // BAMBOO_LOCAL_MEM_FREE_S(x): free space with ptr x on self-allocated //
523 // BAMBOO_LOCAL_MEM_CLOSE_S(): close the self-allocated local heap //
524 // BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of//
525 // whose size in bytes is y on shared memory//
526 // BAMBOO_SHARE_MEM_CLOSE(): close the shared heap //
527 // BAMBOO_CACHE_LINE_SIZE: the cache line size //
528 // BAMBOO_CACHE_LINE_MASK: mask for a cache line //
529 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
531 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary //
532 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
533 // address x, size z) to value y with write //
534 // hint, the processor will not fetch the //
535 // current content of the memory and directly //
537 // BAMBOO_CLEAN_DTLB(): zero-out all the dtlb entries //
538 // BAMBOO_CACHE_FLUSH_L2(): Flush the contents of this tile's L2 back to //
540 // BAMBOO_CACHE_FLUSH_RANGE_NO_FENCE(x, y): flush a range of mem without //
542 // BAMBOO_CACHE_MEM_FENCE_INCOHERENT(): fence to guarantee visibility of //
543 // stores to incoherent memory //
544 /////////////////////////////////////////////////////////////////////////////
546 #endif // #ifdef TASK
547 #endif // #ifdef MULTICORE
548 #endif // #ifndef MULTICORE_RUNTIME