1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
5 #define INLINE inline __attribute__((always_inline))
8 ////////////////////////////////////////////////////////////////
10 ///////////////////////////////////////////////////////////////
12 // record the starting time
13 unsigned long long bamboo_start_time;
15 // data structures for msgs
// capacity (in ints) of outmsgdata
16 #define BAMBOO_OUT_BUF_LENGTH 3000
// capacity (in ints) of msgdata
17 #define BAMBOO_MSG_BUF_LENGTH 3000
// msg buffer used circularly: MSG_CACHE_I stores at msgdata[msgdatalast], and
// msgdataindex / msgdatalast wrap modulo BAMBOO_MSG_BUF_LENGTH
18 int msgdata[BAMBOO_MSG_BUF_LENGTH];
// consulted by MSG_REMAINSIZE_I when msgdataindex == msgdatalast
22 volatile bool msgdatafull;
// out-msg buffer used circularly: OUTMSG_CACHE stores at outmsgdata[outmsglast],
// and outmsgindex / outmsglast wrap modulo BAMBOO_OUT_BUF_LENGTH
23 int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
// NOTE(review): presumably set while cached out msgs are still waiting to be
// sent (see send_hanging_msg) -- confirm against the implementation
27 volatile bool isMsgHanging;
// NOTE(review): presumably set while a send is in progress -- confirm
28 volatile bool isMsgSending;
/*
 * Advance msgdataindex by one slot, wrapping to 0 at BAMBOO_MSG_BUF_LENGTH.
 * The expansion is fully parenthesized so it remains a single assignment
 * expression when used inside a larger expression (e.g. a comparison).
 */
#define MSG_INDEXINC_I() \
  (msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH))
/*
 * Advance msgdatalast by one slot, wrapping to 0 at BAMBOO_MSG_BUF_LENGTH.
 * The expansion is fully parenthesized so it remains a single assignment
 * expression when used inside a larger expression (e.g. a comparison).
 */
#define MSG_LASTINDEXINC_I() \
  (msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH))
36 #define MSG_CACHE_I(n) \
37 msgdata[msgdatalast] = (n); \
40 // NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
41 // full. In the case that the buffer is empty, should never call this
43 #define MSG_REMAINSIZE_I(s) \
44 if(msgdataindex < msgdatalast) { \
45 (*(int*)s) = msgdatalast - msgdataindex; \
46 } else if((msgdataindex == msgdatalast) && (!msgdatafull)) {\
49 (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
/*
 * Advance outmsgindex by one slot, wrapping to 0 at BAMBOO_OUT_BUF_LENGTH.
 * The expansion is fully parenthesized so it remains a single assignment
 * expression when used inside a larger expression (e.g. a comparison).
 */
#define OUTMSG_INDEXINC() \
  (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))
55 #define OUTMSG_LASTINDEXINC() \
56 outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
57 if(outmsglast == outmsgindex) { \
58 BAMBOO_EXIT(0xdd01); \
/*
 * Store value n at the current tail (outmsglast) of outmsgdata, then advance
 * the tail with OUTMSG_LASTINDEXINC().
 * The body is wrapped in do { ... } while (0) so both steps behave as one
 * statement (e.g. as the unbraced body of an if/else). Invoke it with a
 * trailing semicolon: OUTMSG_CACHE(n);
 */
#define OUTMSG_CACHE(n) \
  do { \
    outmsgdata[outmsglast] = (n); \
    OUTMSG_LASTINDEXINC(); \
  } while (0)
67 * type: 1 -- transfer object
68 * 2 -- transfer stall msg
73 * // add for profile info
74 * 7 -- transfer profile output msg
75 * 8 -- transfer profile output finish msg
76 * // add for alias lock strategy
77 * 9 -- redirect lock request
78 * a -- lock grant with redirect info
79 * b -- lock deny with redirect info
80 * c -- lock release with redirect info
81 * d -- status confirm request
82 * e -- status report msg
84 * 10 -- requiring for new memory
85 * 11 -- response for new memory request
86 * 12 -- GC init phase start
88 * 14 -- compact phase start
89 * 15 -- flush phase start
90 * 16 -- init phase finish
91 * 17 -- mark phase finish
92 * 18 -- compact phase finish
93 * 19 -- flush phase finish
95 * 1b -- marked phase finish confirm request
96 * 1c -- marked phase finish confirm response
98 * 1e -- start moving objs msg
99 * 1f -- ask for mapping info of a markedObj
100 * 20 -- mapping info of a markedObj
101 * 21 -- large objs info request
102 * 22 -- large objs info response
103 * 23 -- large objs mapping info
105 * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
106 * StallMsg: 2 + corenum + sendobjs + receiveobjs
107 * (size is always 4 * sizeof(int))
108 * LockMsg: 3 + lock type + obj pointer + lock + request core
109 * (size is always 5 * sizeof(int))
110 * 4/5/6 + lock type + obj pointer + lock
111 * (size is always 4 * sizeof(int))
112 * 9 + lock type + obj pointer + redirect lock + root request core
114 * (size is always 6 * sizeof(int))
115 * a/b + lock type + obj pointer + redirect lock
116 * (size is always 4 * sizeof(int))
117 * c + lock type + lock + redirect lock
118 * (size is always 4 * sizeof(int))
119 * lock type: 0 -- read; 1 -- write
120 * ProfileMsg: 7 + totalexetime
121 * (size is always 2 * sizeof(int))
123 * (size is always 2 * sizeof(int))
124 * StatusMsg: d (size is always 1 * sizeof(int))
125 * e + status + corenum + sendobjs + receiveobjs
126 * (size is always 5 * sizeof(int))
127 * status: 0 -- stall; 1 -- busy
128 * TerminateMsg: f (size is always 1 * sizeof(int))
129 * MemoryMsg: 10 + size + corenum
130 * (size is always 3 * sizeof(int))
131 * 11 + base_va + size
132 * (size is always 3 * sizeof(int))
133 * GCMsg: 12/13 (size is always 1 * sizeof(int))
134 * 14 + size of msg + (num of objs to move + (start address
135 * + end address + dst core + start dst)+)?
136 * + (num of incoming objs + (start dst + orig core)+)?
137 * + (num of large obj lists + (start address + length
139 * 15 (size is always 1 * sizeof(int))
141 * (size is always 2 * sizeof(int))
142 * 17 + corenum + gcsendobjs + gcreceiveobjs
143 * (size is always 4 * sizeof(int))
144 * 18 + corenum + fulfilled blocks num + (finish compact(1) + current
145 * heap top)/(need mem(0) + mem need)
146 * (size is always 5 * sizeof(int))
148 * (size is always 2 * sizeof(int))
149 * 1a (size is always 1 * sizeof(int))
150 * 1b (size is always 1 * sizeof(int))
151 * 1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
152 * (size is always 5 * sizeof(int))
154 * (size is always 2 * sizeof(int))
155 * 1e + corenum + start addr + end addr
156 * (size is always 4 * sizeof(int))
157 * 1f + obj's address + corenum
158 * (size is always 3 * sizeof(int))
159 * 20 + obj's address + dst address
160 * (size is always 3 * sizeof(int))
161 * 21 (size is always 1 * sizeof(int))
162 * 22 + size of msg + corenum + current heap size
163 * + (num of large obj lists + (start address + length)+)?
164 * 23 + orig large obj ptr + new large obj ptr
165 * (size is always 3 * sizeof(int))
168 MSGSTART = 0xD0, // 0xD0
175 PROFILEOUTPUT, // 0xD7
176 PROFILEFINISH, // 0xD8
177 REDIRECTLOCK, // 0xD9
178 REDIRECTGROUNT, // 0xDa
179 REDIRECTDENY, // 0xDb
180 REDIRECTRELEASE, // 0xDc
181 STATUSCONFIRM, // 0xDd
182 STATUSREPORT, // 0xDe
189 GCSTARTCOMPACT, // 0xE4
190 GCSTARTFLUSH, // 0xE5
191 GCFINISHINIT, // 0xE6
192 GCFINISHMARK, // 0xE7
193 GCFINISHCOMPACT, // 0xE8
194 GCFINISHFLUSH, // 0xE9
196 GCMARKCONFIRM, // 0xEb
197 GCMARKREPORT, // 0xEc
200 GCMAPREQUEST, // 0xEf
202 GCLOBJREQUEST, // 0xF1
204 GCLOBJMAPPING, // 0xF3
209 /////////////////////////////////////////////////////////////////////////////////
210 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
211 // No greater than the number of all the cores in
213 // NUMCORES -- number of cores chosen to deploy the application. It can
214 // be greater than that required to fully parallelize the
215 // application. The same as NUMCORES.
216 // NUMCORESACTIVE -- number of cores that really execute the
217 // application. No greater than NUMCORES
218 // NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
219 // NOTE: currently only supports contiguous cores as gc
220 // cores, i.e. 0~NUMCORES4GC-1
221 ////////////////////////////////////////////////////////////////////////////////
222 // data structures of status for termination
223 // only check working cores
// Per-core bookkeeping arrays, one slot per active core (NUMCORESACTIVE).
// NOTE(review): the status report msg documents "0 -- stall; 1 -- busy";
// presumably the same encoding is stored in corestatus -- confirm.
224 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
227 volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core has sent (direction taken from the name -- confirm)
229 volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a core has received (direction taken from the name -- confirm)
// NOTE(review): numconfirm / waitconfirm presumably track the status confirm
// request/report exchange (msg types d / e) -- confirm against the users.
231 volatile int numconfirm;
232 volatile bool waitconfirm;
// presumably this core's own sent / received object counts -- verify
234 int self_numsendobjs;
235 int self_numreceiveobjs;
237 // get rid of lock msgs for GC version
239 // data structures for locking
240 struct RuntimeHash locktable;
241 static struct RuntimeHash* locktbl = &locktable;
242 struct RuntimeHash * lockRedirectTbl;
243 struct RuntimeHash * objRedirectLockTbl;
254 // data structures for waiting objs
255 struct Queue objqueue;
256 struct Queue * totransobjqueue; // queue to hold objs to be transferred
257 // should be cleared whenever a task is entered
259 // data structures for shared memory allocation
260 #define BAMBOO_BASE_VA 0xd000000
262 #include "structdefs.h"
263 #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
264 #define BAMBOO_PAGE_SIZE (64 * 64)
265 #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
266 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
268 #define BAMBOO_NUM_PAGES (15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G
269 #define BAMBOO_PAGE_SIZE (16 * 1024)// * 1024) // (4096)
270 #define BAMBOO_SMEM_SIZE (16 * 1024)
271 #define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 240) //(1024 * 1024 * 1024)
272 //(3.0 * 1024 * 1024 * 1024) // 3G// ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
276 #include "multicoregarbage.h"
279 SMEMLOCAL = 0x0, // 0x0, using local mem only
280 SMEMFIXED, // 0x1, use local mem in lower address space(1 block only)
281 // and global mem in higher address space
282 SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control
283 SMEMGLOBAL, // 0x3, using global mem only
287 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED;
288 //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
295 struct freeMemItem * next;
299 struct freeMemItem * head;
300 struct freeMemItem * backuplist; // hold removed freeMemItem for reuse;
301 // only maintain 1 freeMemItem
304 // table recording the number of allocated bytes on each block
305 // Note: this table resides on the bottom of the shared heap for all cores
307 volatile int * bamboo_smemtbl;
308 volatile int bamboo_free_block;
309 //bool bamboo_smem_flushed;
310 //struct freeMemList * bamboo_free_mem_list;
311 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
312 // e.g. 20% of the heap and should not be allocated
313 // otherwise gc is invoked
315 volatile mspace bamboo_free_msp;
317 volatile bool smemflag;
318 volatile INTPTR bamboo_cur_msp;
319 volatile int bamboo_smem_size;
324 // data structures for profile mode
327 #define TASKINFOLENGTH 30000
328 //#define INTERRUPTINFOLENGTH 500
334 typedef struct task_info {
336 unsigned long long startTime;
337 unsigned long long endTime;
338 unsigned long long exitIndex;
339 struct Queue * newObjs;
342 /*typedef struct interrupt_info {
347 TaskInfo * taskInfoArray[TASKINFOLENGTH];
349 bool taskInfoOverflow;
350 /*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
351 int interruptInfoIndex;
352 bool interruptInfoOverflow;*/
353 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
356 #endif // #ifdef PROFILE
361 /////////////////////////////////////////////////////////////
363 ////////////////////////////////////////////////////////////
364 // these functions should be implemented in //
365 // multicore runtime for any multicore processors //
366 ////////////////////////////////////////////////////////////
// Runtime entry points declared with INLINE, i.e.
// inline __attribute__((always_inline)). Only the prototypes are visible in
// this header; the per-function notes are assumptions drawn from the names.
// presumably one-time runtime setup -- TODO confirm
369 INLINE void initialization(void);
// presumably sets up the inter-core msg layer -- TODO confirm
370 INLINE void initCommunication(void);
// NOTE(review): purpose not evident from this header -- check the implementation
371 INLINE void fakeExecution(void);
// presumably shuts the runtime down on this core -- TODO confirm
372 INLINE void terminate(void);
// presumably initializes the lock state of object v -- TODO confirm
373 INLINE void initlock(struct ___Object___ * v);
375 // lock related functions
// Read/write lock entry points. Only the prototypes are visible here, so the
// notes below are assumptions drawn from the names -- TODO confirm against
// the implementation.
// presumably requests a read lock for ptr; what the bool result signals
// (lock granted vs. request issued) is not shown here -- verify
376 bool getreadlock(void* ptr);
// presumably releases a read lock previously obtained for ptr
377 void releasereadlock(void* ptr);
// write-lock counterpart of getreadlock (same caveat on the bool result)
378 bool getwritelock(void* ptr);
// write-lock counterpart of releasereadlock
379 void releasewritelock(void* ptr);
// NOTE(review): the _I variants take the same arguments as the plain
// versions; the meaning of the _I suffix is not stated in this header --
// confirm before choosing between them
380 bool getwritelock_I(void* ptr);
381 void releasewritelock_I(void * ptr);
// takes both a lock and a redirect lock; presumably pairs with the
// "lock release with redirect info" msg (type c) -- confirm
383 void releasewritelock_r(void * lock, void * redirectlock);
385 /* this function is to process lock requests.
386 * can only be invoked in receiveObject() */
387 // if return -1: the lock request is redirected
388 // 0: the lock request is approved
389 // 1: the lock request is denied
390 INLINE int processlockrequest(int locktype,
396 INLINE void processlockrelease(int locktype,
401 // msg related functions
402 INLINE void send_hanging_msg();
403 INLINE void send_msg_1(int targetcore,
406 INLINE void send_msg_2(int targetcore,
410 INLINE void send_msg_3(int targetcore,
415 INLINE void send_msg_4(int targetcore,
421 INLINE void send_msg_5(int targetcore,
428 INLINE void send_msg_6(int targetcore,
436 INLINE void cache_msg_1(int targetcore,
438 INLINE void cache_msg_2(int targetcore,
441 INLINE void cache_msg_3(int targetcore,
445 INLINE void cache_msg_4(int targetcore,
450 INLINE void cache_msg_5(int targetcore,
456 INLINE void cache_msg_6(int targetcore,
463 INLINE void transferObject(struct transObjInfo * transObj);
464 INLINE int receiveMsg(void);
467 INLINE void transferMarkResults();
471 INLINE void profileTaskStart(char * taskname);
472 INLINE void profileTaskEnd(void);
473 void outputProfileData();
474 #endif // #ifdef PROFILE
475 ///////////////////////////////////////////////////////////
477 /////////////////////////////////////////////////////////////////////////////
478 // For each version of BAMBOO runtime, there should be a header file named //
479 // runtime_arch.h defining the following MACROS: //
480 // BAMBOO_TOTALCORE: the total # of cores in the processor //
481 // BAMBOO_NUM_OF_CORE: the # of current residing core //
482 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core //
483 // BAMBOO_DEBUGPRINT(x): print out integer x //
484 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x //
485 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
486 // whose size in bytes is y on local memory //
487 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
488 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of //
489 // whose size in bytes is y on shared memory//
490 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE() //
491 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data //
492 // structures related to obj //
494 // BAMBOO_START_CRITICAL_SECTION_STATUS() //
495 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
496 // related to status data //
497 // BAMBOO_START_CRITICAL_SECTION_MSG() //
498 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures //
499 // related to msg data //
500 // BAMBOO_START_CRITICAL_SECTION_LOCK() //
501 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures //
502 // related to lock table //
503 // BAMBOO_START_CRITICAL_SECTION_MEM() //
504 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory //
505 // BAMBOO_START_CRITICAL_SECTION() //
506 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures //
507 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock //
508 // request response //
509 // BAMBOO_CACHE_LINE_SIZE: the cache line size //
510 // BAMBOO_CACHE_LINE_MASK: mask for a cache line //
511 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
513 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary //
514 // BAMBOO_EXIT(x): exit routine //
515 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in //
516 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in //
517 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number //
518 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
519 // address x, size z) to value y with write //
520 // hint, the processor will not fetch the //
521 // current content of the memory and directly //
524 // runtime_arch.h should also define following global parameters: //
525 // bamboo_cpu2coords: map the cpu # to (x,y) coordinates //
526 // bamboo_coords2cpu: map the (x,y) coordinates to cpu # //
527 /////////////////////////////////////////////////////////////////////////////
529 #endif // #ifdef MULTICORE
530 #endif // #ifdef TASK
531 #endif // #ifndef MULTICORE_RUNTIME