1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
// INLINE: the `inline` keyword combined with the GCC-style always_inline
// attribute. It prefixes the runtime function prototypes declared in this header.
5 #define INLINE inline __attribute__((always_inline))
14 ////////////////////////////////////////////////////////////////
15 // global variables //
16 ///////////////////////////////////////////////////////////////
18 // record the starting time
19 unsigned long long bamboo_start_time;
21 // data structures for msgs
// Both buffers below are arrays of int with 3000 slots each.
22 #define BAMBOO_OUT_BUF_LENGTH 3000
23 #define BAMBOO_MSG_BUF_LENGTH 3000
24 int msgdata[BAMBOO_MSG_BUF_LENGTH]; // message word buffer; indices wrap modulo its length (see MSG_* macros)
25 volatile int msgdataindex; // index advanced by MSG_INDEXINC_I()
26 volatile int msgdatalast; // slot written by MSG_CACHE_I(); advanced by MSG_LASTINDEXINC_I()
28 volatile bool msgdatafull; // consulted by MSG_REMAINSIZE_I() when msgdataindex == msgdatalast
29 int outmsgdata[BAMBOO_OUT_BUF_LENGTH]; // word buffer written by OUTMSG_CACHE()
33 volatile bool isMsgHanging;
34 //volatile bool isMsgSending;
// Advance msgdataindex by one slot, wrapping to 0 at BAMBOO_MSG_BUF_LENGTH.
// Expands to a bare assignment expression (no trailing semicolon).
36 #define MSG_INDEXINC_I() \
37 msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH)
// Advance msgdatalast by one slot, wrapping to 0 at BAMBOO_MSG_BUF_LENGTH.
// Expands to a bare assignment expression (no trailing semicolon).
39 #define MSG_LASTINDEXINC_I() \
40 msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH)
// Store n into msgdata at the msgdatalast slot.
// NOTE(review): the last visible line ends with a line continuation, so the
// remainder of this macro's body is not shown here -- confirm against the
// full file before relying on what happens after the store.
42 #define MSG_CACHE_I(n) \
43 msgdata[msgdatalast] = (n); \
46 // NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
47 // full. In the case that the buffer is empty, should never call this macro.
// MSG_REMAINSIZE_I(s): stores a word count through s, which is cast to int*.
//  - msgdataindex < msgdatalast: the count is msgdatalast - msgdataindex.
//  - last visible assignment: BAMBOO_MSG_BUF_LENGTH - msgdataindex + msgdatalast.
// NOTE(review): the body of the (index == last && !msgdatafull) branch and
// the closing braces are not among the visible lines, so that branch's
// result cannot be confirmed from this view.
49 #define MSG_REMAINSIZE_I(s) \
50 if(msgdataindex < msgdatalast) { \
51 (*(int*)s) = msgdatalast - msgdataindex; \
52 } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
55 (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) -msgdataindex + msgdatalast; \
// Advance outmsgindex by one slot, wrapping to 0 at BAMBOO_OUT_BUF_LENGTH.
// Expands to a bare assignment expression (no trailing semicolon).
58 #define OUTMSG_INDEXINC() \
59 outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH)
// Advance outmsglast by one slot (modulo BAMBOO_OUT_BUF_LENGTH). If it then
// equals outmsgindex, call BAMBOO_EXIT(0xdd01).
// NOTE(review): the closing brace of the if is not among the visible lines.
61 #define OUTMSG_LASTINDEXINC() \
62 outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
63 if(outmsglast == outmsgindex) { \
64 BAMBOO_EXIT(0xdd01); \
// Store n at outmsgdata[outmsglast], then advance outmsglast via
// OUTMSG_LASTINDEXINC().
// NOTE(review): this expands to more than one statement and is not wrapped
// in do { ... } while (0), so it is unsafe as the body of an unbraced
// if/else; only the first statement would be guarded.
67 #define OUTMSG_CACHE(n) \
68 outmsgdata[outmsglast] = (n); \
69 OUTMSG_LASTINDEXINC();
71 #define MAX_PACKET_WORDS 5
75 * type: 1 -- transfer object
76 * 2 -- transfer stall msg
81 * // add for profile info
82 * 7 -- transfer profile output msg
83 * 8 -- transfer profile output finish msg
84 * // add for alias lock strategy
85 * 9 -- redirect lock request
86 * a -- lock grant with redirect info
87 * b -- lock deny with redirect info
88 * c -- lock release with redirect info
89 * d -- status confirm request
90 * e -- status report msg
92 * 10 -- requiring for new memory
93 * 11 -- response for new memory request
94 * 12 -- GC init phase start
96 * 14 -- compact phase start
97 * 15 -- flush phase start
98 * 16 -- init phase finish
99 * 17 -- mark phase finish
100 * 18 -- compact phase finish
101 * 19 -- flush phase finish
103 * 1b -- marked phase finish confirm request
104 * 1c -- marked phase finish confirm response
105 * 1d -- markedObj msg
106 * 1e -- start moving objs msg
107 * 1f -- ask for mapping info of a markedObj
108 * 20 -- mapping info of a markedObj
109 * 21 -- large objs info request
110 * 22 -- large objs info response
111 * 23 -- large objs mapping info
113 * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
114 * StallMsg: 2 + corenum + sendobjs + receiveobjs
115 * (size is always 4 * sizeof(int))
116 * LockMsg: 3 + lock type + obj pointer + lock + request core
117 * (size is always 5 * sizeof(int))
118 * 4/5/6 + lock type + obj pointer + lock
119 * (size is always 4 * sizeof(int))
120 * 9 + lock type + obj pointer + redirect lock + root request core
122 * (size is always 6 * sizeof(int))
123 * a/b + lock type + obj pointer + redirect lock
124 * (size is always 4 * sizeof(int))
125 * c + lock type + lock + redirect lock
126 * (size is always 4 * sizeof(int))
127 * lock type: 0 -- read; 1 -- write
128 * ProfileMsg: 7 + totalexetime
129 * (size is always 2 * sizeof(int))
131 * (size is always 2 * sizeof(int))
132 * StatusMsg: d (size is always 1 * sizeof(int))
133 * e + status + corenum + sendobjs + receiveobjs
134 * (size is always 5 * sizeof(int))
135 * status: 0 -- stall; 1 -- busy
136 * TerminateMsg: f (size is always 1 * sizeof(int))
137 * MemoryMsg: 10 + size + corenum
138 * (size is always 3 * sizeof(int))
139 * 11 + base_va + size
140 * (size is always 3 * sizeof(int))
141 * GCMsg: 12/13 (size is always 1 * sizeof(int))
142 * 14 + size of msg + (num of objs to move + (start address
143 * + end address + dst core + start dst)+)?
144 * + (num of incoming objs + (start dst + orig core)+)?
145 * + (num of large obj lists + (start address + length
147 * 15 (size is always 1 * sizeof(int))
149 * (size is always 2 * sizeof(int))
150 * 17 + corenum + gcsendobjs + gcreceiveobjs
151 * (size is always 4 * sizeof(int))
152 * 18 + corenum + fulfilled blocks num + (finish compact(1) + current
153 * heap top)/(need mem(0) + mem need)
154 * (size is always 5 * sizeof(int))
156 * (size is always 2 * sizeof(int))
157 * 1a (size is always 1 * sizeof(int))
158 * 1b (size is always 1 * sizeof(int))
159 * 1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
160 * (size is always 5 * sizeof(int))
161 * 1d + obj's address + request core
162 * (size is always 3 * sizeof(int))
163 * 1e + corenum + start addr + end addr
164 * (size is always 4 * sizeof(int))
165 * 1f + obj's address + corenum
166 * (size is always 3 * sizeof(int))
167 * 20 + obj's address + dst address
168 * (size is always 3 * sizeof(int))
169 * 21 (size is always 1 * sizeof(int))
170 * 22 + size of msg + corenum + current heap size
171 * + (num of large obj lists + (start address + length)+)?
172 * 23 + orig large obj ptr + new large obj ptr
173 * (size is always 3 * sizeof(int))
176 MSGSTART = 0xD0, // 0xD0
183 PROFILEOUTPUT, // 0xD7
184 PROFILEFINISH, // 0xD8
185 REDIRECTLOCK, // 0xD9
186 REDIRECTGROUNT, // 0xDa
187 REDIRECTDENY, // 0xDb
188 REDIRECTRELEASE, // 0xDc
189 STATUSCONFIRM, // 0xDd
190 STATUSREPORT, // 0xDe
197 GCSTARTCOMPACT, // 0xE4
198 GCSTARTMAPINFO, // 0xE5
199 GCSTARTFLUSH, // 0xE6
200 GCFINISHINIT, // 0xE7
201 GCFINISHMARK, // 0xE8
202 GCFINISHCOMPACT, // 0xE9
203 GCFINISHMAPINFO, // 0xEa
204 GCFINISHFLUSH, // 0xEb
206 GCMARKCONFIRM, // 0xEd
207 GCMARKREPORT, // 0xEe
210 GCMAPREQUEST, // 0xF1
213 GCLOBJREQUEST, // 0xF4
215 GCLOBJMAPPING, // 0xF6
216 #ifdef GC_PROFILE//_S
223 /////////////////////////////////////////////////////////////////////////////////
224 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
225 // No greater than the number of all the cores in
227 // NUMCORES -- number of cores chosen to deploy the application. It can
228 // be greater than that required to fully parallelize the
229 // application. The same as NUMCORES.
230 // NUMCORESACTIVE -- number of cores that really execute the
231 // application. No greater than NUMCORES
232 // NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
233 // NOTE: currently only support continuous cores as gc
234 // cores, i.e. 0~NUMCORES4GC-1
235 ////////////////////////////////////////////////////////////////////////////////
236 // data structures of status for termination
237 // only check working cores
// The three arrays below have one slot per active core (sized NUMCORESACTIVE).
// NOTE(review): the trailing comments on numsendobjs/numreceiveobjs are cut
// off in this view. Presumably they are per-core counts of objects sent and
// received, matching the sendobjs/receiveobjs fields of the stall/status
// messages described earlier in this header -- confirm.
238 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
241 volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core
243 volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a
245 volatile int numconfirm;
246 volatile bool waitconfirm;
248 int self_numsendobjs;
249 int self_numreceiveobjs;
251 // get rid of lock msgs for GC version
253 // data structures for locking
254 struct RuntimeHash locktable;
// locktbl is initialised to the address of locktable. Because it is declared
// static in a header, every translation unit that includes this file gets
// its own copy of the pointer.
255 static struct RuntimeHash* locktbl = &locktable;
256 struct RuntimeHash * lockRedirectTbl;
257 struct RuntimeHash * objRedirectLockTbl;
268 // data structures for waiting objs
269 struct Queue objqueue;
270 struct Queue * totransobjqueue; // queue to hold objs to be transferred
271 // should be cleared whenever enter a task
273 // data structures for shared memory allocation
275 #define BAMBOO_BASE_VA 0xd000000
276 #elif defined TILERA_ZLINUX
278 #define BAMBOO_BASE_VA 0xd000000
279 #endif // MULTICORE_GC
282 #ifdef BAMBOO_MEMPROF
283 #define GC_BAMBOO_NUMCORES 56
285 #define GC_BAMBOO_NUMCORES 62
289 #include "structdefs.h"
290 #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
291 #define BAMBOO_PAGE_SIZE (64 * 64)
292 #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
293 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) *(BAMBOO_NUM_PAGES))
295 #ifdef GC_LARGESHAREDHEAP
296 #define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+2))
297 #elif defined GC_LARGESHAREDHEAP2
298 #define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+2))
300 #define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G
302 #ifdef GC_LARGEPAGESIZE
303 #define BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096)
304 #define BAMBOO_SMEM_SIZE (4 * 1024 * 1024)
305 #elif defined GC_SMALLPAGESIZE
306 #define BAMBOO_PAGE_SIZE (256 * 1024) // (4096)
307 #define BAMBOO_SMEM_SIZE (256 * 1024)
308 #elif defined GC_SMALLPAGESIZE2
309 #define BAMBOO_PAGE_SIZE (256 * 1024) // (4096)
310 #define BAMBOO_SMEM_SIZE (256 * 1024)
312 #define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096)
313 #define BAMBOO_SMEM_SIZE (1024 * 1024)
314 #endif // GC_LARGEPAGESIZE
315 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES)) //(1024 * 1024 * 240)
316 //((unsigned long long int)(3.0 * 1024 * 1024 * 1024)) // 3G
320 #include "multicoregarbage.h"
323 SMEMLOCAL = 0x0,// 0x0, using local mem only
324 SMEMFIXED, // 0x1, use local mem in lower address space(1 block only)
325 // and global mem in higher address space
326 SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control
327 SMEMGLOBAL, // 0x3, using global mem only
331 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED;
332 //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
339 struct freeMemItem * next;
343 struct freeMemItem * head;
344 struct freeMemItem * backuplist; // hold removed freeMemItem for reuse;
345 // only maintain 1 freemMemItem
348 // table recording the number of allocated bytes on each block
349 // Note: this table resides on the bottom of the shared heap for all cores
351 volatile int * bamboo_smemtbl;
352 volatile int bamboo_free_block;
353 //bool bamboo_smem_flushed;
354 //struct freeMemList * bamboo_free_mem_list;
355 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
356 // e.g. 20% of the heap and should not be allocated
357 // otherwise gc is invoked
358 volatile INTPTR bamboo_smem_zero_top;
359 #define BAMBOO_SMEM_ZERO_UNIT_SIZE (4 * 1024) // 4KB
361 //volatile mspace bamboo_free_msp;
362 INTPTR bamboo_free_smemp;
363 int bamboo_free_smem_size;
365 volatile bool smemflag;
366 volatile INTPTR bamboo_cur_msp;
367 volatile int bamboo_smem_size;
372 // data structures for profile mode
375 #define TASKINFOLENGTH 30000
376 #define INTERRUPTINFOLENGTH 500
381 //unsigned long long interrupttime;
383 typedef struct task_info {
385 unsigned long long startTime;
386 unsigned long long endTime;
387 unsigned long long exitIndex;
388 struct Queue * newObjs;
392 typedef struct interrupt_info {
393 unsigned long long startTime;
394 unsigned long long endTime;
397 TaskInfo * taskInfoArray[TASKINFOLENGTH];
399 bool taskInfoOverflow;
401 InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
402 int interruptInfoIndex;
403 bool interruptInfoOverflow;
404 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
407 #endif // #ifdef PROFILE
412 /////////////////////////////////////////////////////////////
414 ////////////////////////////////////////////////////////////
415 // these are functions should be implemented in //
416 // multicore runtime for any multicore processors //
417 ////////////////////////////////////////////////////////////
420 INLINE void initialization(void);
421 INLINE void initCommunication(void);
422 INLINE void fakeExecution(void);
423 INLINE void terminate(void);
424 INLINE void initlock(struct ___Object___ * v);
425 #ifdef BAMBOO_MEMPROF
426 INLINE void terminatememprof(void);
429 // lock related functions
430 bool getreadlock(void* ptr);
431 void releasereadlock(void* ptr);
432 bool getwritelock(void* ptr);
433 void releasewritelock(void* ptr);
434 bool getwritelock_I(void* ptr);
435 void releasewritelock_I(void * ptr);
437 void releasewritelock_r(void * lock, void * redirectlock);
439 /* this function is to process lock requests.
440 * can only be invoked in receiveObject() */
441 // if return -1: the lock request is redirected
442 // 0: the lock request is approved
443 // 1: the lock request is denied
444 INLINE int processlockrequest(int locktype,
450 INLINE void processlockrelease(int locktype,
455 // msg related functions
456 INLINE void send_hanging_msg(bool isInterrupt);
457 INLINE void send_msg_1(int targetcore,
460 INLINE void send_msg_2(int targetcore,
464 INLINE void send_msg_3(int targetcore,
469 INLINE void send_msg_4(int targetcore,
475 INLINE void send_msg_5(int targetcore,
482 INLINE void send_msg_6(int targetcore,
490 INLINE void cache_msg_1(int targetcore,
492 INLINE void cache_msg_2(int targetcore,
495 INLINE void cache_msg_3(int targetcore,
499 INLINE void cache_msg_4(int targetcore,
504 INLINE void cache_msg_5(int targetcore,
510 INLINE void cache_msg_6(int targetcore,
517 INLINE void transferObject(struct transObjInfo * transObj);
518 INLINE int receiveMsg(uint32_t send_port_pending);
521 INLINE void transferMarkResults();
525 INLINE void profileTaskStart(char * taskname);
526 INLINE void profileTaskEnd(void);
527 void outputProfileData();
528 #endif // #ifdef PROFILE
529 ///////////////////////////////////////////////////////////
531 /////////////////////////////////////////////////////////////////////////////
532 // For each version of BAMBOO runtime, there should be a header file named //
533 // runtime_arch.h defining following MACROS: //
534 // BAMBOO_NUM_OF_CORE: the # of current residing core //
535 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core //
536 // BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y) //
537 // BAMBOO_DEBUGPRINT(x): print out integer x //
538 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x //
539 // BAMBOO_EXIT_APP(x): exit the whole application //
540 // BAMBOO_EXIT(x): error exit routine with error # //
541 // BAMBOO_DIE(x): error exit routine with error msg //
542 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number //
543 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in //
544 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in //
545 // BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(): change to runtime mode from //
547 // BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(): change to client mode from //
549 // BAMBOO_ENTER_SEND_MODE_FROM_CLIENT(): change to send mode from //
551 // BAMBOO_ENTER_CLIENT_MODE_FROM_SEND(): change to client mode from //
553 // BAMBOO_ENTER_RUNTIME_MODE_FROM_SEND(): change to runtime mode from //
555 // BAMBOO_ENTER_SEND_MODE_FROM_RUNTIME(): change to send mode from //
557 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock //
558 // request response //
559 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
560 // whose size in bytes is y on local memory //
561 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
562 // BAMBOO_LOCAL_MEM_CLOSE(): close the local heap //
563 // BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of//
564 // whose size in bytes is y on shared memory//
565 // BAMBOO_SHARE_MEM_CLOSE(): close the shared heap //
566 // BAMBOO_CACHE_LINE_SIZE: the cache line size //
567 // BAMBOO_CACHE_LINE_MASK: mask for a cache line //
568 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
570 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary //
571 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
572 // address x, size z) to value y with write //
573 // hint, the processor will not fetch the //
574 // current content of the memory and directly //
576 /////////////////////////////////////////////////////////////////////////////
578 #endif // #ifdef MULTICORE
579 #endif // #ifdef TASK
580 #endif // #ifndef MULTICORE_RUNTIME