1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
5 #define INLINE inline __attribute__((always_inline))
8 ////////////////////////////////////////////////////////////////
10 ///////////////////////////////////////////////////////////////
12 // data structures for msgs
13 #define BAMBOO_OUT_BUF_LENGTH 300
14 #define BAMBOO_MSG_BUF_LENGTH 30
15 int msgdata[BAMBOO_MSG_BUF_LENGTH];
18 int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
23 volatile bool isMsgSending;
// Advance outmsgindex by one slot, wrapping back to 0 once it reaches
// BAMBOO_OUT_BUF_LENGTH.
// The whole expansion is parenthesized so the macro acts as a single
// expression no matter what surrounds the invocation.
#define OUTMSG_INDEXINC() \
  (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))
28 #define OUTMSG_LASTINDEXINC() \
29 outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
30 if(outmsglast == outmsgindex) { \
31 BAMBOO_EXIT(0xd001); \
// Store n at outmsgdata[outmsglast], then advance outmsglast with
// OUTMSG_LASTINDEXINC().
// Wrapped in do { ... } while (0) so both steps stay together when the macro
// is the body of an unbraced if/else or loop. Invoke it as a statement with a
// trailing semicolon: OUTMSG_CACHE(x);
#define OUTMSG_CACHE(n) \
  do { \
    outmsgdata[outmsglast] = (n); \
    OUTMSG_LASTINDEXINC(); \
  } while (0)
40 * type: 1 -- transfer object
41 * 2 -- transfer stall msg
46 * // add for profile info
47 * 7 -- transfer profile output msg
48 * 8 -- transfer profile output finish msg
49 * // add for alias lock strategy
50 * 9 -- redirect lock request
51 * a -- lock grant with redirect info
52 * b -- lock deny with redirect info
53 * c -- lock release with redirect info
54 * d -- status confirm request
55 * e -- status report msg
57 * 10 -- requiring for new memory
58 * 11 -- response for new memory request
59 * 12 -- GC init phase start
61 * 14 -- compact phase start
62 * 15 -- flush phase start
63 * 16 -- init phase finish
64 * 17 -- mark phase finish
65 * 18 -- compact phase finish
66 * 19 -- flush phase finish
68 * 1b -- marked phase finish confirm request
69 * 1c -- marked phase finish confirm response
71 * 1e -- start moving objs msg
72 * 1f -- ask for mapping info of a markedObj
73 * 20 -- mapping info of a markedObj
74 * 21 -- large objs info request
75 * 22 -- large objs info response
76 * 23 -- large objs mapping info
78 * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
79 * StallMsg: 2 + corenum + sendobjs + receiveobjs
80 * (size is always 4 * sizeof(int))
81 * LockMsg: 3 + lock type + obj pointer + lock + request core
82 * (size is always 5 * sizeof(int))
83 * 4/5/6 + lock type + obj pointer + lock
84 * (size is always 4 * sizeof(int))
85 * 9 + lock type + obj pointer + redirect lock + root request core
87 * (size is always 6 * sizeof(int))
88 * a/b + lock type + obj pointer + redirect lock
89 * (size is always 4 * sizeof(int))
90 * c + lock type + lock + redirect lock
91 * (size is always 4 * sizeof(int))
92 * lock type: 0 -- read; 1 -- write
93 * ProfileMsg: 7 + totalexetime
94 * (size is always 2 * sizeof(int))
96 * (size is always 2 * sizeof(int))
97 * StatusMsg: d (size is always 1 * sizeof(int))
98 * e + status + corenum + sendobjs + receiveobjs
99 * (size is always 5 * sizeof(int))
100 * status: 0 -- stall; 1 -- busy
101 * TerminateMsg: f (size is always 1 * sizeof(int))
102 * MemoryMsg: 10 + size + corenum
103 * (size is always 3 * sizeof(int))
104 * 11 + base_va + size
105 * (size is always 3 * sizeof(int))
106 * GCMsg: 12/13 (size is always 1 * sizeof(int))
107 * 14 + size of msg + (num of objs to move + (start address
108 * + end address + dst core + start dst)+)?
109 * + (num of incoming objs + (start dst + orig core)+)?
110 * + (num of large obj lists + (start address + length
112 * 15 (size is always 1 * sizeof(int))
114 * (size is always 2 * sizeof(int))
115 * 17 + corenum + gcsendobjs + gcreceiveobjs
116 * (size is always 4 * sizeof(int))
117 * 18 + corenum + fulfilled blocks num + (finish compact(1) + current
118 * heap top)/(need mem(0) + mem need)
119 * (size is always 5 * sizeof(int))
121 * (size is always 2 * sizeof(int))
122 * 1a (size is always 1 * sizeof(int))
123 * 1b (size is always 1 * sizeof(int))
124 * 1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
125 * (size is always 5 * sizeof(int))
127 * (size is always 2 * sizeof(int))
128 * 1e + corenum + start addr + end addr
129 * (size is always 4 * sizeof(int))
130 * 1f + obj's address + corenum
131 * (size is always 3 * sizeof(int))
132 * 20 + obj's address + dst address
133 * (size is always 3 * sizeof(int))
134 * 21 (size is always 1 * sizeof(int))
135 * 22 + size of msg + corenum + current heap size
136 * + (num of large obj lists + (start address + length)+)?
137 * 23 + orig large obj ptr + new large obj ptr
138 * (size is always 3 * sizeof(int))
141 MSGSTART = 0x0, // 0x0
148 PROFILEOUTPUT, // 0x7
149 PROFILEFINISH, // 0x8
151 REDIRECTGROUNT, // 0xa
153 REDIRECTRELEASE, // 0xc
154 STATUSCONFIRM, // 0xd
162 GCSTARTCOMPACT, // 0x14
163 GCSTARTFLUSH, // 0x15
164 GCFINISHINIT, // 0x16
165 GCFINISHMARK, // 0x17
166 GCFINISHCOMPACT, // 0x18
167 GCFINISHFLUSH, // 0x19
169 GCMARKCONFIRM, // 0x1b
170 GCMARKREPORT, // 0x1c
173 GCMAPREQUEST, // 0x1f
175 GCLOBJREQUEST, // 0x21
177 GCLOBJMAPPING, // 0x23
182 /////////////////////////////////////////////////////////////////////////////////
183 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
184 // No greater than the number of all the cores in
186 // NUMCORES -- number of cores chosen to deploy the application. It can
187 // be greater than that required to fully parallelize the
188 // application. The same as NUMCORES.
189 // NUMCORESACTIVE -- number of cores that really execute the
190 // application. No greater than NUMCORES
191 // NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
192 // NOTE: currently only supports continuous cores as gc
193 // cores, i.e. 0~NUMCORES4GC-1
194 ////////////////////////////////////////////////////////////////////////////////
195 // data structures of status for termination
196 // only check working cores
197 int corestatus[NUMCORESACTIVE]; // records status of each core
200 int numsendobjs[NUMCORESACTIVE]; // records how many objects a core has sent out
201 int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a core has received
202 volatile int numconfirm;
203 volatile bool waitconfirm;
205 int self_numsendobjs;
206 int self_numreceiveobjs;
208 // get rid of lock msgs for GC version
210 // data structures for locking
211 struct RuntimeHash locktable;
212 static struct RuntimeHash* locktbl = &locktable;
213 struct RuntimeHash * lockRedirectTbl;
214 struct RuntimeHash * objRedirectLockTbl;
225 // data structures for waiting objs
226 struct Queue objqueue;
227 struct Queue * totransobjqueue; // queue to hold objs to be transferred
228 // should be cleared whenever enter a task
230 // data structures for shared memory allocation
231 #define BAMBOO_BASE_VA 0xd000000
233 #include "structdefs.h"
234 #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
235 #define BAMBOO_PAGE_SIZE (64 * 64)
236 #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
237 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
239 #define BAMBOO_NUM_PAGES (64 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G
240 #define BAMBOO_PAGE_SIZE (16 * 1024)// * 1024) // (4096)
241 #define BAMBOO_SMEM_SIZE (16 * 1024)
242 #define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 1024)
243 //(3.0 * 1024 * 1024 * 1024) // 3G// ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
247 #include "multicoregarbage.h"
250 SMEMLOCAL = 0x0, // 0x0, using local mem only
251 SMEMFIXED, // 0x1, use local mem in lower address space(1 block only)
252 // and global mem in higher address space
253 SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control
254 SMEMGLOBAL, // 0x3, using global mem only
258 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED;
259 //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
266 struct freeMemItem * next;
270 struct freeMemItem * head;
271 struct freeMemItem * backuplist; // hold removed freeMemItem for reuse;
272 // only maintain 1 freeMemItem
275 struct freeMemList * bamboo_free_mem_list;
276 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
277 // e.g. 20% of the heap and should not be allocated
278 // otherwise gc is invoked
280 volatile mspace bamboo_free_msp;
282 volatile bool smemflag;
283 volatile INTPTR bamboo_cur_msp;
284 volatile int bamboo_smem_size;
289 // data structures for profile mode
292 #define TASKINFOLENGTH 30000
293 //#define INTERRUPTINFOLENGTH 500
299 typedef struct task_info {
301 unsigned long long startTime;
302 unsigned long long endTime;
303 unsigned long long exitIndex;
304 struct Queue * newObjs;
307 /*typedef struct interrupt_info {
312 TaskInfo * taskInfoArray[TASKINFOLENGTH];
314 bool taskInfoOverflow;
315 /*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
316 int interruptInfoIndex;
317 bool interruptInfoOverflow;*/
318 int profilestatus[NUMCORESACTIVE]; // records status of each core
321 #endif // #ifdef PROFILE
326 /////////////////////////////////////////////////////////////
328 ////////////////////////////////////////////////////////////
329 // these are functions that should be implemented in //
330 // multicore runtime for any multicore processors //
331 ////////////////////////////////////////////////////////////
334 INLINE void initialization(void);
335 INLINE void initCommunication(void);
336 INLINE void fakeExecution(void);
337 INLINE void terminate(void);
338 INLINE void initlock(struct ___Object___ * v);
340 // lock related functions
341 bool getreadlock(void* ptr);
342 void releasereadlock(void* ptr);
343 bool getwritelock(void* ptr);
344 void releasewritelock(void* ptr);
345 bool getwritelock_I(void* ptr);
346 void releasewritelock_I(void * ptr);
348 void releasewritelock_r(void * lock, void * redirectlock);
350 /* this function is to process lock requests.
351 * can only be invoked in receiveObject() */
352 // if return -1: the lock request is redirected
353 // 0: the lock request is approved
354 // 1: the lock request is denied
355 INLINE int processlockrequest(int locktype,
361 INLINE void processlockrelease(int locktype,
366 // msg related functions
367 INLINE void send_hanging_msg();
368 INLINE void send_msg_1(int targetcore,
370 INLINE void send_msg_2(int targetcore,
373 INLINE void send_msg_3(int targetcore,
377 INLINE void send_msg_4(int targetcore,
382 INLINE void send_msg_5(int targetcore,
388 INLINE void send_msg_6(int targetcore,
395 INLINE void cache_msg_2(int targetcore,
398 INLINE void cache_msg_3(int targetcore,
402 INLINE void cache_msg_4(int targetcore,
407 INLINE void cache_msg_5(int targetcore,
413 INLINE void cache_msg_6(int targetcore,
420 INLINE void transferObject(struct transObjInfo * transObj);
421 INLINE int receiveMsg(void);
424 INLINE void transferMarkResults();
428 INLINE void profileTaskStart(char * taskname);
429 INLINE void profileTaskEnd(void);
// Takes no arguments and returns nothing. The explicit (void) parameter list
// makes this a real prototype, so the compiler rejects calls that pass
// arguments (an empty () would leave the parameters unspecified).
void outputProfileData(void);
431 #endif // #ifdef PROFILE
432 ///////////////////////////////////////////////////////////
434 /////////////////////////////////////////////////////////////////////////////
435 // For each version of BAMBOO runtime, there should be a header file named //
436 // runtime_arch.h defining the following MACROS: //
437 // BAMBOO_TOTALCORE: the total # of cores in the processor //
438 // BAMBOO_NUM_OF_CORE: the # of current residing core //
439 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core //
440 // BAMBOO_DEBUGPRINT(x): print out integer x //
441 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x //
442 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
443 // whose size in bytes is y on local memory //
444 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
445 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of //
446 // whose size in bytes is y on shared memory//
447 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE() //
448 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data //
449 // structures related to obj //
451 // BAMBOO_START_CRITICAL_SECTION_STATUS() //
452 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
453 // related to status data //
454 // BAMBOO_START_CRITICAL_SECTION_MSG() //
455 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures //
456 // related to msg data //
457 // BAMBOO_START_CRITICAL_SECTION_LOCK() //
458 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures //
459 // related to lock table //
460 // BAMBOO_START_CRITICAL_SECTION_MEM() //
461 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory //
462 // BAMBOO_START_CRITICAL_SECTION() //
463 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures //
464 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock //
465 // request response //
466 // BAMBOO_CACHE_LINE_SIZE: the cache line size //
467 // BAMBOO_CACHE_LINE_MASK: mask for a cache line //
468 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
470 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary //
471 // BAMBOO_EXIT(x): exit routine //
472 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in //
473 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in //
474 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number //
476 // runtime_arch.h should also define following global parameters: //
477 // bamboo_cpu2coords: map the cpu # to (x,y) coordinates //
478 // bamboo_coords2cpu: map the (x,y) coordinates to cpu # //
479 /////////////////////////////////////////////////////////////////////////////
481 #endif // #ifdef MULTICORE
482 #endif // #ifdef TASK
483 #endif // #ifndef MULTICORE_RUNTIME