fix some bug in the multicore gc
[IRC.git] / Robust / src / Runtime / multicoreruntime.h
1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
3
4 #ifndef INLINE
5 #define INLINE    inline __attribute__((always_inline))
6 #endif
7
8 ////////////////////////////////////////////////////////////////
9 // global variables                                          //
10 ///////////////////////////////////////////////////////////////
11
// data structures for msgs
//
// msgdata and outmsgdata are used as circular buffers by the macros below:
// every index update wraps modulo the corresponding buffer length.
#define BAMBOO_OUT_BUF_LENGTH 3000
#define BAMBOO_MSG_BUF_LENGTH 3000
int msgdata[BAMBOO_MSG_BUF_LENGTH];
int msgdataindex;   // advanced by MSG_INDEXINC_I()
int msgdatalast;    // next slot written by MSG_CACHE_I()
int msglength;
int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
int outmsgindex;    // advanced by OUTMSG_INDEXINC()
int outmsglast;     // next slot written by OUTMSG_CACHE()
int outmsgleft;
bool isMsgHanging;
volatile bool isMsgSending;

// All macros below expand to exactly one expression or one statement and
// must be terminated with ';' by the caller, so they are safe to use as the
// body of an unbraced if/else.

// Advance msgdataindex by one slot, wrapping at the end of msgdata.
#define MSG_INDEXINC_I() \
  (msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH))

// Advance msgdatalast by one slot, wrapping at the end of msgdata.
#define MSG_LASTINDEXINC_I() \
  (msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH))

// Store n at msgdata[msgdatalast], then advance msgdatalast.
// Wrapped in do/while(0) so that both statements stay together when the
// macro is used under an unbraced if/else.
#define MSG_CACHE_I(n) \
  do { \
    msgdata[msgdatalast] = (n); \
    MSG_LASTINDEXINC_I(); \
  } while (0)

// Write the number of occupied slots between msgdataindex and msgdatalast
// into the int pointed to by s.
// NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
//       full. In the case that the buffer is empty, should never call this
//       MACRO
#define MSG_REMAINSIZE_I(s) \
  do { \
    if (msgdataindex < msgdatalast) { \
      (*(int*)(s)) = msgdatalast - msgdataindex; \
    } else { \
      (*(int*)(s)) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
    } \
  } while (0)

// Advance outmsgindex by one slot, wrapping at the end of outmsgdata.
#define OUTMSG_INDEXINC() \
  (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))

// Advance outmsglast by one slot, wrapping at the end of outmsgdata.
// If it catches up with outmsgindex, BAMBOO_EXIT(0xdd01) is invoked.
#define OUTMSG_LASTINDEXINC() \
  do { \
    outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
    if (outmsglast == outmsgindex) { \
      BAMBOO_EXIT(0xdd01); \
    } \
  } while (0)

// Store n at outmsgdata[outmsglast], then advance outmsglast (with the
// check performed by OUTMSG_LASTINDEXINC()).
#define OUTMSG_CACHE(n) \
  do { \
    outmsgdata[outmsglast] = (n); \
    OUTMSG_LASTINDEXINC(); \
  } while (0)
58
59 /* Message format:
60  *      type + Msgbody (type numbers below are hexadecimal offsets from MSGSTART)
61  * type: 1 -- transfer object
62  *       2 -- transfer stall msg
63  *       3 -- lock request
64  *       4 -- lock grant
65  *       5 -- lock deny
66  *       6 -- lock release
67  *       // add for profile info
68  *       7 -- transfer profile output msg
69  *       8 -- transfer profile output finish msg
70  *       // add for alias lock strategy
71  *       9 -- redirect lock request
72  *       a -- lock grant with redirect info
73  *       b -- lock deny with redirect info
74  *       c -- lock release with redirect info
75  *       d -- status confirm request
76  *       e -- status report msg
77  *       f -- terminate
78  *      10 -- requiring for new memory
79  *      11 -- response for new memory request
80  *      12 -- GC init phase start
81  *      13 -- GC start
82  *      14 -- compact phase start
83  *      15 -- flush phase start
84  *      16 -- init phase finish
85  *      17 -- mark phase finish
86  *      18 -- compact phase finish
87  *      19 -- flush phase finish
88  *      1a -- GC finish
89  *      1b -- marked phase finish confirm request
90  *      1c -- marked phase finish confirm response
91  *      1d -- markedObj msg
92  *      1e -- start moving objs msg
93  *      1f -- ask for mapping info of a markedObj
94  *      20 -- mapping info of a markedObj
95  *      21 -- large objs info request
96  *      22 -- large objs info response
97  *      23 -- large objs mapping info
98  *
99  * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
100  * StallMsg: 2 + corenum + sendobjs + receiveobjs 
101  *             (size is always 4 * sizeof(int))
102  * LockMsg: 3 + lock type + obj pointer + lock + request core 
103  *            (size is always 5 * sizeof(int))
104  *          4/5/6 + lock type + obj pointer + lock 
105  *            (size is always 4 * sizeof(int))
106  *          9 + lock type + obj pointer +  redirect lock + root request core 
107  *            + request core 
108  *            (size is always 6 * sizeof(int))
109  *          a/b + lock type + obj pointer + redirect lock 
110  *              (size is always 4 * sizeof(int))
111  *          c + lock type + lock + redirect lock 
112  *            (size is always 4 * sizeof(int))
113  *          lock type: 0 -- read; 1 -- write
114  * ProfileMsg: 7 + totalexetime 
115  *               (size is always 2 * sizeof(int))
116  *             8 + corenum 
117  *               (size is always 2 * sizeof(int))
118  * StatusMsg: d (size is always 1 * sizeof(int))
119  *            e + status + corenum + sendobjs + receiveobjs 
120  *              (size is always 5 * sizeof(int))
121  *            status: 0 -- stall; 1 -- busy
122  * TerminateMsg: f (size is always 1 * sizeof(int))
123  * MemoryMsg: 10 + size + corenum 
124  *              (size is always 3 * sizeof(int))
125  *           11 + base_va + size 
126  *              (size is always 3 * sizeof(int))
127  * GCMsg: 12/13 (size is always 1 * sizeof(int))
128  *        14 + size of msg + (num of objs to move + (start address 
129  *           + end address + dst core + start dst)+)? 
130  *           + (num of incoming objs + (start dst + orig core)+)? 
131  *           + (num of large obj lists + (start address + length
132  *           + start dst)+)?
133  *        15 (size is always 1 * sizeof(int))
134  *        16 + corenum 
135  *           (size is always 2 * sizeof(int))
136  *        17 + corenum + gcsendobjs + gcreceiveobjs
137  *           (size is always 4 * sizeof(int))
138  *        18 + corenum + fulfilled blocks num + (finish compact(1) + current
139  *           heap top)/(need mem(0) + mem need)
140  *           (size is always 5 * sizeof(int))
141  *        19 + corenum
142  *              (size is always 2 * sizeof(int))
143  *        1a (size is always 1 * sizeof(int))
144  *        1b (size is always 1 * sizeof(int))
145  *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
146  *           (size is always 5 * sizeof(int))
147  *        1d + obj's address
148  *           (size is always 2 * sizeof(int))
149  *        1e + corenum + start addr + end addr
150  *           (size is always 4 * sizeof(int))
151  *        1f + obj's address + corenum
152  *           (size is always 3 * sizeof(int))
153  *        20 + obj's address + dst address
154  *           (size is always 3 * sizeof(int))
155  *        21 (size is always 1 * sizeof(int))
156  *        22 + size of msg + corenum + current heap size 
157  *           + (num of large obj lists + (start address + length)+)?
158  *        23 + orig large obj ptr + new large obj ptr 
159  *            (size is always 3 * sizeof(int))
160  */
/* Message type tags (the "type" field of the message format described in
 * the comment above).
 *
 * Values are consecutive, starting at MSGSTART = 0xD0. The GC-specific
 * types exist only when MULTICORE_GC is defined. MSGEND follows the last
 * declared type, so it is 0xE2 without MULTICORE_GC and 0xF4 with it.
 */
typedef enum {
  MSGSTART = 0xD0, // 0xD0
  TRANSOBJ,        // 0xD1
  TRANSTALL,       // 0xD2
  LOCKREQUEST,     // 0xD3
  LOCKGROUNT,      // 0xD4
  LOCKDENY,        // 0xD5
  LOCKRELEASE,     // 0xD6
  PROFILEOUTPUT,   // 0xD7
  PROFILEFINISH,   // 0xD8
  REDIRECTLOCK,    // 0xD9
  REDIRECTGROUNT,  // 0xDA
  REDIRECTDENY,    // 0xDB
  REDIRECTRELEASE, // 0xDC
  STATUSCONFIRM,   // 0xDD
  STATUSREPORT,    // 0xDE
  TERMINATE,       // 0xDF
  MEMREQUEST,      // 0xE0
  MEMRESPONSE,     // 0xE1
#ifdef MULTICORE_GC
  GCSTARTINIT,     // 0xE2
  GCSTART,         // 0xE3
  GCSTARTCOMPACT,  // 0xE4
  GCSTARTFLUSH,    // 0xE5
  GCFINISHINIT,    // 0xE6
  GCFINISHMARK,    // 0xE7
  GCFINISHCOMPACT, // 0xE8
  GCFINISHFLUSH,   // 0xE9
  GCFINISH,        // 0xEA
  GCMARKCONFIRM,   // 0xEB
  GCMARKREPORT,    // 0xEC
  GCMARKEDOBJ,     // 0xED
  GCMOVESTART,     // 0xEE
  GCMAPREQUEST,    // 0xEF
  GCMAPINFO,       // 0xF0
  GCLOBJREQUEST,   // 0xF1
  GCLOBJINFO,      // 0xF2
  GCLOBJMAPPING,   // 0xF3
#endif
  MSGEND           // one past the last declared message type
} MSGTYPE;
202
203 /////////////////////////////////////////////////////////////////////////////////
204 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor. 
205 //                           No greater than the number of all the cores in 
206 //                           the processor
207 //       NUMCORES -- number of cores chosen to deploy the application. It can 
208 //                   be greater than that required to fully parallelize the 
209 //                   application. The same as NUMCORES.
210 //       NUMCORESACTIVE -- number of cores that really execute the 
211 //                         application. No greater than NUMCORES
212 //       NUMCORES4GC -- number of cores for gc. No greater than NUMCORES. 
213 //                      NOTE: currently only supports continuous cores as gc
214 //                            cores, i.e. 0~NUMCORES4GC-1
215 ////////////////////////////////////////////////////////////////////////////////
216 // data structures of status for termination
217 // only check working cores
218 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
219                                          // 1: running tasks
220                                          // 0: stall
221 volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core 
222                                           // has sent out
223 volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a 
224                                              // core has received
225 volatile int numconfirm;
226 volatile bool waitconfirm;
227 bool busystatus;
228 int self_numsendobjs;
229 int self_numreceiveobjs;
230
231 // get rid of lock msgs for GC version
232 #ifndef MULTICORE_GC
233 // data structures for locking
234 struct RuntimeHash locktable;
235 static struct RuntimeHash* locktbl = &locktable;
236 struct RuntimeHash * lockRedirectTbl;
237 struct RuntimeHash * objRedirectLockTbl;
238 #endif
239 struct LockValue {
240         int redirectlock;
241         int value;
242 };
243 int lockobj;
244 int lock2require;
245 int lockresult;
246 bool lockflag;
247
248 // data structures for waiting objs
249 struct Queue objqueue;
250 struct Queue * totransobjqueue; // queue to hold objs to be transferred
251                                 // should be cleared whenever enter a task
252
253 // data structures for shared memory allocation
254 #define BAMBOO_BASE_VA 0xd000000
255 #ifdef GC_DEBUG
256 #include "structdefs.h"
257 #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
258 #define BAMBOO_PAGE_SIZE (64 * 64)
259 #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
260 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
261 #else
262 #define BAMBOO_NUM_PAGES (15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5)  3G
263 #define BAMBOO_PAGE_SIZE (16 * 1024)// * 1024)  // (4096)
264 #define BAMBOO_SMEM_SIZE (16 * 1024)
265 #define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 240) //(1024 * 1024 * 1024)
266 //(3.0 * 1024 * 1024 * 1024) // 3G// ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
267 #endif
268
269 #ifdef MULTICORE_GC
270 #include "multicoregarbage.h"
271
272 typedef enum {
273         SMEMLOCAL = 0x0, // 0x0, using local mem only
274         SMEMFIXED,       // 0x1, use local mem in lower address space(1 block only)
275                          //      and global mem in higher address space
276         SMEMMIXED,       // 0x2, like FIXED mode but use a threshold to control
277         SMEMGLOBAL,      // 0x3, using global mem only
278         SMEMEND
279 } SMEMSTRATEGY;
280
281 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; 
282                               //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
283
284 struct freeMemItem {
285         INTPTR ptr;
286         int size;
287         int startblock;  
288         int endblock;
289         struct freeMemItem * next;
290 };
291
292 struct freeMemList {
293         struct freeMemItem * head;
294         struct freeMemItem * backuplist;  // hold removed freeMemItem for reuse; 
295                                           // only maintain 1 freeMemItem
296 };
297
298 // table recording the number of allocated bytes on each block
299 // Note: this table resides on the bottom of the shared heap for all cores
300 //       to access
301 volatile int * bamboo_smemtbl;
302 volatile int bamboo_free_block;
303 //bool bamboo_smem_flushed;
304 //struct freeMemList * bamboo_free_mem_list;
305 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
306                           // e.g. 20% of the heap and should not be allocated
307                                                                                                         // otherwise gc is invoked
308 #else
309 volatile mspace bamboo_free_msp;
310 #endif
311 volatile bool smemflag;
312 volatile INTPTR bamboo_cur_msp;
313 volatile int bamboo_smem_size;
314
315 // for test TODO
316 int total_num_t6;
317
318 // data structures for profile mode
319 #ifdef PROFILE
320
321 #define TASKINFOLENGTH 30000
322 //#define INTERRUPTINFOLENGTH 500
323
324 bool stall;
325 //bool isInterrupt;
326 int totalexetime;
327
328 typedef struct task_info {
329   char* taskName;
330   unsigned long long startTime;
331   unsigned long long endTime;
332   unsigned long long exitIndex;
333   struct Queue * newObjs; 
334 } TaskInfo;
335
336 /*typedef struct interrupt_info {
337    int startTime;
338    int endTime;
339    } InterruptInfo;*/
340
341 TaskInfo * taskInfoArray[TASKINFOLENGTH];
342 int taskInfoIndex;
343 bool taskInfoOverflow;
344 /*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
345    int interruptInfoIndex;
346    bool interruptInfoOverflow;*/
347 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
348                                             // 1: running tasks
349                                             // 0: stall
350 #endif // #ifdef PROFILE
351
352 #ifndef INTERRUPT
353 bool reside;
354 #endif
355 /////////////////////////////////////////////////////////////
356
357 ////////////////////////////////////////////////////////////
358 // these are functions should be implemented in           //
359 // multicore runtime for any multicore processors         //
360 ////////////////////////////////////////////////////////////
361 #ifdef TASK
362 #ifdef MULTICORE
363 INLINE void initialization(void);
364 INLINE void initCommunication(void);
365 INLINE void fakeExecution(void);
366 INLINE void terminate(void);
367 INLINE void initlock(struct ___Object___ * v);
368
369 // lock related functions
370 bool getreadlock(void* ptr);
371 void releasereadlock(void* ptr);
372 bool getwritelock(void* ptr);
373 void releasewritelock(void* ptr);
374 bool getwritelock_I(void* ptr);
375 void releasewritelock_I(void * ptr);
376 #ifndef MULTICORE_GC
377 void releasewritelock_r(void * lock, void * redirectlock);
378 #endif
379 /* this function is to process lock requests. 
380  * can only be invoked in receiveObject() */
381 // if return -1: the lock request is redirected
382 //            0: the lock request is approved
383 //            1: the lock request is denied
384 INLINE int processlockrequest(int locktype, 
385                                           int lock, 
386                                                                                                                         int obj, 
387                                                                                                                         int requestcore, 
388                                                                                                                         int rootrequestcore, 
389                                                                                                                         bool cache);
390 INLINE void processlockrelease(int locktype, 
391                                            int lock, 
392                                                                                                                          int redirectlock, 
393                                                                                                                          bool redirect);
394
395 // msg related functions
396 INLINE void send_hanging_msg();
397 INLINE void send_msg_1(int targetcore, 
398                                    unsigned long n0,
399                                                                                          bool isinterrupton);
400 INLINE void send_msg_2(int targetcore, 
401                                    unsigned long n0, 
402                                                                                          unsigned long n1,
403                                                                                          bool isinterrupton);
404 INLINE void send_msg_3(int targetcore, 
405                                    unsigned long n0, 
406                                                                                          unsigned long n1, 
407                                                                                          unsigned long n2,
408                                                                                          bool isinterrupton);
409 INLINE void send_msg_4(int targetcore, 
410                                    unsigned long n0, 
411                                                                                          unsigned long n1, 
412                                                                                          unsigned long n2, 
413                                                                                          unsigned long n3,
414                                                                                          bool isinterrupton);
415 INLINE void send_msg_5(int targetcore, 
416                                    unsigned long n0, 
417                                                                                          unsigned long n1, 
418                                                                                          unsigned long n2, 
419                                                                                          unsigned long n3, 
420                                                                                          unsigned long n4,
421                                                                                          bool isinterrupton);
422 INLINE void send_msg_6(int targetcore, 
423                                    unsigned long n0, 
424                                                                                          unsigned long n1, 
425                                                                                          unsigned long n2, 
426                                                                                          unsigned long n3, 
427                                                                                          unsigned long n4, 
428                                                                                          unsigned long n5,
429                                                                                          bool isinterrupton);
430 INLINE void cache_msg_1(int targetcore, 
431                                                                                                 unsigned long n0);
432 INLINE void cache_msg_2(int targetcore, 
433                                     unsigned long n0, 
434                                                                                                 unsigned long n1);
435 INLINE void cache_msg_3(int targetcore, 
436                                     unsigned long n0, 
437                                                                                                 unsigned long n1, 
438                                                                                                 unsigned long n2);
439 INLINE void cache_msg_4(int targetcore, 
440                                     unsigned long n0, 
441                                                                                                 unsigned long n1, 
442                                                                                                 unsigned long n2, 
443                                                                                                 unsigned long n3);
444 INLINE void cache_msg_5(int targetcore, 
445                                     unsigned long n0, 
446                                                                                                 unsigned long n1, 
447                                                                                                 unsigned long n2, 
448                                                                                                 unsigned long n3, 
449                                                                                                 unsigned long n4);
450 INLINE void cache_msg_6(int targetcore, 
451                                     unsigned long n0, 
452                                                                                                 unsigned long n1, 
453                                                                                                 unsigned long n2, 
454                                                                                                 unsigned long n3, 
455                                                                                                 unsigned long n4, 
456                                                                                                 unsigned long n5);
457 INLINE void transferObject(struct transObjInfo * transObj);
458 INLINE int receiveMsg(void);
459
460 #ifdef MULTICORE_GC
461 INLINE void transferMarkResults();
462 #endif
463
464 #ifdef PROFILE
465 INLINE void profileTaskStart(char * taskname);
466 INLINE void profileTaskEnd(void);
467 void outputProfileData();
468 #endif  // #ifdef PROFILE
469 ///////////////////////////////////////////////////////////
470
471 /////////////////////////////////////////////////////////////////////////////
472 // For each version of BAMBOO runtime, there should be a header file named //
473 // runtime_arch.h defining following MACROS:                               //
474 // BAMBOO_TOTALCORE: the total # of cores in the processor                 //
475 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
476 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
477 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
478 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
479 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of  //
480 //                                whose size in bytes is y on local memory //
481 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory         //
482 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of  //
483 //                                whose size in bytes is y on shared memory//
484 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                               //
485 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data        //
486 //                                            structures related to obj    //
487 //                                            queue                        //
488 // BAMBOO_START_CRITICAL_SECTION_STATUS()                                  //
489 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
490 //                                         related to status data          //
491 // BAMBOO_START_CRITICAL_SECTION_MSG()                                     //
492 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures   //
493 //                                      related to msg data                //
494 // BAMBOO_START_CRITICAL_SECTION_LOCK()                                    //
495 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures  //
496 //                                       related to lock table             //
497 // BAMBOO_START_CRITICAL_SECTION_MEM()                                     //
498 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory        //
499 // BAMBOO_START_CRITICAL_SECTION()                                         //
500 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures   //
501 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
502 //                            request response                             //
503 // BAMBOO_CACHE_LINE_SIZE: the cache line size                             //
504 // BAMBOO_CACHE_LINE_MASK: mask for a cache line                           //
505 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with     //
506 //                                 length y                                //
507 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary  //
508 // BAMBOO_EXIT(x): exit routine                                            //
509 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
510 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
511 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number        //
512 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
513 //                            address x, size z) to value y with write     //
514 //                            hint, the processor will not fetch the       //
515 //                            current content of the memory and directly   //
516 //                            write                                        //
517 //                                                                         //
518 // runtime_arch.h should also define following global parameters:          //
519 // bamboo_cpu2coords: map the cpu # to (x,y) coordinates                   //
520 // bamboo_coords2cpu: map the (x,y) coordinates to cpu #                   //
521 /////////////////////////////////////////////////////////////////////////////
522
523 #endif  // #ifdef MULTICORE
524 #endif  // #ifdef TASK
525 #endif  // #ifndef MULTICORE_RUNTIME