bug fixing in multicore gc
[IRC.git] / Robust / src / Runtime / multicoreruntime.h
1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
3
/* Force-inline qualifier used on the runtime's function declarations.
 * A definition of INLINE supplied before this header takes precedence. */
#if !defined(INLINE)
#define INLINE inline __attribute__((always_inline))
#endif
7
8 ////////////////////////////////////////////////////////////////
9 // global variables                                          //
10 ///////////////////////////////////////////////////////////////
11
12 // data structures for msgs
/* Capacities, in int-sized words, of the incoming and outgoing buffers. */
#define BAMBOO_MSG_BUF_LENGTH 30
#define BAMBOO_OUT_BUF_LENGTH 300

/* Incoming message words and the associated bookkeeping. */
int msgdata[BAMBOO_MSG_BUF_LENGTH];
int msgdataindex;
int msglength;

/* Outgoing message cache. outmsgindex and outmsglast are advanced modulo
 * BAMBOO_OUT_BUF_LENGTH by the OUTMSG_* macros, so outmsgdata is used as a
 * ring buffer. */
int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
int outmsgindex;
int outmsglast;
int outmsgleft;

/* NOTE(review): presumably "cached outgoing msgs are pending" and "a send is
 * in progress" respectively -- confirm against the sender implementation. */
bool isMsgHanging;
volatile bool isMsgSending;
24
/* Advance outmsgindex by one slot, wrapping to 0 at BAMBOO_OUT_BUF_LENGTH.
 * The expansion is fully parenthesized so the macro behaves as a single
 * expression (its value is the new index) even when embedded in a larger
 * expression such as a comparison. */
#define OUTMSG_INDEXINC() \
        (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))
27
/* Advance outmsglast by one slot, wrapping to 0 at BAMBOO_OUT_BUF_LENGTH.
 * If the write position catches up with outmsgindex the cache is full and
 * the runtime exits with code 0xd001.
 * Wrapped in do { } while(0) so the macro expands to exactly one statement
 * and is safe under an unbraced if/else; invoke it with a trailing ';'. */
#define OUTMSG_LASTINDEXINC() \
        do { \
                outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
                if(outmsglast == outmsgindex) { \
                        BAMBOO_EXIT(0xd001); \
                } \
        } while(0)
33
/* Store word n at the current write position of the outgoing cache and
 * advance that position with OUTMSG_LASTINDEXINC().
 * Wrapped in do { } while(0) so both steps stay together when the macro is
 * used as the body of an unbraced if/else; invoke it with a trailing ';'. */
#define OUTMSG_CACHE(n) \
        do { \
                outmsgdata[outmsglast] = (n); \
                OUTMSG_LASTINDEXINC(); \
        } while(0)
37
38 /* Message format:
39  *      type + Msgbody
40  * type: 1 -- transfer object
41  *       2 -- transfer stall msg
42  *       3 -- lock request
 *       4 -- lock grant
44  *       5 -- lock deny
45  *       6 -- lock release
46  *       // add for profile info
47  *       7 -- transfer profile output msg
48  *       8 -- transfer profile output finish msg
49  *       // add for alias lock strategy
50  *       9 -- redirect lock request
51  *       a -- lock grant with redirect info
52  *       b -- lock deny with redirect info
53  *       c -- lock release with redirect info
54  *       d -- status confirm request
55  *       e -- status report msg
56  *       f -- terminate
57  *      10 -- requiring for new memory
58  *      11 -- response for new memory request
59  *      12 -- GC init phase start
60  *      13 -- GC start
61  *      14 -- compact phase start
62  *      15 -- flush phase start
63  *      16 -- init phase finish
64  *      17 -- mark phase finish
65  *      18 -- compact phase finish
66  *      19 -- flush phase finish
67  *      1a -- GC finish
68  *      1b -- marked phase finish confirm request
69  *      1c -- marked phase finish confirm response
70  *      1d -- markedObj msg
71  *      1e -- start moving objs msg
72  *      1f -- ask for mapping info of a markedObj
73  *      20 -- mapping info of a markedObj
74  *      21 -- large objs info request
75  *      22 -- large objs info response
76  *      23 -- large objs mapping info
77  *
78  * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
79  * StallMsg: 2 + corenum + sendobjs + receiveobjs 
80  *             (size is always 4 * sizeof(int))
81  * LockMsg: 3 + lock type + obj pointer + lock + request core 
82  *            (size is always 5 * sizeof(int))
83  *          4/5/6 + lock type + obj pointer + lock 
84  *            (size is always 4 * sizeof(int))
85  *          9 + lock type + obj pointer +  redirect lock + root request core 
86  *            + request core 
87  *            (size is always 6 * sizeof(int))
88  *          a/b + lock type + obj pointer + redirect lock 
89  *              (size is always 4 * sizeof(int))
90  *          c + lock type + lock + redirect lock 
91  *            (size is always 4 * sizeof(int))
92  *          lock type: 0 -- read; 1 -- write
93  * ProfileMsg: 7 + totalexetime 
94  *               (size is always 2 * sizeof(int))
95  *             8 + corenum 
96  *               (size is always 2 * sizeof(int))
97  * StatusMsg: d (size is always 1 * sizeof(int))
98  *            e + status + corenum + sendobjs + receiveobjs 
99  *              (size is always 5 * sizeof(int))
100  *            status: 0 -- stall; 1 -- busy
 * TerminateMsg: f (size is always 1 * sizeof(int))
102  * MemoryMsg: 10 + size + corenum 
103  *              (size is always 3 * sizeof(int))
104  *           11 + base_va + size 
105  *              (size is always 3 * sizeof(int))
106  * GCMsg: 12/13 (size is always 1 * sizeof(int))
107  *        14 + size of msg + (num of objs to move + (start address 
108  *           + end address + dst core + start dst)+)? 
109  *           + (num of incoming objs + (start dst + orig core)+)? 
 *           + (num of large obj lists + (start address + length 
111  *           + start dst)+)?
112  *        15 (size is always 1 * sizeof(int))
113  *        16 + corenum 
114  *           (size is always 2 * sizeof(int))
 *        17 + corenum + gcsendobjs + gcreceiveobjs     
 *           (size is always 4 * sizeof(int))
 *        18 + corenum + fulfilled blocks num + (finish compact(1) + current
 *           heap top)/(need mem(0) + mem need) 
 *           (size is always 5 * sizeof(int))
120  *        19 + corenum 
121  *              (size is always 2 * sizeof(int))
122  *        1a (size is always 1 * sizeof(int))
 *        1b (size is always 1 * sizeof(int))
124  *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs 
125  *           (size is always 5 * sizeof(int))
126  *        1d + obj's address 
127  *           (size is always 2 * sizeof(int))
128  *        1e + corenum + start addr + end addr
 *           (size is always 4 * sizeof(int))
130  *        1f + obj's address + corenum 
131  *           (size is always 3 * sizeof(int))
132  *        20 + obj's address + dst address 
 *           (size is always 3 * sizeof(int))
134  *        21 (size is always 1 * sizeof(int))
135  *        22 + size of msg + corenum + current heap size 
136  *           + (num of large obj lists + (start address + length)+)?
137  *        23 + orig large obj ptr + new large obj ptr 
138  *            (size is always 3 * sizeof(int))
139  */
/* Message type tags (first word of every message).
 * Values are spelled out explicitly; they are the same consecutive numbers
 * the implicit numbering produced. The GC types only exist when
 * MULTICORE_GC is defined, so MSGEND is 0x24 in GC builds and 0x12 otherwise.
 * "GROUNT" is the established spelling of "grant" in these identifiers. */
typedef enum {
        MSGSTART        = 0x00,
        TRANSOBJ        = 0x01,  // transfer object
        TRANSTALL       = 0x02,  // transfer stall msg
        LOCKREQUEST     = 0x03,  // lock request
        LOCKGROUNT      = 0x04,  // lock grant
        LOCKDENY        = 0x05,  // lock deny
        LOCKRELEASE     = 0x06,  // lock release
        PROFILEOUTPUT   = 0x07,  // transfer profile output msg
        PROFILEFINISH   = 0x08,  // transfer profile output finish msg
        REDIRECTLOCK    = 0x09,  // redirect lock request
        REDIRECTGROUNT  = 0x0a,  // lock grant with redirect info
        REDIRECTDENY    = 0x0b,  // lock deny with redirect info
        REDIRECTRELEASE = 0x0c,  // lock release with redirect info
        STATUSCONFIRM   = 0x0d,  // status confirm request
        STATUSREPORT    = 0x0e,  // status report msg
        TERMINATE       = 0x0f,  // terminate
        MEMREQUEST      = 0x10,  // requiring for new memory
        MEMRESPONSE     = 0x11,  // response for new memory request
#if defined(MULTICORE_GC)
        GCSTARTINIT     = 0x12,  // GC init phase start
        GCSTART         = 0x13,  // GC start
        GCSTARTCOMPACT  = 0x14,  // compact phase start
        GCSTARTFLUSH    = 0x15,  // flush phase start
        GCFINISHINIT    = 0x16,  // init phase finish
        GCFINISHMARK    = 0x17,  // mark phase finish
        GCFINISHCOMPACT = 0x18,  // compact phase finish
        GCFINISHFLUSH   = 0x19,  // flush phase finish
        GCFINISH        = 0x1a,  // GC finish
        GCMARKCONFIRM   = 0x1b,  // marked phase finish confirm request
        GCMARKREPORT    = 0x1c,  // marked phase finish confirm response
        GCMARKEDOBJ     = 0x1d,  // markedObj msg
        GCMOVESTART     = 0x1e,  // start moving objs msg
        GCMAPREQUEST    = 0x1f,  // ask for mapping info of a markedObj
        GCMAPINFO       = 0x20,  // mapping info of a markedObj
        GCLOBJREQUEST   = 0x21,  // large objs info request
        GCLOBJINFO      = 0x22,  // large objs info response
        GCLOBJMAPPING   = 0x23,  // large objs mapping info
#endif
        MSGEND                   // one past the last defined type
} MSGTYPE;
181
182 /////////////////////////////////////////////////////////////////////////////////
183 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor. 
184 //                           No greater than the number of all the cores in 
185 //                           the processor
186 //       NUMCORES -- number of cores chosen to deploy the application. It can 
187 //                   be greater than that required to fully parallelize the 
188 //                   application. The same as NUMCORES.
189 //       NUMCORESACTIVE -- number of cores that really execute the 
190 //                         application. No greater than NUMCORES
191 //       NUMCORES4GC -- number of cores for gc. No greater than NUMCORES. 
//                      NOTE: currently only supports contiguous cores as gc 
//                            cores, i.e. 0~NUMCORES4GC-1
194 ////////////////////////////////////////////////////////////////////////////////
195 // data structures of status for termination
196 // only check working cores
197 int corestatus[NUMCORESACTIVE]; // records status of each core
198                                 // 1: running tasks
199                                 // 0: stall
200 int numsendobjs[NUMCORESACTIVE]; // records how many objects a core has sent out
201 int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a core has received
202 volatile int numconfirm;
203 volatile bool waitconfirm;
204 bool busystatus;
205 int self_numsendobjs;
206 int self_numreceiveobjs;
207
208 // get rid of lock msgs for GC version
209 #ifndef MULTICORE_GC
210 // data structures for locking
211 struct RuntimeHash locktable;
212 static struct RuntimeHash* locktbl = &locktable;
213 struct RuntimeHash * lockRedirectTbl;
214 struct RuntimeHash * objRedirectLockTbl;
215 #endif
// Pair of a redirect lock and a value.
// NOTE(review): presumably the payload stored per entry in the lock tables --
// confirm against the lock implementation.
struct LockValue {
        int redirectlock;
        int value;
};

// State of the lock request currently being handled by this core.
// NOTE(review): exact meaning of each field is not visible in this header.
int lockobj;
int lock2require;
int lockresult;
bool lockflag;
224
225 // data structures for waiting objs
226 struct Queue objqueue;
227 struct Queue * totransobjqueue; // queue to hold objs to be transferred
228                                 // should be cleared whenever enter a task
229
// Layout parameters for shared memory allocation.
#define BAMBOO_BASE_VA 0xd000000

#if defined(GC_DEBUG)
// Debug build: 4096-byte pages and three pages per gc core plus three more.
#include "structdefs.h"
#define BAMBOO_PAGE_SIZE (64 * 64)
#define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
#define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
#else
// Regular build: 64K pages of 16 KB each, i.e. 1 GB of shared memory
// (BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES).
#define BAMBOO_PAGE_SIZE (16 * 1024)
#define BAMBOO_SMEM_SIZE (16 * 1024)
#define BAMBOO_NUM_PAGES (64 * 1024)
#define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 1024)
#endif
245
246 #ifdef MULTICORE_GC
247 #include "multicoregarbage.h"
248
/* Shared memory allocation strategies. */
typedef enum {
        SMEMLOCAL  = 0x0,  // using local mem only
        SMEMFIXED  = 0x1,  // use local mem in lower address space (1 block
                           // only) and global mem in higher address space
        SMEMMIXED  = 0x2,  // like FIXED mode but use a threshold to control
        SMEMGLOBAL = 0x3,  // using global mem only
        SMEMEND            // one past the last strategy
} SMEMSTRATEGY;

/* Strategy in effect. Build flags named in the original comment:
 * -DSMEML: LOCAL; -DSMEMF: FIXED; -DSMEMM: MIXED; -DSMEMG: GLOBAL. */
SMEMSTRATEGY bamboo_smem_mode;
260
261 struct freeMemItem {
262         INTPTR ptr;
263         int size;
264         int startblock;  
265         int endblock;
266         struct freeMemItem * next;
267 };
268
/* Free-memory list header. */
struct freeMemList {
        struct freeMemItem *head;
        // Holds a removed freeMemItem for reuse; only one freeMemItem is
        // maintained here.
        struct freeMemItem *backuplist;
};
274
/* Free list of the shared heap. */
struct freeMemList * bamboo_free_mem_list;

/* Reserved blocks on the top of the shared heap (e.g. 20% of the heap).
 * They should not be allocated; otherwise gc is invoked. */
int bamboo_reserved_smem;
279 #else
280 volatile mspace bamboo_free_msp;
281 #endif
282 volatile bool smemflag;
283 volatile INTPTR bamboo_cur_msp;
284 volatile int bamboo_smem_size;
285
// TODO: test-only counter.
int total_num_t6;
288
289 // data structures for profile mode
290 #ifdef PROFILE
291
// Capacity of the task profile array.
#define TASKINFOLENGTH 30000
//#define INTERRUPTINFOLENGTH 500   (interrupt profiling is disabled)

int totalexetime;
bool stall;
//bool isInterrupt;
298
/* Profile record for one task.
 * NOTE(review): field meanings below are read off the names (start/end
 * timestamps, exit taken, objects created) -- confirm against the profiler. */
typedef struct task_info {
  char *taskName;
  unsigned long long startTime;
  unsigned long long endTime;
  unsigned long long exitIndex;
  struct Queue *newObjs;
} TaskInfo;
306
307 /*typedef struct interrupt_info {
308    int startTime;
309    int endTime;
310    } InterruptInfo;*/
311
312 TaskInfo * taskInfoArray[TASKINFOLENGTH];
313 int taskInfoIndex;
314 bool taskInfoOverflow;
315 /*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
316    int interruptInfoIndex;
317    bool interruptInfoOverflow;*/
318 int profilestatus[NUMCORESACTIVE]; // records status of each core
319                              // 1: running tasks
320                              // 0: stall
321 #endif // #ifdef PROFILE
322
// Only present in builds that do not define INTERRUPT.
#if !defined(INTERRUPT)
bool reside;
#endif
326 /////////////////////////////////////////////////////////////
327
328 ////////////////////////////////////////////////////////////
329 // these are functions should be implemented in           //
330 // multicore runtime for any multicore processors         //
331 ////////////////////////////////////////////////////////////
332 #ifdef TASK
333 #ifdef MULTICORE
334 INLINE void initialization(void);
335 INLINE void initCommunication(void);
336 INLINE void fakeExecution(void);
337 INLINE void terminate(void);
338 INLINE void initlock(struct ___Object___ * v);
339
// lock related functions
// NOTE(review): the get* functions presumably return true when the lock on
// ptr was obtained -- confirm against the implementation.
bool getreadlock(void *ptr);
void releasereadlock(void *ptr);

bool getwritelock(void *ptr);
void releasewritelock(void *ptr);

bool getwritelock_I(void *ptr);
void releasewritelock_I(void *ptr);

#if !defined(MULTICORE_GC)
void releasewritelock_r(void *lock, void *redirectlock);
#endif
350 /* this function is to process lock requests. 
351  * can only be invoked in receiveObject() */
352 // if return -1: the lock request is redirected
353 //            0: the lock request is approved
354 //            1: the lock request is denied
355 INLINE int processlockrequest(int locktype, 
356                                           int lock, 
357                                                                                                                         int obj, 
358                                                                                                                         int requestcore, 
359                                                                                                                         int rootrequestcore, 
360                                                                                                                         bool cache);
361 INLINE void processlockrelease(int locktype, 
362                                            int lock, 
363                                                                                                                          int redirectlock, 
364                                                                                                                          bool redirect);
365
366 // msg related functions
367 INLINE void send_hanging_msg();
368 INLINE void send_msg_1(int targetcore, 
369                                    unsigned long n0);
370 INLINE void send_msg_2(int targetcore, 
371                                    unsigned long n0, 
372                                                                                          unsigned long n1);
373 INLINE void send_msg_3(int targetcore, 
374                                    unsigned long n0, 
375                                                                                          unsigned long n1, 
376                                                                                          unsigned long n2);
377 INLINE void send_msg_4(int targetcore, 
378                                    unsigned long n0, 
379                                                                                          unsigned long n1, 
380                                                                                          unsigned long n2, 
381                                                                                          unsigned long n3);
382 INLINE void send_msg_5(int targetcore, 
383                                    unsigned long n0, 
384                                                                                          unsigned long n1, 
385                                                                                          unsigned long n2, 
386                                                                                          unsigned long n3, 
387                                                                                          unsigned long n4);
388 INLINE void send_msg_6(int targetcore, 
389                                    unsigned long n0, 
390                                                                                          unsigned long n1, 
391                                                                                          unsigned long n2, 
392                                                                                          unsigned long n3, 
393                                                                                          unsigned long n4, 
394                                                                                          unsigned long n5);
395 INLINE void cache_msg_2(int targetcore, 
396                                     unsigned long n0, 
397                                                                                                 unsigned long n1);
398 INLINE void cache_msg_3(int targetcore, 
399                                     unsigned long n0, 
400                                                                                                 unsigned long n1, 
401                                                                                                 unsigned long n2);
402 INLINE void cache_msg_4(int targetcore, 
403                                     unsigned long n0, 
404                                                                                                 unsigned long n1, 
405                                                                                                 unsigned long n2, 
406                                                                                                 unsigned long n3);
407 INLINE void cache_msg_5(int targetcore, 
408                                     unsigned long n0, 
409                                                                                                 unsigned long n1, 
410                                                                                                 unsigned long n2, 
411                                                                                                 unsigned long n3, 
412                                                                                                 unsigned long n4);
413 INLINE void cache_msg_6(int targetcore, 
414                                     unsigned long n0, 
415                                                                                                 unsigned long n1, 
416                                                                                                 unsigned long n2, 
417                                                                                                 unsigned long n3, 
418                                                                                                 unsigned long n4, 
419                                                                                                 unsigned long n5);
420 INLINE void transferObject(struct transObjInfo * transObj);
421 INLINE int receiveMsg(void);
422
#ifdef MULTICORE_GC
// Only declared in MULTICORE_GC builds. Declared with (void) so the
// declaration is a real prototype.
INLINE void transferMarkResults(void);
#endif
426
#ifdef PROFILE
// Profiling hooks, only declared in PROFILE builds.
// NOTE(review): presumably profileTaskStart/profileTaskEnd bracket one task
// execution and outputProfileData emits the collected records -- confirm
// against the implementation.
INLINE void profileTaskStart(char * taskname);
INLINE void profileTaskEnd(void);
// Declared with (void) so the declaration is a real prototype.
void outputProfileData(void);
#endif  // #ifdef PROFILE
///////////////////////////////////////////////////////////
432 ///////////////////////////////////////////////////////////
433
434 /////////////////////////////////////////////////////////////////////////////
435 // For each version of BAMBOO runtime, there should be a header file named //
// runtime_arch.h defining following MACROS:                               //
437 // BAMBOO_TOTALCORE: the total # of cores in the processor                 //
438 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
439 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
440 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
441 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
442 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of  //
443 //                                whose size in bytes is y on local memory //
444 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory         //
445 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of  //
446 //                                whose size in bytes is y on shared memory//
447 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                               //
448 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data        //
449 //                                            structures related to obj    //
450 //                                            queue                        //
451 // BAMBOO_START_CRITICAL_SECTION_STATUS()                                  //
452 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
453 //                                         related to status data          //
454 // BAMBOO_START_CRITICAL_SECTION_MSG()                                     //
455 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures   //
456 //                                      related to msg data                //
457 // BAMBOO_START_CRITICAL_SECTION_LOCK()                                    //
458 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures  //
459 //                                       related to lock table             //
460 // BAMBOO_START_CRITICAL_SECTION_MEM()                                     //
461 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory        //
462 // BAMBOO_START_CRITICAL_SECTION()                                         //
463 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures   //
464 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
465 //                            request response                             //
466 // BAMBOO_CACHE_LINE_SIZE: the cache line size                             //
467 // BAMBOO_CACHE_LINE_MASK: mask for a cache line                           //
468 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with     //
469 //                                 length y                                //
470 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary  //
471 // BAMBOO_EXIT(x): exit routine                                            //
472 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
473 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
// BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number        //
475 //                                                                         //
476 // runtime_arch.h should also define following global parameters:          //
477 // bamboo_cpu2coords: map the cpu # to (x,y) coordinates                   //
478 // bamboo_coords2cpu: map the (x,y) coordinates to cpu #                   //
479 /////////////////////////////////////////////////////////////////////////////
480
481 #endif  // #ifdef MULTICORE
482 #endif  // #ifdef TASK
483 #endif  // #ifndef MULTICORE_RUNTIME