bug fix in scheduling and multicore support for tags
[IRC.git] / Robust / src / Runtime / multicoreruntime.h
1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
3
// Qualifier applied to the runtime prototypes declared later in this header:
// asks the compiler to always inline the function.  It is only defined here
// when nothing earlier (another header or the build flags) has supplied it.
#if !defined(INLINE)
#define INLINE    inline __attribute__((always_inline))
#endif
7
8 ////////////////////////////////////////////////////////////////
9 // global variables                                          //
10 ///////////////////////////////////////////////////////////////
11
// ---- message buffers -------------------------------------------------
#define BAMBOO_MSG_BUF_LENGTH 30   // capacity of msgdata, in ints
#define BAMBOO_OUT_BUF_LENGTH 300  // capacity of outmsgdata, in ints

// msgdata side.
// NOTE(review): presumably holds the message currently being received
// (type, read position, expected length) -- confirm against receiveMsg().
int msgdata[BAMBOO_MSG_BUF_LENGTH];
int msgtype;
int msgdataindex;
int msglength;

// outmsgdata side: used as a circular buffer.  The OUTMSG_* macros below
// advance outmsgindex and outmsglast modulo BAMBOO_OUT_BUF_LENGTH.
int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
int outmsgindex;
int outmsglast;
int outmsgleft;

// Sending state flags.  isMsgSending is volatile.
// NOTE(review): presumably because it is also touched from the message
// interrupt path -- confirm.
bool isMsgHanging;
volatile bool isMsgSending;
25
/* Advance outmsgindex by one slot, wrapping back to 0 once it reaches
 * BAMBOO_OUT_BUF_LENGTH.
 *
 * The whole expansion is parenthesized so that it behaves as one expression
 * (whose value is the new index) wherever it is used; without the outer
 * parentheses an operator written after the macro call would bind to the
 * right-hand side of the assignment and corrupt the stored index. */
#define OUTMSG_INDEXINC() \
        (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))
28
/* Advance outmsglast by one slot, wrapping at BAMBOO_OUT_BUF_LENGTH.  If the
 * advanced position catches up with outmsgindex the outgoing buffer has been
 * overrun and BAMBOO_EXIT(0xd001) is invoked.
 *
 * Wrapped in do { ... } while(0) so the macro expands to exactly one
 * statement: it is then safe as the body of an unbraced if/else or loop.
 * Invoke it with a trailing semicolon. */
#define OUTMSG_LASTINDEXINC() \
        do { \
                outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
                if(outmsglast == outmsgindex) { \
                        BAMBOO_EXIT(0xd001); \
                } \
        } while(0)

/* Store the value n in outmsgdata at position outmsglast, then advance
 * outmsglast (with the overrun check described above).  n is evaluated
 * exactly once.  Also a single statement; invoke with a trailing semicolon. */
#define OUTMSG_CACHE(n) \
        do { \
                outmsgdata[outmsglast] = (n); \
                OUTMSG_LASTINDEXINC(); \
        } while(0)
38
39 /* Message format:
40  *      type + Msgbody
41  * type: 0 -- transfer object
42  *       1 -- transfer stall msg
43  *       2 -- lock request
44  *       3 -- lock grant
45  *       4 -- lock deny
46  *       5 -- lock release
47  *       // add for profile info
48  *       6 -- transfer profile output msg
49  *       7 -- transfer profile output finish msg
50  *       // add for alias lock strategy
51  *       8 -- redirect lock request
52  *       9 -- lock grant with redirect info
53  *       a -- lock deny with redirect info
54  *       b -- lock release with redirect info
55  *       c -- status confirm request
56  *       d -- status report msg
57  *       e -- terminate
58  *       f -- requiring for new memory
59  *      10 -- response for new memory request
60  *      11 -- GC init phase start
61  *      12 -- GC start
62  *      13 -- compact phase start
63  *      14 -- flush phase start
64  *      15 -- init phase finish
65  *      16 -- mark phase finish
66  *      17 -- compact phase finish
67  *      18 -- flush phase finish
68  *      19 -- GC finish
69  *      1a -- marked phase finish confirm request
70  *      1b -- marked phase finish confirm response
71  *      1c -- markedObj msg
72  *      1d -- start moving objs msg
73  *      1e -- ask for mapping info of a markedObj
74  *      1f -- mapping info of a markedObj
75  *      20 -- large objs info request
76  *      21 -- large objs info response
77  *      22 -- large objs mapping info
78  *
79  * ObjMsg: 0 + size of msg + obj's address + (task index + param index)+
80  * StallMsg: 1 + corenum + sendobjs + receiveobjs 
81  *             (size is always 4 * sizeof(int))
82  * LockMsg: 2 + lock type + obj pointer + lock + request core 
83  *            (size is always 5 * sizeof(int))
84  *          3/4/5 + lock type + obj pointer + lock 
85  *            (size is always 4 * sizeof(int))
86  *          8 + lock type + obj pointer +  redirect lock + root request core 
87  *            + request core 
88  *            (size is always 6 * sizeof(int))
89  *          9/a + lock type + obj pointer + redirect lock 
90  *              (size is always 4 * sizeof(int))
91  *          b + lock type + lock + redirect lock 
92  *            (size is always 4 * sizeof(int))
93  *          lock type: 0 -- read; 1 -- write
94  * ProfileMsg: 6 + totalexetime 
95  *               (size is always 2 * sizeof(int))
96  *             7 + corenum 
97  *               (size is always 2 * sizeof(int))
98  * StatusMsg: c (size is always 1 * sizeof(int))
99  *            d + status + corenum + sendobjs + receiveobjs 
100  *              (size is always 5 * sizeof(int))
101  *            status: 0 -- stall; 1 -- busy
102  * TerminateMsg: e (size is always 1 * sizeof(int))
103  * MemoryMsg: f + size + corenum 
104  *              (size is always 3 * sizeof(int))
105  *           10 + base_va + size 
106  *              (size is always 3 * sizeof(int))
107  * GCMsg: 11/12 (size is always 1 * sizeof(int))
108  *        13 + size of msg + (num of objs to move + (start address 
109  *           + end address + dst core + start dst)+)? 
110  *           + (num of incoming objs + (start dst + orig core)+)? 
111  *           + (num of large obj lists + (start address + length 
112  *           + start dst)+)?
113  *        14 (size is always 1 * sizeof(int))
114  *        15 + corenum 
115  *           (size is always 2 * sizeof(int))
116  *        16 + corenum + gcsendobjs + gcreceiveobjs     
117  *           (size is always 4 * sizeof(int))
118  *        17 + corenum + fulfilled blocks num + (finish compact(1) + current
119  *           heap top)/(need mem(0) + mem need) 
120  *           (size is always 5 * sizeof(int))
121  *        18 + corenum 
122  *              (size is always 2 * sizeof(int))
123  *        19 (size is always 1 * sizeof(int))
124  *        1a (size is always 1 * sizeof(int))
125  *        1b + size of msg + corenum + gcsendobjs + gcreceiveobjs 
126  *           (size is always 5 * sizeof(int))
127  *        1c + obj's address 
128  *           (size is always 2 * sizeof(int))
129  *        1d + corenum + start addr + end addr
130  *           (size is always 4 * sizeof(int))
131  *        1e + obj's address + corenum 
132  *           (size is always 3 * sizeof(int))
133  *        1f + obj's address + dst address 
134  *           (size is always 3 * sizeof(int))
135  *        20 (size is always 1 * sizeof(int))
136  *        21 + size of msg + corenum + current heap size 
137  *           + (num of large obj lists + (start address + length)+)?
138  *        22 + orig large obj ptr + new large obj ptr 
139  *            (size is always 3 * sizeof(int))
140  */
/* Message type codes.  Every value is written out explicitly so it can be
 * matched directly against the numeric codes in the message-format table.
 * The GC codes exist only when MULTICORE_GC is defined, so MSGEND (one past
 * the last code) is 0x11 without MULTICORE_GC and 0x23 with it.
 * The spelling "GROUNT" (grant) is part of the identifier names and is kept
 * as-is. */
typedef enum {
        /* object transfer and stall notification */
        TRANSOBJ        = 0x00,
        TRANSTALL       = 0x01,
        /* plain lock protocol */
        LOCKREQUEST     = 0x02,
        LOCKGROUNT      = 0x03,
        LOCKDENY        = 0x04,
        LOCKRELEASE     = 0x05,
        /* profile output */
        PROFILEOUTPUT   = 0x06,
        PROFILEFINISH   = 0x07,
        /* lock protocol with redirect info (alias lock strategy) */
        REDIRECTLOCK    = 0x08,
        REDIRECTGROUNT  = 0x09,
        REDIRECTDENY    = 0x0a,
        REDIRECTRELEASE = 0x0b,
        /* status confirmation and termination */
        STATUSCONFIRM   = 0x0c,
        STATUSREPORT    = 0x0d,
        TERMINATE       = 0x0e,
        /* shared memory requests */
        MEMREQUEST      = 0x0f,
        MEMRESPONSE     = 0x10,
#ifdef MULTICORE_GC
        /* garbage collection: phase starts */
        GCSTARTINIT     = 0x11,
        GCSTART         = 0x12,
        GCSTARTCOMPACT  = 0x13,
        GCSTARTFLUSH    = 0x14,
        /* garbage collection: phase completions */
        GCFINISHINIT    = 0x15,
        GCFINISHMARK    = 0x16,
        GCFINISHCOMPACT = 0x17,
        GCFINISHFLUSH   = 0x18,
        GCFINISH        = 0x19,
        /* garbage collection: mark confirmation and object moving/mapping */
        GCMARKCONFIRM   = 0x1a,
        GCMARKREPORT    = 0x1b,
        GCMARKEDOBJ     = 0x1c,
        GCMOVESTART     = 0x1d,
        GCMAPREQUEST    = 0x1e,
        GCMAPINFO       = 0x1f,
        /* garbage collection: large objects */
        GCLOBJREQUEST   = 0x20,
        GCLOBJINFO      = 0x21,
        GCLOBJMAPPING   = 0x22,
#endif
        MSGEND
} MSGTYPE;
181
182 // data structures of status for termination
183 int corestatus[NUMCORES]; // records status of each core
184                           // 1: running tasks
185                           // 0: stall
186 int numsendobjs[NUMCORES]; // records how many objects a core has sent out
187 int numreceiveobjs[NUMCORES]; // records how many objects a core has received
188 volatile int numconfirm;
189 volatile bool waitconfirm;
190 bool busystatus;
191 int self_numsendobjs;
192 int self_numreceiveobjs;
193
194 // get rid of lock msgs for GC version
195 #ifndef MULTICORE_GC
196 // data structures for locking
197 struct RuntimeHash locktable;
198 static struct RuntimeHash* locktbl = &locktable;
199 struct RuntimeHash * lockRedirectTbl;
200 struct RuntimeHash * objRedirectLockTbl;
201 #endif
// Pair of ints describing one lock.
// NOTE(review): presumably redirectlock names the lock this one has been
// redirected to and value is the lock's current state -- confirm against the
// lock table code.
struct LockValue {
        int redirectlock;
        int value;
};

// Globals shared by the lock request code.
// NOTE(review): presumably lockobj / lock2require identify the pending
// request while lockresult / lockflag carry the reply -- confirm.
int lockobj;
int lock2require;
int lockresult;
bool lockflag;
210
211 // data structures for waiting objs
212 struct Queue objqueue;
213 struct Queue * totransobjqueue; // queue to hold objs to be transferred
214                                 // should be cleared whenever enter a task
215
// data structures for shared memory allocation
//
// BAMBOO_BASE_VA:         base virtual address of the shared memory
// BAMBOO_NUM_PAGES:       number of pages in the shared memory
// BAMBOO_PAGE_SIZE:       size of one page
// BAMBOO_SMEM_SIZE:       1 page under GC_DEBUG, 16 pages otherwise
// BAMBOO_SHARED_MEM_SIZE: total size, BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES
// NOTE(review): sizes are presumably in bytes -- confirm.
#define BAMBOO_BASE_VA 0xd000000
#ifdef GC_DEBUG
// small configuration for debugging the GC
#include "structdefs.h"
#define BAMBOO_NUM_PAGES (NUMCORES*(2+1)+3)
#define BAMBOO_PAGE_SIZE (64 * 64)
#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
#else
#define BAMBOO_NUM_PAGES (1024 * 512 * 4)
#define BAMBOO_PAGE_SIZE (4096)
#define BAMBOO_SMEM_SIZE (16 * BAMBOO_PAGE_SIZE)
#endif
// In the non-debug configuration the product is 4096 * 2097152 = 2^33, which
// does not fit in a 32-bit int (signed overflow is undefined behavior).  The
// leading 1LL forces the multiplication to be carried out in long long; the
// macro stays an integer constant expression and remains usable in #if.
#define BAMBOO_SHARED_MEM_SIZE (1LL * BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES)
229
230 #ifdef MULTICORE_GC
231 #include "multicoregarbage.h"
232
233 typedef enum {
234         SMEMLOCAL = 0x0, // 0x0, using local mem only
235         SMEMFIXED,       // 0x1, use local mem in lower address space(1 block only)
236                          //      and global mem in higher address space
237         SMEMMIXED,        // 0x2, like FIXED mode but use a threshold to control
238         SMEMGLOBAL,       // 0x3, using global mem only
239         SMEMEND
240 } SMEMSTRATEGY;
241
242 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; 
243                               //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
244
245 struct freeMemItem {
246         INTPTR ptr;
247         int size;
248         int startblock;  
249         int endblock;
250         struct freeMemItem * next;
251 };
252
253 struct freeMemList {
254         struct freeMemItem * head;
255         struct freeMemItem * tail;
256 };
257
258 struct freeMemList * bamboo_free_mem_list;
259 #else
260 volatile mspace bamboo_free_msp;
261 #endif
262 volatile bool smemflag;
263 volatile INTPTR bamboo_cur_msp;
264 volatile int bamboo_smem_size;
265
266 // for test TODO
267 int total_num_t6;
268
// ---- profile mode data; compiled in only when PROFILE is defined -------
#if defined(PROFILE)

#define TASKINFOLENGTH 30000  // number of slots in taskInfoArray

bool stall;
int totalexetime;

// Record describing one task.
// NOTE(review): presumably filled in by profileTaskStart() /
// profileTaskEnd() -- confirm.
typedef struct task_info {
  char *taskName;
  unsigned long long startTime;
  unsigned long long endTime;
  unsigned long long exitIndex;
  struct Queue *newObjs;
} TaskInfo;

TaskInfo *taskInfoArray[TASKINFOLENGTH];
int taskInfoIndex;
bool taskInfoOverflow;

// records status of each core -- 1: running tasks, 0: stall
int profilestatus[NUMCORES];

#endif // defined(PROFILE)
302
// Only present when the runtime is built without INTERRUPT.
#if !defined(INTERRUPT)
bool reside;
#endif
306 /////////////////////////////////////////////////////////////
307
308 ////////////////////////////////////////////////////////////
309 // these functions should be implemented in the           //
310 // multicore runtime for any multicore processors         //
311 ////////////////////////////////////////////////////////////
312 #ifdef TASK
313 #ifdef MULTICORE
314 INLINE void initialization(void);
315 INLINE void initCommunication(void);
316 INLINE void fakeExecution(void);
317 INLINE void terminate(void);
318 INLINE void initlock(struct ___Object___ * v);
319
// ---- lock related functions --------------------------------------------
// NOTE(review): the bool results presumably report whether the lock was
// obtained, and the _I variants are presumably meant for use from the
// interrupt/message handler -- confirm against the implementations.

// read locks
bool getreadlock(void *ptr);
void releasereadlock(void *ptr);

// write locks
bool getwritelock(void *ptr);
void releasewritelock(void *ptr);
bool getwritelock_I(void *ptr);
void releasewritelock_I(void *ptr);

// release with redirect info; only in the non-GC build
#if !defined(MULTICORE_GC)
void releasewritelock_r(void *lock, void *redirectlock);
#endif
330 /* this function is to process lock requests. 
331  * can only be invoked in receiveObject() */
332 // if return -1: the lock request is redirected
333 //            0: the lock request is approved
334 //            1: the lock request is denied
335 INLINE int processlockrequest(int locktype, 
336                                           int lock, 
337                                                                                                                         int obj, 
338                                                                                                                         int requestcore, 
339                                                                                                                         int rootrequestcore, 
340                                                                                                                         bool cache);
341 INLINE void processlockrelease(int locktype, 
342                                            int lock, 
343                                                                                                                          int redirectlock, 
344                                                                                                                          bool redirect);
345
346 // msg related functions
347 INLINE void send_hanging_msg();
348 INLINE void send_msg_1(int targetcore, 
349                                    unsigned long n0);
350 INLINE void send_msg_2(int targetcore, 
351                                    unsigned long n0, 
352                                                                                          unsigned long n1);
353 INLINE void send_msg_3(int targetcore, 
354                                    unsigned long n0, 
355                                                                                          unsigned long n1, 
356                                                                                          unsigned long n2);
357 INLINE void send_msg_4(int targetcore, 
358                                    unsigned long n0, 
359                                                                                          unsigned long n1, 
360                                                                                          unsigned long n2, 
361                                                                                          unsigned long n3);
362 INLINE void send_msg_5(int targetcore, 
363                                    unsigned long n0, 
364                                                                                          unsigned long n1, 
365                                                                                          unsigned long n2, 
366                                                                                          unsigned long n3, 
367                                                                                          unsigned long n4);
368 INLINE void send_msg_6(int targetcore, 
369                                    unsigned long n0, 
370                                                                                          unsigned long n1, 
371                                                                                          unsigned long n2, 
372                                                                                          unsigned long n3, 
373                                                                                          unsigned long n4, 
374                                                                                          unsigned long n5);
375 INLINE void cache_msg_2(int targetcore, 
376                                     unsigned long n0, 
377                                                                                                 unsigned long n1);
378 INLINE void cache_msg_3(int targetcore, 
379                                     unsigned long n0, 
380                                                                                                 unsigned long n1, 
381                                                                                                 unsigned long n2);
382 INLINE void cache_msg_4(int targetcore, 
383                                     unsigned long n0, 
384                                                                                                 unsigned long n1, 
385                                                                                                 unsigned long n2, 
386                                                                                                 unsigned long n3);
387 INLINE void cache_msg_5(int targetcore, 
388                                     unsigned long n0, 
389                                                                                                 unsigned long n1, 
390                                                                                                 unsigned long n2, 
391                                                                                                 unsigned long n3, 
392                                                                                                 unsigned long n4);
393 INLINE void cache_msg_6(int targetcore, 
394                                     unsigned long n0, 
395                                                                                                 unsigned long n1, 
396                                                                                                 unsigned long n2, 
397                                                                                                 unsigned long n3, 
398                                                                                                 unsigned long n4, 
399                                                                                                 unsigned long n5);
400 INLINE void transferObject(struct transObjInfo * transObj);
401 INLINE int receiveMsg(void);
402
// GC build only.  Declared with (void) so that it is a real prototype, like
// the other no-argument functions in this header.
#ifdef MULTICORE_GC
INLINE void transferMarkResults(void);
#endif
406
// Profiling entry points; only available when PROFILE is defined.
#ifdef PROFILE
// Mark the start of the task named taskname / the end of the current task.
INLINE void profileTaskStart(char * taskname);
INLINE void profileTaskEnd(void);
// Declared with (void) so that it is a real prototype, like the other
// no-argument functions in this header.
void outputProfileData(void);
#endif  // #ifdef PROFILE
412 ///////////////////////////////////////////////////////////
413
414 /////////////////////////////////////////////////////////////////////////////
415 // For each version of BAMBOO runtime, there should be a header file named //
416 // runtime_arch.h defining following MACROS:                               //
417 // BAMBOO_TOTALCORE: the total # of cores available in the processor       //
418 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
419 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
420 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
421 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
422 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of  //
423 //                                whose size in bytes is y on local memory //
424 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory         //
425 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of  //
426 //                                whose size in bytes is y on shared memory//
427 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                               //
428 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data        //
429 //                                            structures related to obj    //
430 //                                            queue                        //
431 // BAMBOO_START_CRITICAL_SECTION_STATUS()                                  //
432 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
433 //                                         related to status data          //
434 // BAMBOO_START_CRITICAL_SECTION_MSG()                                     //
435 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures   //
436 //                                      related to msg data                //
437 // BAMBOO_START_CRITICAL_SECTION_LOCK()                                    //
438 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures  //
439 //                                       related to lock table             //
440 // BAMBOO_START_CRITICAL_SECTION_MEM()                                     //
441 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory        //
442 // BAMBOO_START_CRITICAL_SECTION()                                         //
443 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures   //
444 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
445 //                            request response                             //
446 // BAMBOO_CACHE_LINE_SIZE: the cache line size                             //
447 // BAMBOO_CACHE_LINE_MASK: mask for a cache line                           //
448 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with     //
449 //                                 length y                                //
450 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary  //
451 // BAMBOO_EXIT(x): exit routine                                            //
452 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
453 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
454 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number        //
455 /////////////////////////////////////////////////////////////////////////////
456
457 #endif  // #ifdef MULTICORE
458 #endif  // #ifdef TASK
459 #endif  // #ifndef MULTICORE_RUNTIME