/*
* Start with "sp" this many bytes below the top of the kernel stack.
- * This preserves the invariant that a called function may write to *sp.
+ * This allows us to be cache-aware when handling the initial save
+ * of the pt_regs value to the stack.
*/
-#define STACK_TOP_DELTA 8
+#define STACK_TOP_DELTA 64
/*
* When entering the kernel via a fault, start with the top of the
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
-#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+#define task_ksp0(task) \
+ ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA)
/* Return some info about the user process TASK. */
-#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
#define task_pt_regs(task) \
- ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+ ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
#define current_pt_regs() \
- ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
- (KSTK_PTREGS_GAP - 1)) - 1)
+ ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+ STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1)
#define task_sp(task) (task_pt_regs(task)->sp)
#define task_pc(task) (task_pt_regs(task)->pc)
/* Aliases for pc and sp (used in fs/proc/array.c) */
#define KERNEL_PL CONFIG_KERNEL_PL
/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
-#define CPU_LOG_MASK_VALUE 12
-#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
-#if CONFIG_NR_CPUS > CPU_MASK_VALUE
-# error Too many cpus!
+#ifdef __tilegx__ /* tilegx: cpu number in the bits from CPU_SHIFT up, ksp0 below them */
+#define CPU_SHIFT 48 /* lowest bit of SPR_SYSTEM_SAVE_K_0 used for the cpu number */
+#if CHIP_VA_WIDTH() > CPU_SHIFT
+# error Too many VA bits!
#endif
+#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) /* all-ones value of the (64 - CPU_SHIFT) cpu-number bits */
#define raw_smp_processor_id() \
- ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+ ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT))
#define get_current_ksp0() \
- (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+ ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \
+ (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) /* shift up, then signed shift down: sign-extends the low CPU_SHIFT bits */
+#define next_current_ksp0(task) ({ \
+ unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \
+ unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \
+ __ksp0 | __cpu; \
+})
+#else /* !__tilegx__: cpu number in the low LOG2_NR_CPU_IDS bits, ksp0 above them */
+#define LOG2_NR_CPU_IDS 6 /* number of low bits of SPR_SYSTEM_SAVE_K_0 holding the cpu number */
+#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1)
+#define raw_smp_processor_id() \
+ ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID)
+#define get_current_ksp0() \
+ (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID)
#define next_current_ksp0(task) ({ \
unsigned long __ksp0 = task_ksp0(task); \
int __cpu = raw_smp_processor_id(); \
- BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+ BUG_ON(__ksp0 & MAX_CPU_ID); \
__ksp0 | __cpu; \
})
+#endif /* __tilegx__ */
+#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) /* every configured cpu id must fit in the cpu-number field */
+# error Too many cpus!
+#endif
#endif /* _ASM_TILE_PROCESSOR_H */
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
lw sp, r1
or r4, sp, r4
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
}
ld r0, r0
ld sp, r1
- or r4, sp, r4
+ shli r4, r4, CPU_SHIFT
+ bfins r4, sp, 0, CPU_SHIFT-1
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, r0, zero, LOG2_THREAD_SIZE, 31
+ mm r0, r0, zero, LOG2_NR_CPU_IDS, 31
0:
/*
* cache line 1: r14...r29
* cache line 0: 2 x frame, r0..r13
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
}
{
auli r21, r21, ha16(__per_cpu_offset)
- mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+ mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
}
s2a r20, r20, r21
lw tp, r20
mfspr r3, SPR_SYSTEM_SAVE_K_0
/* Get &thread_info->unalign_jit_tmp[0] in r3. */
+ bfexts r3, r3, 0, CPU_SHIFT-1
mm r3, zero, LOG2_THREAD_SIZE, 63
-#if THREAD_SIZE < 65536
- addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
-#else
- addli r3, r3, -(PAGE_SIZE/2)
- addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
-#endif
+ addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
/*
* Save r0, r1, r2 into thread_info array r3 points to
2:
/*
- * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
- * the current stack top in the higher bits. So we recover
- * our stack top by just masking off the low bits, then
+ * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+ * the current stack top in the lower bits. So we recover
+ * our starting stack value by sign-extending the low bits, then
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, zero, LOG2_THREAD_SIZE, 63
+ bfexts r0, r0, 0, CPU_SHIFT-1
0:
/*
* cache line 1: r6...r13
* cache line 0: 2 x frame, r0..r5
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
}
{
shl16insli r21, r21, hw1(__per_cpu_offset)
- bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+ bfextu r20, r20, CPU_SHIFT, 63
}
shl16insli r21, r21, hw0(__per_cpu_offset)
shl3add r20, r20, r21
{
int cpu = raw_smp_processor_id();
unsigned long ksp0 = get_current_ksp0();
- unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+ unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
unsigned long sp = stack_pointer;
if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
else if (sp < ksp0_base + sizeof(struct thread_info)) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
}