powerpc: Reimplement __get_SP() as a function not a define
author Anton Blanchard <anton@samba.org>
Mon, 13 Oct 2014 08:41:38 +0000 (19:41 +1100)
committer Michael Ellerman <mpe@ellerman.id.au>
Wed, 15 Oct 2014 00:23:19 +0000 (11:23 +1100)
Li Zhong points out an issue with our current __get_SP()
implementation. If ftrace function tracing is enabled (i.e. -pg
profiling using _mcount) we spill a stack frame on 64-bit all the
time.

If a function calls __get_SP() and later calls a function that is
tail call optimised, we will pop the stack frame and the value
returned by __get_SP() is no longer valid. An example from Li can
be found in save_stack_trace -> save_context_stack:

c0000000000432c0 <.save_stack_trace>:
c0000000000432c0:       mflr    r0
c0000000000432c4:       std     r0,16(r1)
c0000000000432c8:       stdu    r1,-128(r1) <-- stack frame for _mcount
c0000000000432cc:       std     r3,112(r1)
c0000000000432d0:       bl      <._mcount>
c0000000000432d4:       nop

c0000000000432d8:       mr      r4,r1 <-- __get_SP()

c0000000000432dc:       ld      r5,632(r13)
c0000000000432e0:       ld      r3,112(r1)
c0000000000432e4:       li      r6,1

c0000000000432e8:       addi    r1,r1,128 <-- pop stack frame

c0000000000432ec:       ld      r0,16(r1)
c0000000000432f0:       mtlr    r0
c0000000000432f4:       b       <.save_context_stack> <-- tail call optimized

save_context_stack ends up with a stack pointer below the current
one, and it is likely to be scribbled over.

Fix this by making __get_SP() a function which returns the
caller's stack frame. Also replace inline assembly which grabs
the stack pointer in save_stack_trace() and show_stack() with
__get_SP().

This also fixes an issue with perf_arch_fetch_caller_regs().
It currently unwinds the stack once, which will skip a
valid stack frame on a leaf function. With the __get_SP() fixes
in this patch, we never need to unwind the stack frame to get
to the first interesting frame.

We have to export __get_SP() because perf_arch_fetch_caller_regs()
(which is used in modules) calls it from a header file.

Reported-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/perf_event.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/misc.S
arch/powerpc/kernel/ppc_ksyms.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/stacktrace.c

index 0bb23725b1e721a0d5e89f23db233f15b8a9b3ac..b058568a850d63ca52af55c969bf80298a7ecaa5 100644 (file)
@@ -34,7 +34,7 @@
        do {                                                    \
                (regs)->result = 0;                             \
                (regs)->nip = __ip;                             \
-               (regs)->gpr[1] = *(unsigned long *)__get_SP();  \
+               (regs)->gpr[1] = __get_SP();                    \
                asm volatile("mfmsr %0" : "=r" ((regs)->msr));  \
        } while (0)
 #endif
index fe3f9488f321e5ec20448ac92c6e6a3e16d4fc77..e539d7e714513df85e92ef6b1b8a9e52af61da7d 100644 (file)
@@ -1265,8 +1265,7 @@ static inline unsigned long mfvtb (void)
 
 #define proc_trap()    asm volatile("trap")
 
-#define __get_SP()     ({unsigned long sp; \
-                       asm volatile("mr %0,1": "=r" (sp)); sp;})
+extern unsigned long __get_SP(void);
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
index 7ce26d45777ef1d5497ab778eff937b11204aef3..120deb713bc813d36189488a668fdd7192f18aba 100644 (file)
@@ -114,3 +114,7 @@ _GLOBAL(longjmp)
        mtlr    r0
        mr      r3,r4
        blr
+
+_GLOBAL(__get_SP)
+       PPC_LL  r3,0(r1)
+       blr
index c4dfff6c2719ca5bfc6a17c9faf204fa836cad7f..9d84efbd7b7a8b6a8445bc2a62040e61793edef0 100644 (file)
@@ -41,3 +41,5 @@ EXPORT_SYMBOL(giveup_spe);
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
+
+EXPORT_SYMBOL(__get_SP);
index aa1df89c8b2a8165411d588af394ea85f833265b..3cc64398810177fbcdf0f60178549ddedd49da44 100644 (file)
@@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
                tsk = current;
        if (sp == 0) {
                if (tsk == current)
-                       asm("mr %0,1" : "=r" (sp));
+                       sp = __get_SP();
                else
                        sp = tsk->thread.ksp;
        }
index 3d30ef1038e5e285f1fe43f1d85ace5b1c3463b4..7f65baec29f64eb873d0c0c831da34011d5f2063 100644 (file)
@@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace)
 {
        unsigned long sp;
 
-       asm("mr %0,1" : "=r" (sp));
+       sp = __get_SP();
 
        save_context_stack(trace, sp, current, 1);
 }