x86/asm/entry/64: Use PUSH instructions to build pt_regs on stack
authorDenys Vlasenko <dvlasenk@redhat.com>
Thu, 19 Mar 2015 17:17:47 +0000 (18:17 +0100)
committerIngo Molnar <mingo@kernel.org>
Tue, 24 Mar 2015 18:42:38 +0000 (19:42 +0100)
With this change, on SYSCALL64 code path we are now populating
pt_regs->cs, pt_regs->ss and pt_regs->rcx unconditionally and
therefore don't need to do that in FIXUP_TOP_OF_STACK.

We lose a number of large instructions there:

    text    data     bss     dec     hex filename
   13298       0       0   13298    33f2 entry_64_before.o
   12978       0       0   12978    32b2 entry_64.o

What's more important, we convert two "MOVQ $imm,off(%rsp)" to
"PUSH $imm" (the ones which fill pt_regs->cs,ss).

Before this patch, placing them on fast path was slowing it down
by two cycles: this form of MOV is very large, 12 bytes, and
this probably reduces decode bandwidth to one instruction per cycle
when CPU sees them.

Therefore they were living in FIXUP_TOP_OF_STACK instead (away
from fast path).

"PUSH $imm" is a small 2-byte instruction. Moving it to fast path does
not slow it down in my measurements.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1426785469-15125-3-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/entry_64.S

index eae69bba3a2c280a9a1ae9700e4754f99599d24d..3ea4f6d8bc982f7970b79a79607458c486146243 100644 (file)
@@ -126,11 +126,8 @@ ENDPROC(native_usergs_sysret64)
  * manipulation.
  */
        .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq $__USER_DS,SS+\offset(%rsp)
-       movq $__USER_CS,CS+\offset(%rsp)
-       movq RIP+\offset(%rsp),\tmp  /* get rip */
-       movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-       movq EFLAGS+\offset(%rsp),\tmp /* ditto for rflags->r11 */
+       /* copy flags to r11 as sysret would do */
+       movq EFLAGS+\offset(%rsp),\tmp
        movq \tmp,R11+\offset(%rsp)
        .endm
 
@@ -214,7 +211,6 @@ ENDPROC(native_usergs_sysret64)
  * r9   arg5
  * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
- * Interrupts are off on entry.
  * Only called from user space.
  *
  * When user can change pt_regs->foo always force IRET. That is because
@@ -228,6 +224,12 @@ ENTRY(system_call)
        CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
+
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        SWAPGS_UNSAFE_STACK
        /*
         * A hypervisor implementation might want to use a label
@@ -236,27 +238,35 @@ ENTRY(system_call)
         */
 GLOBAL(system_call_after_swapgs)
 
-       /*
-        * We use 'rsp_scratch' as a scratch register, hence this block must execute
-        * atomically in the face of possible interrupt-driven task preemption,
-        * so we can enable interrupts only after we're done with using rsp_scratch:
-        */
        movq    %rsp,PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(kernel_stack),%rsp
-       ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
-       movq    %rcx,RIP(%rsp)
-       movq    PER_CPU_VAR(rsp_scratch),%rcx
-       movq    %r11,EFLAGS(%rsp)
-       movq    %rcx,RSP(%rsp)
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi $__USER_DS                    /* pt_regs->ss */
+       pushq_cfi PER_CPU_VAR(rsp_scratch)      /* pt_regs->sp */
        /*
-        * No need to follow this irqs off/on section - it's straight
-        * and short:
+        * Re-enable interrupts.
+        * We use 'rsp_scratch' as a scratch space, hence irq-off block above
+        * must execute atomically in the face of possible interrupt-driven
+        * task preemption. We must enable interrupts only after we're done
+        * with using rsp_scratch:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       movq_cfi rax,ORIG_RAX
-       SAVE_C_REGS_EXCEPT_RAX_RCX_R11
-       movq    $-ENOSYS,RAX(%rsp)
-       CFI_REL_OFFSET rip,RIP
+       pushq_cfi       %r11                    /* pt_regs->flags */
+       pushq_cfi       $__USER_CS              /* pt_regs->cs */
+       pushq_cfi       %rcx                    /* pt_regs->ip */
+       CFI_REL_OFFSET rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi       $-ENOSYS                /* pt_regs->ax */
+       pushq_cfi_reg   r8                      /* pt_regs->r8 */
+       pushq_cfi_reg   r9                      /* pt_regs->r9 */
+       pushq_cfi_reg   r10                     /* pt_regs->r10 */
+       sub     $(7*8),%rsp /* pt_regs->r11,bp,bx,r12-15 not saved */
+
        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz tracesys
 system_call_fastpath: