x86: Emit LAHF/SAHF instead of PUSHF/POPF

author JF Bastien <jfb@google.com>

Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)

committer JF Bastien <jfb@google.com>

Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)
author JF Bastien <jfb@google.com>
Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)
committer JF Bastien <jfb@google.com>
Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp

index cd820ee1ac526f2efff9c62f800c3839f9592f41..f6e45a4b7c7db4550a0f50388aae322440e9fde3 100644 (file)
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -310,7 +310,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
      if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
        continue;
  
-    bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+    bool IsRegOrSuperReg = MOReg == Reg || TRI->isSuperRegister(MOReg, Reg);
      bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
  
      if (IsRegOrSuperReg && MO.readsReg()) {
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index d65c2d737643e193f20110ff2681584185bbe090..d79806e664bb14afd0b2192104fc73dd1c240df3 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3903,34 +3903,59 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      return;
    }
  
-  // Moving EFLAGS to / from another register requires a push and a pop.
-  // Notice that we have to adjust the stack if we don't want to clobber the
-  // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
-  if (SrcReg == X86::EFLAGS) {
-    if (X86::GR64RegClass.contains(DestReg)) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
-      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
-      return;
+  bool FromEFLAGS = SrcReg == X86::EFLAGS;
+  bool ToEFLAGS = DestReg == X86::EFLAGS;
+  int Reg = FromEFLAGS ? DestReg : SrcReg;
+  bool is32 = X86::GR32RegClass.contains(Reg);
+  bool is64 = X86::GR64RegClass.contains(Reg);
+  if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
+    // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
+    // inefficient. Instead:
+    //   - Save the overflow flag OF into AL using SETO, and restore it using a
+    //     signed 8-bit addition of AL and INT8_MAX.
+    //   - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
+    //     using LAHF/SAHF.
+    //   - When RAX/EAX is live and isn't the destination register, make sure it
+    //     isn't clobbered by PUSH/POP'ing it before and after saving/restoring
+    //     the flags.
+    // This approach is ~2.25x faster than using PUSHF/POPF.
+    //
+    // This is still somewhat inefficient because we don't know which flags are
+    // actually live inside EFLAGS. Were we able to do a single SETcc instead of
+    // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
+    //
+    // PUSHF/POPF is also potentially incorrect because it affects other flags
+    // such as TF/IF/DF, which LLVM doesn't model.
+    //
+    // Notice that we have to adjust the stack if we don't want to clobber the
+    // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
+
+    int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
+    int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
+    int Pop = is64 ? X86::POP64r : X86::POP32r;
+    int AX = is64 ? X86::RAX : X86::EAX;
+
+    bool AXDead = (Reg == AX) ||
+                  (MachineBasicBlock::LQR_Dead ==
+                   MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+
+    if (!AXDead)
+      BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
+    if (FromEFLAGS) {
+      BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
+      BuildMI(MBB, MI, DL, get(X86::LAHF));
+      BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
      }
-    if (X86::GR32RegClass.contains(DestReg)) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
-      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
-      return;
-    }
-  }
-  if (DestReg == X86::EFLAGS) {
-    if (X86::GR64RegClass.contains(SrcReg)) {
-      BuildMI(MBB, MI, DL, get(X86::PUSH64r))
-        .addReg(SrcReg, getKillRegState(KillSrc));
-      BuildMI(MBB, MI, DL, get(X86::POPF64));
-      return;
-    }
-    if (X86::GR32RegClass.contains(SrcReg)) {
-      BuildMI(MBB, MI, DL, get(X86::PUSH32r))
-        .addReg(SrcReg, getKillRegState(KillSrc));
-      BuildMI(MBB, MI, DL, get(X86::POPF32));
-      return;
+    if (ToEFLAGS) {
+      BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
+      BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
+          .addReg(X86::AL)
+          .addImm(INT8_MAX);
+      BuildMI(MBB, MI, DL, get(X86::SAHF));
      }
+    if (!AXDead)
+      BuildMI(MBB, MI, DL, get(Pop), AX);
+    return;
    }
  
    DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll

index 61123930887b613a38a1e4aa80270ed9331177ed..c129128b5fa78996c295c2c913fe0e29cd54d67d 100644 (file)
--- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,24 +1,58 @@
-; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
  
-declare i32 @bar()
+declare i32 @foo()
+declare i32 @bar(i64)
  
  define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
-; CHECK-LABEL: test_intervening_call:
-; CHECK: cmpxchg
-; CHECK: pushf[[LQ:[lq]]]
-; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
+; i386-LABEL: test_intervening_call:
+; i386: cmpxchg8b
+; i386-NEXT: pushl %eax
+; i386-NEXT: seto %al
+; i386-NEXT: lahf
+; i386-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386-NEXT: popl %eax
+; i386-NEXT: movl %edx, 4(%esp)
+; i386-NEXT: movl %eax, (%esp)
+; i386-NEXT: calll bar
+; i386-NEXT: movl [[FLAGS]], %eax
+; i386-NEXT: addb $127, %al
+; i386-NEXT: sahf
+; i386-NEXT: jne
+
+; i386f-LABEL: test_intervening_call:
+; i386f: cmpxchg8b
+; i386f-NEXT: movl %eax, (%esp)
+; i386f-NEXT: movl %edx, 4(%esp)
+; i386f-NEXT: seto %al
+; i386f-NEXT: lahf
+; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386f-NEXT: calll bar
+; i386f-NEXT: movl [[FLAGS]], %eax
+; i386f-NEXT: addb $127, %al
+; i386f-NEXT: sahf
+; i386f-NEXT: jne
+
+; x8664-LABEL: test_intervening_call:
+; x8664: cmpxchgq
+; x8664: pushq %rax
+; x8664-NEXT: seto %al
+; x8664-NEXT: lahf
+; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-NEXT: popq %rax
+; x8664-NEXT: movq %rax, %rdi
+; x8664-NEXT: callq bar
+; x8664-NEXT: movq [[FLAGS]], %rax
+; x8664-NEXT: addb $127, %al
+; x8664-NEXT: sahf
+; x8664-NEXT: jne
  
-; CHECK-NEXT: call[[LQ]] bar
-
-; CHECK-NEXT: push[[LQ]] [[FLAGS]]
-; CHECK-NEXT: popf[[LQ]]
-; CHECK-NEXT: jne
    %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
+  %v = extractvalue { i64, i1 } %cx, 0
    %p = extractvalue { i64, i1 } %cx, 1
-  call i32 @bar()
+  call i32 @bar(i64 %v)
    br i1 %p, label %t, label %f
  
  t:
@@ -30,10 +64,18 @@ f:
  
  ; Interesting in producing a clobber without any function calls.
  define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
-; CHECK-LABEL: test_control_flow:
+; i386-LABEL: test_control_flow:
+; i386: cmpxchg
+; i386-NEXT: jne
+
+; i386f-LABEL: test_control_flow:
+; i386f: cmpxchg
+; i386f-NEXT: jne
+
+; x8664-LABEL: test_control_flow:
+; x8664: cmpxchg
+; x8664-NEXT: jne
  
-; CHECK: cmpxchg
-; CHECK-NEXT: jne
  entry:
    %cmp = icmp sgt i32 %i, %j
    br i1 %cmp, label %loop_start, label %cond.end
@@ -67,20 +109,46 @@ cond.end:
  ; This one is an interesting case because CMOV doesn't have a chain
  ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
  define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
-; CHECK-LABEL: test_feed_cmov:
-
-; CHECK: cmpxchg
-; CHECK: pushf[[LQ:[lq]]]
-; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
-
-; CHECK-NEXT: call[[LQ]] bar
+; i386-LABEL: test_feed_cmov:
+; i386: cmpxchgl
+; i386-NEXT: seto %al
+; i386-NEXT: lahf
+; i386-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386-NEXT: calll foo
+; i386-NEXT: pushl %eax
+; i386-NEXT: movl [[FLAGS]], %eax
+; i386-NEXT: addb $127, %al
+; i386-NEXT: sahf
+; i386-NEXT: popl %eax
+
+; i386f-LABEL: test_feed_cmov:
+; i386f: cmpxchgl
+; i386f-NEXT: seto %al
+; i386f-NEXT: lahf
+; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386f-NEXT: calll foo
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: movl [[FLAGS]], %eax
+; i386f-NEXT: addb $127, %al
+; i386f-NEXT: sahf
+; i386f-NEXT: popl %eax
+
+; x8664-LABEL: test_feed_cmov:
+; x8664: cmpxchgl
+; x8664: seto %al
+; x8664-NEXT: lahf
+; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-NEXT: callq foo
+; x8664-NEXT: pushq %rax
+; x8664-NEXT: movq [[FLAGS]], %rax
+; x8664-NEXT: addb $127, %al
+; x8664-NEXT: sahf
+; x8664-NEXT: popq %rax
  
-; CHECK-NEXT: push[[LQ]] [[FLAGS]]
-; CHECK-NEXT: popf[[LQ]]
    %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
    %success = extractvalue { i32, i1 } %res, 1
  
-  %rhs = call i32 @bar()
+  %rhs = call i32 @foo()
  
    %ret = select i1 %success, i32 %new, i32 %rhs
    ret i32 %ret
author	JF Bastien <jfb@google.com>
	Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)
committer	JF Bastien <jfb@google.com>
	Mon, 10 Aug 2015 20:59:36 +0000 (20:59 +0000)
lib/CodeGen/MachineInstrBundle.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
test/CodeGen/X86/cmpxchg-clobber-flags.ll		patch \| blob \| history