From: JF Bastien Date: Wed, 5 Aug 2015 20:53:56 +0000 (+0000) Subject: Revert "Fix MO's analyzePhysReg, it was confusing sub- and super-registers. Problem... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3e8a65d34602cec80e90e5197814771eb9f7dd71;p=oota-llvm.git Revert "Fix MO's analyzePhysReg, it was confusing sub- and super-registers. Problem pointed out by Michael Hordijk." I mistakenly committed the patch for D6629, and was trying to commit another. Reverting until it gets proper signoff. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244121 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index f6e45a4b7c7..cd820ee1ac5 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -310,7 +310,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) continue; - bool IsRegOrSuperReg = MOReg == Reg || TRI->isSuperRegister(MOReg, Reg); + bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); if (IsRegOrSuperReg && MO.readsReg()) { diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 674cb266322..810fdb77a0d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3903,56 +3903,34 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - bool FromEFLAGS = SrcReg == X86::EFLAGS; - bool ToEFLAGS = DestReg == X86::EFLAGS; - int Reg = FromEFLAGS ? DestReg : SrcReg; - bool is32 = X86::GR32RegClass.contains(Reg); - bool is64 = X86::GR64RegClass.contains(Reg); - if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { - // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is - // inefficient. Instead: - // - Save the overflow flag OF into AL using SETO, and restore it using a - // signed 8-bit addition of AL and INT8_MAX. - // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH - // using LAHF/SAHF. - // - When RAX/EAX is live and isn't the destination register, make sure it - // isn't clobbered by PUSH/POP'ing it before and after saving/restoring - // the flags. - // This approach is ~2.25x faster than using PUSHF/POPF. - // - // This is still somewhat inefficient because we don't know which flags are - // actually live inside EFLAGS. Were we able to do a single SETcc instead of - // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster. - // - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - - int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; - int Push = is64 ? X86::PUSH64r : X86::PUSH32r; - int Pop = is64 ? X86::POP64r : X86::POP32r; - int AX = is64 ? X86::RAX : X86::EAX; - - bool AXDead = (Reg == AX) || - (MachineBasicBlock::LQR_Dead == - MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); - - if (!AXDead) - BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); - if (FromEFLAGS) { - BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); - BuildMI(MBB, MI, DL, get(X86::LAHF)); - BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + if (SrcReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSHF64)); + BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); + return; } - if (ToEFLAGS) { - BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) - .addReg(X86::AL) - .addImm(INT8_MAX); - BuildMI(MBB, MI, DL, get(X86::SAHF)); + if (X86::GR32RegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSHF32)); + BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); + return; + } + } + if (DestReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH64r)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(X86::POPF64)); + return; + } + if (X86::GR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH32r)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(X86::POPF32)); + return; } - if (!AXDead) - BuildMI(MBB, MI, DL, get(Pop), AX); - return; } DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index c129128b5fa..61123930887 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,58 +1,24 @@ -; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 -; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f -; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 +; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -declare i32 @foo() -declare i32 @bar(i64) +declare i32 @bar() define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { -; i386-LABEL: test_intervening_call: -; i386: cmpxchg8b -; i386-NEXT: pushl %eax -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: popl %eax -; i386-NEXT: movl %edx, 4(%esp) -; i386-NEXT: movl %eax, (%esp) -; i386-NEXT: calll bar -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: jne - -; i386f-LABEL: test_intervening_call: -; i386f: cmpxchg8b -; i386f-NEXT: movl %eax, (%esp) -; i386f-NEXT: movl %edx, 4(%esp) -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: calll bar -; i386f-NEXT: movl [[FLAGS]], %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: jne - -; x8664-LABEL: test_intervening_call: -; x8664: cmpxchgq -; x8664: pushq %rax -; x8664-NEXT: seto %al -; x8664-NEXT: lahf -; x8664-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-NEXT: popq %rax -; x8664-NEXT: movq %rax, %rdi -; x8664-NEXT: callq bar -; x8664-NEXT: movq [[FLAGS]], %rax -; x8664-NEXT: addb $127, %al -; x8664-NEXT: sahf -; x8664-NEXT: jne +; CHECK-LABEL: test_intervening_call: +; CHECK: cmpxchg +; CHECK: pushf[[LQ:[lq]]] +; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]] +; CHECK-NEXT: call[[LQ]] bar + +; CHECK-NEXT: push[[LQ]] [[FLAGS]] +; CHECK-NEXT: popf[[LQ]] +; CHECK-NEXT: jne %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst - %v = extractvalue { i64, i1 } %cx, 0 %p = extractvalue { i64, i1 } %cx, 1 - call i32 @bar(i64 %v) + call i32 @bar() br i1 %p, label %t, label %f t: @@ -64,18 +30,10 @@ f: ; Interesting in producing a clobber without any function calls. define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) { -; i386-LABEL: test_control_flow: -; i386: cmpxchg -; i386-NEXT: jne - -; i386f-LABEL: test_control_flow: -; i386f: cmpxchg -; i386f-NEXT: jne - -; x8664-LABEL: test_control_flow: -; x8664: cmpxchg -; x8664-NEXT: jne +; CHECK-LABEL: test_control_flow: +; CHECK: cmpxchg +; CHECK-NEXT: jne entry: %cmp = icmp sgt i32 %i, %j br i1 %cmp, label %loop_start, label %cond.end @@ -109,46 +67,20 @@ cond.end: ; This one is an interesting case because CMOV doesn't have a chain ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { -; i386-LABEL: test_feed_cmov: -; i386: cmpxchgl -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: calll foo -; i386-NEXT: pushl %eax -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: popl %eax - -; i386f-LABEL: test_feed_cmov: -; i386f: cmpxchgl -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: calll foo -; i386f-NEXT: pushl %eax -; i386f-NEXT: movl [[FLAGS]], %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: popl %eax - -; x8664-LABEL: test_feed_cmov: -; x8664: cmpxchgl -; x8664: seto %al -; x8664-NEXT: lahf -; x8664-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-NEXT: callq foo -; x8664-NEXT: pushq %rax -; x8664-NEXT: movq [[FLAGS]], %rax -; x8664-NEXT: addb $127, %al -; x8664-NEXT: sahf -; x8664-NEXT: popq %rax +; CHECK-LABEL: test_feed_cmov: + +; CHECK: cmpxchg +; CHECK: pushf[[LQ:[lq]]] +; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]] + +; CHECK-NEXT: call[[LQ]] bar +; CHECK-NEXT: push[[LQ]] [[FLAGS]] +; CHECK-NEXT: popf[[LQ]] %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1 - %rhs = call i32 @foo() + %rhs = call i32 @bar() %ret = select i1 %success, i32 %new, i32 %rhs ret i32 %ret