From: Andrew Trick Date: Tue, 15 Oct 2013 03:39:43 +0000 (+0000) Subject: Improve on r192635, ExeDepsFix for avx, and add a test case. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=51dee24ca6ba63cf021d56ca9cbae62c739d5041;p=oota-llvm.git Improve on r192635, ExeDepsFix for avx, and add a test case. rdar:15221834 False AVX register dependencies cause 5x slowdown on flops-5/6 and significant slowdown on several others. This was blocking the switch to MI-Sched. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192669 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 0d26f9d4cba..031f19c135a 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -557,6 +557,9 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { + // Update liveness, including the current instruction's defs. + LiveUnits.stepBackward(*I, *TRI); + if (UndefMI == &*I) { if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); @@ -568,7 +571,6 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { UndefMI = UndefReads.back().first; OpIdx = UndefReads.back().second; } - LiveUnits.stepBackward(*I, *TRI); } } diff --git a/test/CodeGen/X86/break-avx-dep.ll b/test/CodeGen/X86/break-avx-dep.ll new file mode 100644 index 00000000000..eea7d1565d8 --- /dev/null +++ b/test/CodeGen/X86/break-avx-dep.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s +; +; rdar:15221834 False AVX register dependencies cause 5x slowdown on +; flops-6. Make sure the unused register read by vcvtsi2sdq is zeroed +; to avoid cyclic dependence on a write to the same register in a +; previous iteration. 
+ +; CHECK-LABEL: t1: +; CHECK-LABEL: %loop +; CHECK: vxorps %[[REG:xmm.]], %{{xmm.}}, %{{xmm.}} +; CHECK: vcvtsi2sdq %{{r..}}, %[[REG]], %{{xmm.}} +define i64 @t1(i64* nocapture %x, double* nocapture %y) nounwind { +entry: + %vx = load i64* %x + br label %loop +loop: + %i = phi i64 [ 1, %entry ], [ %inc, %loop ] + %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ] + %fi = sitofp i64 %i to double + %vy = load double* %y + %fipy = fadd double %fi, %vy + %iipy = fptosi double %fipy to i64 + %s2 = add i64 %s1, %iipy + %inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %inc, 156250000 + br i1 %exitcond, label %ret, label %loop +ret: + ret i64 %s2 +}