From 01b623c8c2d1bd015a8bb20eafee3322575eff8f Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 20 Feb 2012 23:28:17 +0000 Subject: [PATCH] Fix machine-cp by having it check sub-register indices. e.g. ecx = mov eax al = mov ch The second copy is not a nop because the sub-indices of ecx/ch are not the same as those of eax/al. Re-enabled machine-cp. PR11940 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineCopyPropagation.cpp | 28 +++++++- lib/CodeGen/Passes.cpp | 2 +- test/CodeGen/X86/2012-02-20-MachineCPBug.ll | 77 +++++++++++++++++++++ test/CodeGen/X86/machine-cp.ll | 2 +- 4 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/2012-02-20-MachineCPBug.ll diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 0e9d3a4f8b1..9ed7b7357ed 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -99,6 +99,31 @@ static bool NoInterveningSideEffect(const MachineInstr *CopyMI, return true; } +/// isNopCopy - Return true if the specified copy is really a nop. That is +/// if the source of the copy is the same as the definition of the copy that +/// supplied the source. If the source of the copy is a sub-register then it +/// must check the sub-indices match. e.g. 
+/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { SmallSetVector MaybeDeadCopies; // Candidates for deletion DenseMap AvailCopyMap; // Def -> available copies map @@ -122,10 +147,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap::iterator CI = AvailCopyMap.find(Src); if (CI != AvailCopyMap.end()) { MachineInstr *CopyMI = CI->second; - unsigned SrcSrc = CopyMI->getOperand(1).getReg(); if (!ReservedRegs.test(Def) && (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && - (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) { + isNopCopy(CopyMI, Def, Src, TRI)) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. 
// %ECX = COPY %EAX diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 401ca657c40..ec1f2b4c3b2 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -69,7 +69,7 @@ static cl::opt DisableLSR("disable-lsr", cl::Hidden, static cl::opt DisableCGP("disable-cgp", cl::Hidden, cl::desc("Disable Codegen Prepare")); static cl::opt DisableCopyProp("disable-copyprop", cl::Hidden, - cl::desc("Disable Copy Propagation pass"), cl::init(true)); // PR11940 + cl::desc("Disable Copy Propagation pass")); static cl::opt PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt PrintISelInput("print-isel-input", cl::Hidden, diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll new file mode 100644 index 00000000000..58569833df0 --- /dev/null +++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=i386-apple-macosx | FileCheck %s +; PR11940: Do not optimize away movb %al, %ch + +%struct.APInt = type { i64* } + +declare noalias i8* @calloc(i32, i32) nounwind + +define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 { +entry: +; CHECK: bug: + %call = tail call i8* @calloc(i32 1, i32 32) + %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind + %0 = bitcast i8* %call.i to i64* + %rem.i = and i32 %rotateAmt, 63 + %div.i = lshr i32 %rotateAmt, 6 + %cmp.i = icmp eq i32 %rem.i, 0 + br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i + +for.cond.preheader.i: ; preds = %entry + %sub.i = sub i32 4, %div.i + %cmp23.i = icmp eq i32 %div.i, 4 + br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i + +for.body.lr.ph.i: ; preds = %for.cond.preheader.i + %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0 + %.pre5.i = load i64** %pVal.i, align 4 + br label %for.body.i + +for.body.i: ; preds = %for.body.i, 
%for.body.lr.ph.i + %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ] + %add.i = add i32 %i.04.i, %div.i + %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i + %1 = load i64* %arrayidx.i, align 4 + %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i + store i64 %1, i64* %arrayidx3.i, align 4 + %inc.i = add i32 %i.04.i, 1 + %cmp2.i = icmp ult i32 %inc.i, %sub.i + br i1 %cmp2.i, label %for.body.i, label %if.end.i + +if.end.i: ; preds = %for.body.i, %entry + %cmp81.i = icmp eq i32 %div.i, 3 + br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i + +for.body9.lr.ph.i: ; preds = %if.end.i, %for.cond.preheader.i + %sub58.i = sub i32 3, %div.i + %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0 + %sh_prom.i = zext i32 %rem.i to i64 + %sub17.i = sub i32 64, %rem.i + %sh_prom18.i = zext i32 %sub17.i to i64 + %.pre.i = load i64** %pVal11.i, align 4 + br label %for.body9.i + +for.body9.i: ; preds = %for.body9.i, %for.body9.lr.ph.i +; CHECK: %for.body9.i +; CHECK: movb %al, %ch + %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ] + %add10.i = add i32 %i6.02.i, %div.i + %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i + %2 = load i64* %arrayidx12.i, align 4 + %shr.i = lshr i64 %2, %sh_prom.i + %add14.i = add i32 %add10.i, 1 + %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i + %3 = load i64* %arrayidx16.i, align 4 + %shl.i = shl i64 %3, %sh_prom18.i + %or.i = or i64 %shl.i, %shr.i + %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i + store i64 %or.i, i64* %arrayidx19.i, align 4 + %inc21.i = add i32 %i6.02.i, 1 + %cmp8.i = icmp ult i32 %inc21.i, %sub58.i + br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit + +_ZNK5APInt4lshrEj.exit: ; preds = %for.body9.i, %if.end.i + %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind + %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0 + store i64* %0, i64** %4, align 
4 + ret void +} diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll index 772d2d080b6..54fa01c38fd 100644 --- a/test/CodeGen/X86/machine-cp.ll +++ b/test/CodeGen/X86/machine-cp.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s -disable-copyprop=false | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s ; After tail duplication, two copies in an early exit BB can be cancelled out. ; rdar://10640363 -- 2.34.1