bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
return true;
}
+/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg
+/// in current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// MaxLen specifies the maximum length of the copy chain the func
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+ // FIXME:
+ // We can extend the logic further: If an pair of operands in an insn has
+ // been merged, the insn could be regarded as a virtual copy, and the virtual
+ // copy could also be used to construct a copy chain.
+ // To more generally minimize register copies, ideally the logic of two addr
+ // instruction pass should be integrated with register allocation pass where
+ // interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
--- /dev/null
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; This test is to ensure the TwoAddrInstruction pass chooses the proper operands to
+; merge and generates fewer mov insns.
+
+@M = common global i32 0, align 4
+@total = common global i32 0, align 4
+@g = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @foo() {
+entry:
+ %0 = load i32, i32* @M, align 4
+ %cmp3 = icmp sgt i32 %0, 0
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %total.promoted = load i32, i32* @total, align 4
+ br label %for.body
+
+; Check that only one mov will be generated in the kernel loop.
+; CHECK-LABEL: foo:
+; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: # %for.body
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]
+; CHECK-NOT: mov
+; CHECK: shrl $31, [[REG1]]
+; CHECK-NOT: mov
+; CHECK: jl [[LOOP1]]
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
+ %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %div = sdiv i32 %i.04, 2
+ %add = add nsw i32 %div, %add5
+ %inc = add nuw nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %0
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ store i32 %add, i32* @total, align 4
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @goo() {
+entry:
+ %0 = load i32, i32* @M, align 4
+ %cmp3 = icmp sgt i32 %0, 0
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %total.promoted = load i32, i32* @total, align 4
+ br label %for.body
+
+; Check that only two mov will be generated in the kernel loop.
+; CHECK-LABEL: goo:
+; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: # %for.body
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]
+; CHECK-NOT: mov
+; CHECK: shrl $31, [[REG2]]
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}, g(%rip)
+; CHECK: jl [[LOOP2]]
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
+ %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %div = sdiv i32 %i.04, 2
+ %add = add nsw i32 %div, %add5
+ store volatile i32 %add, i32* @g, align 4
+ %inc = add nuw nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %0
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ store i32 %add, i32* @total, align 4
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+}
+