Without this, MachineCSE is powerless to handle redundant operations with truncated source operands.
This required fixing the 2-addr pass to handle tied subregisters. It isn't clear what combinations of subregisters can legally be tied, but the simple case of truncated source operands is now safely handled:
%vreg11<def> = COPY %vreg1:sub_32bit; GR32:%vreg11 GR64:%vreg1
%vreg12<def> = COPY %vreg2:sub_32bit; GR32:%vreg12 GR64:%vreg2
%vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def>
Test case: cse-add-with-overflow.ll.
This exposed an existing bug in
PPCInstrInfo::commuteInstruction. Thanks to Rafael for the test case:
PowerPC/crash.ll.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197465
91177308-0d34-0410-b5e6-
96231b3b80d8
unsigned SrcReg = DefMI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
continue;
- if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ if (DefMI->getOperand(0).getSubReg())
continue;
- if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+ unsigned SrcSubReg = DefMI->getOperand(1).getSubReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SrcSubReg)
+ RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+ SrcSubReg);
+ if (!MRI->constrainRegClass(SrcReg, RC))
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
DEBUG(dbgs() << "*** to: " << *MI);
- MO.setReg(SrcReg);
+ MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
MRI->clearKillFlags(SrcReg);
DefMI->eraseFromParent();
++NumCoalesces;
assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
// Deal with <undef> uses immediately - simply rewrite the src operand.
- if (SrcMO.isUndef()) {
+ if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
if (TargetRegisterInfo::isVirtualRegister(DstReg))
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
TRI, *MF))
MRI->constrainRegClass(DstReg, RC);
SrcMO.setReg(DstReg);
+ SrcMO.setSubReg(0);
DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
continue;
}
unsigned LastCopiedReg = 0;
SlotIndex LastCopyIdx;
unsigned RegB = 0;
+ unsigned SubRegB = 0;
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
unsigned DstIdx = TiedPairs[tpi].second;
// Grab RegB from the instruction because it may have changed if the
// instruction was commuted.
RegB = MI->getOperand(SrcIdx).getReg();
+ SubRegB = MI->getOperand(SrcIdx).getSubReg();
if (RegA == RegB) {
// The register is tied to multiple destinations (or else we would
#endif
// Emit a copy.
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+ MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA);
+ // If this operand is folding a truncation, the truncation now moves to the
+ // copy so that the register classes remain valid for the operands.
+ MIB.addReg(RegB, 0, SubRegB);
+ const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+ if (SubRegB) {
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+ SubRegB) &&
+ "tied subregister must be a truncation");
+ // The superreg class will not be used to constrain the subreg class.
+ RC = 0;
+ }
+ else {
+ assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+ && "tied subregister must be a truncation");
+ }
+ }
// Update DistanceMap.
MachineBasicBlock::iterator PrevMI = MI;
}
}
- DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+ DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
MachineOperand &MO = MI->getOperand(SrcIdx);
assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
// Make sure regA is a legal regclass for the SrcIdx operand.
if (TargetRegisterInfo::isVirtualRegister(RegA) &&
TargetRegisterInfo::isVirtualRegister(RegB))
- MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
-
+ MRI->constrainRegClass(RegA, RC);
MO.setReg(RegA);
+ // The getMatchingSuper asserts guarantee that the register class projected
+ // by SubRegB is compatible with RegA with no subregister. So regardless of
+ // whether the dest oper writes a subreg, the source oper should not.
+ MO.setSubReg(0);
// Propagate SrcRegMap.
SrcRegMap[RegA] = RegB;
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+ MO.isUse()) {
if (MO.isKill()) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
}
}
}
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
+ unsigned SubReg1 = MI->getOperand(1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(2).getSubReg();
bool Reg1IsKill = MI->getOperand(1).isKill();
bool Reg2IsKill = MI->getOperand(2).isKill();
bool ChangeReg0 = false;
// Must be two address instruction!
assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
+ assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
Reg2IsKill = false;
ChangeReg0 = true;
}
.addImm((MB-1) & 31);
}
- if (ChangeReg0)
+ if (ChangeReg0) {
MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(0).setSubReg(SubReg2);
+ }
MI->getOperand(2).setReg(Reg1);
MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setSubReg(SubReg1);
+ MI->getOperand(1).setSubReg(SubReg2);
MI->getOperand(2).setIsKill(Reg1IsKill);
MI->getOperand(1).setIsKill(Reg2IsKill);
char PPCEarlyReturn::ID = 0;
FunctionPass*
llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
-
return 0;
unsigned Reg = MI->getOperand(1).getReg();
+ unsigned SubReg = MI->getOperand(1).getSubReg();
MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
MI->getOperand(2).ChangeToRegister(Reg, false);
+ MI->getOperand(2).setSubReg(SubReg);
} else {
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
}
define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
; CHECK-LABEL: test3:
-; CHECK: cmovnel %edi, %esi
-; CHECK-NEXT: movl %esi, %edi
+; CHECK: cmov{{n?}}el %[[R1:e..]], %[[R2:e..]]
+; CHECK-NEXT: movl %[[R2]], %[[R2]]
%c = trunc i64 %a to i32
%d = trunc i64 %b to i32
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s
+; rdar:15661073 simple example of redundant adds
+;
+; MachineCSE should coalesce trivial subregister copies.
+;
+; The extra movl+addl should be removed during MachineCSE.
+; CHECK-LABEL: redundantadd
+; CHECK: cmpq
+; CHECK: movq
+; CHECK-NOT: movl
+; CHECK: addl
+; CHECK-NOT: addl
+; CHECK: ret
+
+define i64 @redundantadd(i64* %a0, i64* %a1) {
+entry:
+ %tmp8 = load i64* %a0, align 8
+ %tmp12 = load i64* %a1, align 8
+ %tmp13 = icmp ult i64 %tmp12, -281474976710656
+ br i1 %tmp13, label %exit1, label %body
+
+exit1:
+ unreachable
+
+body:
+ %tmp14 = trunc i64 %tmp8 to i32
+ %tmp15 = trunc i64 %tmp12 to i32
+ %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15)
+ %tmp17 = extractvalue { i32, i1 } %tmp16, 1
+ br i1 %tmp17, label %exit2, label %return
+
+exit2:
+ unreachable
+
+return:
+ %tmp18 = add i64 %tmp12, %tmp8
+ %tmp19 = and i64 %tmp18, 4294967295
+ %tmp20 = or i64 %tmp19, -281474976710656
+ ret i64 %tmp20
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)