From: Jakob Stoklund Olesen Date: Wed, 12 Oct 2011 00:06:23 +0000 (+0000) Subject: Fix -widen-vmovs liveness issues. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1c062c24aba08962b4687f56b274f182e5b7a8e5;p=oota-llvm.git Fix -widen-vmovs liveness issues. When widening a copy, we are reading a larger register that may not be live. Use an <undef> flag to tell the register scavenger and machine code verifier that we know the value isn't defined. We now widen: %S6<def> = COPY %S4<kill>, %D3<imp-def> into: %D3<def> = VMOVD %D2<undef>, pred:14, pred:%noreg, %S4<imp-use,kill> This also keeps the <kill> flag on %S4 so we don't inadvertently kill a live value in %S5. Finally, ensure that ARMBaseInstrInfo::setExecutionDomain() preserves the <undef> flag when converting VMOVD to VORR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141746 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a696e1641f2..408edfc20d4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1025,13 +1025,39 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) return false; - // All clear, widen the COPY. Preserve the implicit operands, even if they - // may be superfluous now. + // A dead copy shouldn't show up here, but reject it just in case. + if (MI->getOperand(0).isDead()) + return false; + + // All clear, widen the COPY. DEBUG(dbgs() << "widening: " << *MI); + + // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg + // or some other super-register. + int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); + if (ImpDefIdx != -1) + MI->RemoveOperand(ImpDefIdx); + + // Change the opcode and operands. MI->setDesc(get(ARM::VMOVD)); MI->getOperand(0).setReg(DstRegD); MI->getOperand(1).setReg(SrcRegD); AddDefaultPred(MachineInstrBuilder(MI)); + + // We are now reading SrcRegD instead of SrcRegS. 
This may upset the + // register scavenger and machine verifier, so we need to indicate that we + // are reading an undefined value from SrcRegD, but a proper value from + // SrcRegS. + MI->getOperand(1).setIsUndef(); + MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + + // SrcRegD may actually contain an unrelated value in the ssub_1 + // sub-register. Don't kill it. Only kill the ssub_0 sub-register. + if (MI->getOperand(1).isKill()) { + MI->getOperand(1).setIsKill(false); + MI->addRegisterKilled(SrcRegS, TRI, true); + } + DEBUG(dbgs() << "replaced by: " << *MI); return true; } @@ -2800,5 +2826,5 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Add the extra source operand and new predicates. // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addReg(MI->getOperand(1).getReg())); + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll new file mode 100644 index 00000000000..8fd99ba7af4 --- /dev/null +++ b/test/CodeGen/ARM/widen-vmovs.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s +target triple = "thumbv7-apple-ios" + +; The 0.0 constant is loaded from the constant pool and kept in a register. +; CHECK: %entry +; CHECK: vldr.32 s +; The float loop variable is initialized with a vmovs from the constant register. +; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32. +; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]] +; CHECK: , [[DN]] +; CHECK: %for.body.i +; CHECK: vadd.f32 [[DL]], [[DL]], [[DN]] +; +; This test is verifying: +; - The VMOVS widening is happening. +; - Register liveness is verified. +; - The execution domain switch to vorr works across basic blocks. 
+ +define void @Mm() nounwind { +entry: + br label %for.body4 + +for.body4: + br label %for.body.i + +for.body.i: + %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ] + %add.i = fadd float %tmp3.i, 0.000000e+00 + %exitcond.i = icmp eq i32 undef, 41 + br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i + +rInnerproduct.exit: + store float %add.i, float* undef, align 4 + br label %for.body4 +}