From 98ec91ea80e042907aac8d3cbd9614d29f6cba45 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 2 Jul 2010 20:36:18 +0000 Subject: [PATCH] - Two-address pass should not assume unfolding is always successful. - X86 unfolding should check if the instruction being unfolded has memoperands. If there are no memoperands, then it must assume conservative alignment. If this would introduce an expensive SSE unaligned load / store, then unfoldMemoryOperand etc. should not unfold the instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107509 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 14 ++-- lib/Target/X86/X86InstrInfo.cpp | 25 +++++- test/CodeGen/X86/2010-07-02-UnfoldBug.ll | 99 +++++++++++++++++++++++ 3 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/X86/2010-07-02-UnfoldBug.ll diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0c97dad48b8..62fa0fdb771 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -926,14 +926,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; - bool Success = - TII->unfoldMemoryOperand(MF, mi, Reg, - /*UnfoldLoad=*/true, /*UnfoldStore=*/false, - NewMIs); - (void)Success; - assert(Success && - "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " - "succeeded!"); + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); // The load was previously folded, so this is the only use. 
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index caa623399de..c1d66cb5702 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2159,7 +2159,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2189,7 +2189,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2693,6 +2693,13 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; const TargetRegisterClass *RC = TOI.getRegClass(&RI); + if (!MI->hasOneMemOperand() && + RC == &X86::VR128RegClass && + !TM.getSubtarget().isUnalignedMemAccessFast()) + // Without memoperands, loadRegFromAddr and storeRegToAddr will + // conservatively assume the address is unaligned. That's bad for + // performance. 
+ return false; SmallVector AddrOps; SmallVector BeforeOps; SmallVector AfterOps; @@ -2834,7 +2841,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractLoadMemRefs(cast(N)->memoperands_begin(), cast(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned load. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2871,7 +2883,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(cast(N)->memoperands_begin(), cast(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned store. 
+ return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, diff --git a/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll new file mode 100644 index 00000000000..79219dcfe60 --- /dev/null +++ b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://8154265 + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + +define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp { +entry: + br i1 undef, label %bb2.thread, label %bb2 + +bb2.thread: ; preds = %entry + br i1 undef, label %bb41, label %bb10.preheader + +bb2: ; preds = %entry + unreachable + +bb10.preheader: ; preds = %bb2.thread + br i1 undef, label %bb9, label %bb12 + +bb9: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb9, label %bb12 + +bb12: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb4.i.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb12 + unreachable + +bb4.i.i: ; preds = %bb12 + br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + +bb8.i.i: ; preds = %bb4.i.i + br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i + +bb9.i.i: ; preds = %bb8.i.i + br i1 undef, label %bb11.i.i, label %bb10.i.i + +bb10.i.i: ; preds = %bb9.i.i + unreachable + +bb11.i.i: ; preds = %bb9.i.i + unreachable + +_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i + br i1 undef, label %bb19, label %bb14 + 
+bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + unreachable + +bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + br i1 undef, label %bb.i50, label %bb6.i + +bb.i50: ; preds = %bb19 + unreachable + +bb6.i: ; preds = %bb19 + br i1 undef, label %bb28, label %bb.nph106 + +bb22: ; preds = %bb24.preheader + br i1 undef, label %bb2.i.i, label %bb.i.i49 + +bb.i.i49: ; preds = %bb22 + %0 = load float* undef, align 4 ; [#uses=1] + %1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> , <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> ) nounwind readnone ; <<4 x float>> [#uses=1] + %4 = extractelement <4 x float> %3, i32 0 ; [#uses=1] + store float %4, float* undef, align 4 + %5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> , <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1] + %6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> ) nounwind readnone ; <<4 x float>> [#uses=1] + %7 = extractelement <4 x float> %6, i32 0 ; [#uses=1] + store float %7, float* undef, align 4 + unreachable + +bb2.i.i: ; preds = %bb22 + unreachable + +bb26.loopexit: ; preds = %bb24.preheader + br i1 undef, label %bb28, label %bb24.preheader + +bb.nph106: ; preds = %bb6.i + br label %bb24.preheader + +bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit + br i1 undef, label %bb22, label %bb26.loopexit + +bb28: ; preds = %bb26.loopexit, %bb6.i + unreachable + +bb41: ; preds = %bb2.thread + br i1 undef, label %return, label %bb46 + +bb46: ; preds = %bb41 + ret void + +return: ; preds = %bb41 + ret void +} -- 2.34.1