From da52506792f1791682eda34d6319f5967116eb65 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 25 Feb 2011 06:42:42 +0000 Subject: [PATCH] Add patterns to use post-increment addressing for Neon VST1-lane instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126477 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 +++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 4 +++- lib/Target/ARM/ARMInstrNEON.td | 31 ++++++++++++++++++++---------- test/CodeGen/ARM/vstlane.ll | 26 +++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a506cffdba3..f0d5a7d7c2e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -126,6 +126,7 @@ public: bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); + bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); @@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, return true; } +bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, + SDValue &Offset) { + LSBaseSDNode *LdSt = cast(Op); + ISD::MemIndexedMode AM = LdSt->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + Offset = N; + if (ConstantSDNode *NC = dyn_cast(N)) { + if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) + Offset = CurDAG->getRegister(0, MVT::i32); + } + return true; +} + bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c827ce3da97..6e3fe2e039f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -561,7 +561,9 @@ def addrmode6 : Operand, let EncoderMethod = "getAddrMode6AddressOpValue"; } -def am6offset : Operand { +def am6offset : Operand, + ComplexPattern { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1e2e5504e66..dc3d63e26ef 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { - // ...with address register writeback: -class VST1LNWB op11_8, bits<4> op7_4, string Dt> +class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; + "$Rn.addr = $wb", + [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), + addrmode6:$Rn, am6offset:$Rm))]>; +class VST1QLNWBPseudo + : VSTQLNWBPseudo { + let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr, am6offset:$offset))]; +} -def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> { +def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, + NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } -def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> { +def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, + NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> { +def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, + extractelt> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } -def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo; +def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; + +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 6cc052bbeb1..d1bc15ad576 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -10,6 +10,19 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { ret void } +;Check for a post-increment updating store. +define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { +;CHECK: vst1lanei8_update: +;CHECK: vst1.8 {d16[3]}, [r2]! + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 + %tmp3 = getelementptr i8* %A, i32 1 + store i8* %tmp3, i8** %ptr + ret void +} + define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: @@ -66,6 +79,19 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ret void } +;Check for a post-increment updating store. +define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { +;CHECK: vst1laneQi32_update: +;CHECK: vst1.32 {d17[1]}, [r1, :32]! + %A = load i32** %ptr + %tmp1 = load <4 x i32>* %B + %tmp2 = extractelement <4 x i32> %tmp1, i32 3 + store i32 %tmp2, i32* %A, align 8 + %tmp3 = getelementptr i32* %A, i32 1 + store i32* %tmp3, i32** %ptr + ret void +} + define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1laneQf: ;CHECK: vst1.32 {d17[1]}, [r0] -- 2.34.1