From 11d98997590a1d636b04c4f0756eded6b2d037f3 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Tue, 23 Mar 2010 06:20:33 +0000 Subject: [PATCH] Change VST1 instructions for storing Q register values to operate on pairs of D registers. Add a separate VST1q instruction with a Q register source operand for use by storeRegToStackSlot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 30 ++++++++++---- lib/Target/ARM/ARMInstrNEON.td | 64 +++++++++++++++-------------- lib/Target/ARM/NEONPreAllocPass.cpp | 9 ++-- 4 files changed, 59 insertions(+), 46 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 943952fcf6b..cea49396022 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -738,7 +738,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 332ca3cb2a7..d0d940addcb 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -132,9 +132,9 @@ private: unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should - /// be 2, 3 or 4. The opcode arrays specify the instructions used for + /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. - /// For NumVecs == 2, QOpcodes1 is not used. 
+ /// For NumVecs <= 2, QOpcodes1 is not used. SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); @@ -1048,7 +1048,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; case MVT::v2i64: OpcodeIndex = 3; - assert(NumVecs == 1 && "v2i64 type only supported for VLD1/VST1"); + assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); break; } @@ -1112,7 +1112,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1137,6 +1137,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2i64: OpcodeIndex = 3; + assert(NumVecs == 1 && "v2i64 type only supported for VST1"); + break; } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); @@ -1157,9 +1160,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } EVT RegVT = GetNEONSubregVT(VT); - if (NumVecs == 2) { - // Quad registers are directly supported for VST2, - // storing 2 pairs of D regs. + if (NumVecs <= 2) { + // Quad registers are directly supported for VST1 and VST2, + // storing pairs of D regs. 
unsigned Opc = QOpcodes0[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, @@ -1170,7 +1173,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), + 5 + 2 * NumVecs); } // Otherwise, quad registers are stored with two separate instructions, @@ -1894,9 +1898,17 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } + case Intrinsic::arm_neon_vst1: { + unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, + ARM::VST1d32, ARM::VST1d64 }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; + return SelectVST(N, 1, DOpcodes, QOpcodes, 0); + } + case Intrinsic::arm_neon_vst2: { unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST2d64 }; + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9156ff9e068..6d20e29dbb1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -141,6 +141,7 @@ def VLD1q_UPD } // mayLoad = 1 // Use vstmia to store a Q register as a D register pair. +// This is equivalent to VSTMD except that it has a Q register operand. def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, "vstmia", "$addr, ${src:dregpair}", [(store (v2f64 QPR:$src), addrmode4:$addr)]> { @@ -151,6 +152,20 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, let Inst{11-8} = 0b1011; } +let mayStore = 1 in { +// Use vst1 to store a Q register as a D register pair. 
+// This alternative to VSTRQ allows an alignment to be specified. +// This is equivalent to VST1q64 except that it has a Q register operand. +def VST1q + : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), + IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; +def VST1q_UPD + : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), + IIC_VST, "vst1", "64", "${src:dregpair}, $addr$offset", + "$addr.addr = $wb", []>; +} // mayStore = 1 + let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) @@ -477,31 +492,26 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // FIXME: Not yet implemented. } // mayLoad = 1, hasExtraDefRegAllocReq = 1 +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string Dt, ValueType Ty> +class VST1D op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - "vst1", Dt, "\\{$src\\}, $addr", "", - [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b00,0b1010,op7_4, 
(outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST1d8 : VST1D<0b0000, "8">; +def VST1d16 : VST1D<0b0100, "16">; +def VST1d32 : VST1D<0b1000, "32">; +def VST1d64 : VST1D<0b1100, "64">; + +def VST1q8 : VST1Q<0b0000, "8">; +def VST1q16 : VST1Q<0b0100, "16">; +def VST1q32 : VST1Q<0b1000, "32">; +def VST1q64 : VST1Q<0b1100, "64">; // ...with address register writeback: class VST1DWB op7_4, string Dt> @@ -582,9 +592,6 @@ class VST2Q op7_4, string Dt> def VST2d8 : VST2D<0b1000, 0b0000, "8">; def VST2d16 : VST2D<0b1000, 0b0100, "16">; def VST2d32 : VST2D<0b1000, 0b1000, "32">; -def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; @@ -606,11 +613,6 @@ class VST2QWB op7_4, string Dt> def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; -def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", - "$addr.addr = $wb", []>; def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, "16">; diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index ce620fa9dce..7334259bf57 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -50,10 +50,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: - FirstOpnd = 0; - NumRegs = 2; - return true; - case ARM::VLD2d8: case ARM::VLD2d16: case ARM::VLD2d32: @@ -177,10 +173,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; + case ARM::VST1q8: + 
case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: case ARM::VST2d8: case ARM::VST2d16: case ARM::VST2d32: - case ARM::VST2d64: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: -- 2.34.1