From: Bob Wilson Date: Wed, 19 Aug 2009 17:03:43 +0000 (+0000) Subject: Add support for Neon VEXT (vector extract) shuffles. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=de95c1b88be44d4af916af8fba9d7940b7e98e32;p=oota-llvm.git Add support for Neon VEXT (vector extract) shuffles. This is derived from a patch by Anton Korzh. I modified it to recognize the VEXT shuffles during legalization and lower them to a target-specific DAG node. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79428 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 99135b23115..859511b3c33 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -487,6 +487,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2D: return "ARMISD::VST2D"; case ARMISD::VST3D: return "ARMISD::VST3D"; case ARMISD::VST4D: return "ARMISD::VST4D"; + case ARMISD::VEXT: return "ARMISD::VEXT"; case ARMISD::VREV64: return "ARMISD::VREV64"; case ARMISD::VREV32: return "ARMISD::VREV32"; case ARMISD::VREV16: return "ARMISD::VREV16"; @@ -2343,6 +2344,41 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SplatBitSize, DAG); } +static bool isVEXTMask(ShuffleVectorSDNode *N, bool &ReverseVEXT, + unsigned &Imm) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + ReverseVEXT = false; + Imm = N->getMaskElt(0); + + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + + // Increment the expected index. If it wraps around, it may still be + // a VEXT but the source vectors must be swapped. 
+ ExpectedElt += 1; + if (ExpectedElt == NumElts * 2) { + ExpectedElt = 0; + ReverseVEXT = true; + } + + if (ExpectedElt != static_cast<unsigned>(N->getMaskElt(i))) + return false; + } + + // Adjust the index value if the source operands will be swapped. + if (ReverseVEXT) + Imm -= NumElts; + + // VEXT only handles 8-bit elements so scale the index for larger elements. + Imm *= VT.getVectorElementType().getSizeInBits() / 8; + + return true; +} + /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) @@ -2458,8 +2494,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VDUP, dl, VT, Op0.getOperand(0)); } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0), - DAG.getConstant(Lane, MVT::i32)); + DAG.getConstant(Lane, MVT::i32)); } + + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(SVN, ReverseVEXT, Imm)) { + SDValue Op0 = SVN->getOperand(0); + SDValue Op1 = SVN->getOperand(1); + if (ReverseVEXT) + std::swap(Op0, Op1); + return DAG.getNode(ARMISD::VEXT, dl, VT, Op0, Op1, + DAG.getConstant(Imm, MVT::i32)); + } + if (isVREVMask(SVN, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0)); if (isVREVMask(SVN, 32)) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index db6d8baaad1..7dfca2be2c4 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -128,6 +128,7 @@ namespace llvm { VST4D, // Vector shuffles: + VEXT, // extract VREV64, // reverse elements within 64-bit doublewords VREV32, // reverse elements within 32-bit words VREV16 // reverse elements within 16-bit halfwords diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index fad3308c0d9..9cd9657ef4d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -100,6 +100,10 @@ def NEONvst3d : 
SDNode<"ARMISD::VST3D", SDTARMVST3, def NEONvst4d : SDNode<"ARMISD::VST4D", SDTARMVST4, [SDNPHasChain, SDNPMayStore]>; +def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; +def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; + def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; @@ -1941,6 +1945,21 @@ class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>; def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; +// Other Vector Shuffles. + +// VEXT : Vector Extract + +def VEXTd : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst), + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NoItinerary, + "vext.8\t$dst, $lhs, $rhs, $index", "", + [(set DPR:$dst, (v8i8 (NEONvext (v8i8 DPR:$lhs), + (v8i8 DPR:$rhs), imm:$index)))]>; +def VEXTq : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst), + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NoItinerary, + "vext.8\t$dst, $lhs, $rhs, $index", "", + [(set QPR:$dst, (v16i8 (NEONvext (v16i8 QPR:$lhs), + (v16i8 QPR:$rhs), imm:$index)))]>; + // VTRN : Vector Transpose def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">; diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll new file mode 100644 index 00000000000..a9d66467d88 --- /dev/null +++ b/test/CodeGen/ARM/vext.ll @@ -0,0 +1,37 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s + +define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextRd: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> 
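%tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> + ret <8 x i8> %tmp3 +} + +define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextq: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18> + ret <16 x i8> %tmp3 +} + +define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> + ret <16 x i8> %tmp3 +}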