From: Jiangning Liu Date: Wed, 6 Nov 2013 02:25:49 +0000 (+0000) Subject: Implement AArch64 Neon instruction set Bitwise Extract. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=258115258f8fe15e9d74b5fb524f90b75bb917d1;p=oota-llvm.git Implement AArch64 Neon instruction set Bitwise Extract. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194118 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 6765c3339d9..ab46d7f7b34 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -907,6 +907,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_ST3_UPD"; case AArch64ISD::NEON_ST4_UPD: return "AArch64ISD::NEON_ST4_UPD"; + case AArch64ISD::NEON_VEXTRACT: + return "AArch64ISD::NEON_VEXTRACT"; default: return NULL; } @@ -3797,7 +3799,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); @@ -3811,101 +3813,126 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, ArrayRef ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EltSize <= 64) { - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); - } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::BUILD_VECTOR) { - bool IsScalarToVector = true; - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF && - i != (unsigned)Lane) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, - V1.getOperand(Lane)); - } - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i64)); - } - // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert - // by element from V2 to V1 . - // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a - // better choice to be inserted than V1 as less insert needed, so we count - // element to be inserted for both V1 and V2, and select less one as insert - // target. - - // Collect elements need to be inserted and their index. - SmallVector NV1Elt; - SmallVector N1Index; - SmallVector NV2Elt; - SmallVector N2Index; - int Length = ShuffleMask.size(); - int V1EltNum = V1.getValueType().getVectorNumElements(); - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != I) { - NV1Elt.push_back(ShuffleMask[I]); - N1Index.push_back(I); - } + if (EltSize > 64) + return SDValue(); + + // If the element of shuffle mask are all the same constant, we can + // transform it into either NEON_VDUP or NEON_VDUPLANE + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. 
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); } - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != (I + V1EltNum)) { - NV2Elt.push_back(ShuffleMask[I]); - N2Index.push_back(I); - } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::BUILD_VECTOR) { + bool IsScalarToVector = true; + for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF && + i != (unsigned)Lane) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, + V1.getOperand(Lane)); } + return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i64)); + } - // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 - // will be inserted. - SDValue InsV = V1; - SmallVector InsMasks = NV1Elt; - SmallVector InsIndex = N1Index; - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { - if (NV1Elt.size() > NV2Elt.size()) { - InsV = V2; - InsMasks = NV2Elt; - InsIndex = N2Index; + int Length = ShuffleMask.size(); + int V1EltNum = V1.getValueType().getVectorNumElements(); + + // If the number of v1 elements is the same as the number of shuffle mask + // element and the shuffle masks are sequential values, we can transform + // it into NEON_VEXTRACT. + if (V1EltNum == Length) { + // Check if the shuffle mask is sequential. + bool IsSequential = true; + int CurMask = ShuffleMask[0]; + for (int I = 0; I < Length; ++I) { + if (ShuffleMask[I] != CurMask) { + IsSequential = false; + break; } - } else { - InsV = DAG.getNode(ISD::UNDEF, dl, VT); + CurMask++; } + if (IsSequential) { + assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); + unsigned VecSize = EltSize * V1EltNum; + unsigned Index = (EltSize/8) * ShuffleMask[0]; + if (VecSize == 64 || VecSize == 128) + return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, + DAG.getConstant(Index, MVT::i64)); + } + } - SDValue PassN; + // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert + // by element from V2 to V1 . + // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a + // better choice to be inserted than V1 as less insert needed, so we count + // element to be inserted for both V1 and V2, and select less one as insert + // target. + + // Collect elements need to be inserted and their index. + SmallVector NV1Elt; + SmallVector N1Index; + SmallVector NV2Elt; + SmallVector N2Index; + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != I) { + NV1Elt.push_back(ShuffleMask[I]); + N1Index.push_back(I); + } + } + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != (I + V1EltNum)) { + NV2Elt.push_back(ShuffleMask[I]); + N2Index.push_back(I); + } + } - for (int I = 0, E = InsMasks.size(); I != E; ++I) { - SDValue ExtV = V1; - int Mask = InsMasks[I]; - if (Mask > V1EltNum) { - ExtV = V2; - Mask -= V1EltNum; - } - // Any value type smaller than i32 is illegal in AArch64, and this lower - // function is called after legalize pass, so we need to legalize - // the result here. - EVT EltVT; - if (VT.getVectorElementType().isFloatingPoint()) - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; - else - EltVT = (EltSize == 64) ? 
MVT::i64 : MVT::i32; - - PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, - DAG.getConstant(Mask, MVT::i64)); - PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN, - DAG.getConstant(InsIndex[I], MVT::i64)); + // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 + // will be inserted. + SDValue InsV = V1; + SmallVector InsMasks = NV1Elt; + SmallVector InsIndex = N1Index; + if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { + if (NV1Elt.size() > NV2Elt.size()) { + InsV = V2; + InsMasks = NV2Elt; + InsIndex = N2Index; } - return PassN; + } else { + InsV = DAG.getNode(ISD::UNDEF, dl, VT); } - return SDValue(); + for (int I = 0, E = InsMasks.size(); I != E; ++I) { + SDValue ExtV = V1; + int Mask = InsMasks[I]; + if (Mask >= V1EltNum) { + ExtV = V2; + Mask -= V1EltNum; + } + // Any value type smaller than i32 is illegal in AArch64, and this lower + // function is called after legalize pass, so we need to legalize + // the result here. + EVT EltVT; + if (VT.getVectorElementType().isFloatingPoint()) + EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; + else + EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32; + + ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, + DAG.getConstant(Mask, MVT::i64)); + InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, + DAG.getConstant(InsIndex[I], MVT::i64)); + } + return InsV; } AArch64TargetLowering::ConstraintType diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 7effbfd66c0..83fd79d6ba7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -144,6 +144,9 @@ namespace AArch64ISD { // Vector dup by lane NEON_VDUPLANE, + // Vector extract + NEON_VEXTRACT, + // NEON loads with post-increment base updates: NEON_LD1_UPD = ISD::FIRST_TARGET_MEMORY_OPCODE, NEON_LD2_UPD, diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index b3e114a01b0..8a2142646e5 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -983,6 +983,24 @@ class NeonInstAlias : InstAlias { } +// Format AdvSIMD bitwise extract +class NeonI_BitExtract op2, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b101110; + let Inst{23-22} = op2; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + // imm4 in 14-11 + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + // Format AdvSIMD 3 vector registers with same vector type class NeonI_3VSame size, bits<5> opcode, dag outs, dag ins, string asmstr, diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 4cb5da6b8ae..8a78d14b8c9 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -50,6 +50,9 @@ def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; +def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; //===----------------------------------------------------------------------===// // Multiclasses @@ -1062,7 +1065,7 @@ def neon_uimm8_asmoperand : AsmOperandClass def neon_uimm8 : Operand, ImmLeaf { let 
ParserMatchClass = neon_uimm8_asmoperand; - let PrintMethod = "printNeonUImm8Operand"; + let PrintMethod = "printUImmHexOperand"; } def neon_uimm64_mask_asmoperand : AsmOperandClass @@ -4430,31 +4433,43 @@ def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; def neon_uimm0_bare : Operand, ImmLeaf { let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm1_bare : Operand, ImmLeaf { let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm2_bare : Operand, ImmLeaf { let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm3_bare : Operand, ImmLeaf { let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm4_bare : Operand, ImmLeaf { let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm3 : Operand, + ImmLeaf { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printUImmHexOperand"; +} + +def neon_uimm4 : Operand, + ImmLeaf { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printUImmHexOperand"; } class NeonI_INS_main op2, string asmop, + string OpS, RegisterOperand OpVPR, Operand OpImm> + : NeonI_BitExtract{ + bits<4> Index; +} + +def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", + VPR64, neon_uimm3> { + let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; +} + +def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", + VPR128, neon_uimm4> { + let Inst{14-11} = Index; +} + +class NI_Extract + : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), + (i64 OpImm:$Imm))), + (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; + +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; +def : NI_Extract; + // The followings are for instruction class (3V Elem) // Variant 1 diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 51335e145b7..c0816917562 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -462,8 +462,8 @@ void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, o << "#0x0"; } -void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MOUImm = MI->getOperand(OpNum); assert(MOUImm.isImm() && @@ -475,9 +475,9 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, O.write_hex(Imm); } -void AArch64InstPrinter::printNeonUImm8OperandBare(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { const MCOperand &MOUImm = MI->getOperand(OpNum); assert(MOUImm.isImm() diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 28ebfc45f1f..37b7273438d 100644 --- 
a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -169,9 +169,8 @@ public: void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm8OperandBare(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll new file mode 100644 index 00000000000..5c52cd30676 --- /dev/null +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -0,0 +1,190 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_s8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_s16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %vext +} + +define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vext_s32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %vext +} + +define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) { +; CHECK: test_vext_s64: +entry: + %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> + ret <1 x i64> %vext +} + +define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_s8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_s16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vext +} + +define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vextq_s32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vext +} + +define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vextq_s64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %vext +} + +define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_u8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_u16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %vext +} + +define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: 
test_vext_u32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %vext +} + +define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) { +; CHECK: test_vext_u64: +entry: + %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> + ret <1 x i64> %vext +} + +define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_u8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_u16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vext +} + +define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vextq_u32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vext +} + +define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vextq_u64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %vext +} + +define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vext_f32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %vext +} + +define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) { +; CHECK: test_vext_f64: +entry: + %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> + ret <1 x double> %vext +} + +define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vextq_f32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vext +} + +define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vextq_f64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %vext +} + +define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_p8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_p16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %vext +} + +define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_p8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_p16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vext +} diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 67a938ed77c..b549480a8ff 100644 --- 
a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -5194,3 +5194,44 @@ // CHECK: error: invalid operand for instruction // CHECK: sha256su1 v0.16b, v1.16b, v2.16b // CHECK: ^ + +//---------------------------------------------------------------------- +// Bitwise extract +//---------------------------------------------------------------------- + + ext v0.8b, v1.8b, v2.4h, #0x3 + ext v0.4h, v1.4h, v2.4h, #0x3 + ext v0.2s, v1.2s, v2.2s, #0x1 + ext v0.1d, v1.1d, v2.1d, #0x0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.8b, v1.8b, v2.4h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.4h, v1.4h, v2.4h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.2s, v1.2s, v2.2s, #0x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.1d, v1.1d, v2.1d, #0x0 +// CHECK-ERROR: ^ + + ext v0.16b, v1.16b, v2.8h, #0x3 + ext v0.8h, v1.8h, v2.8h, #0x3 + ext v0.4s, v1.4s, v2.4s, #0x1 + ext v0.2d, v1.2d, v2.2d, #0x0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.16b, v1.16b, v2.8h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.8h, v1.8h, v2.8h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.4s, v1.4s, v2.4s, #0x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.2d, v1.2d, v2.2d, #0x0 +// CHECK-ERROR: ^ + diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s new file mode 100644 index 00000000000..2d58a75a490 --- /dev/null +++ b/test/MC/AArch64/neon-extract.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for bitwise extract +//------------------------------------------------------------------------------ + + ext v0.8b, v1.8b, v2.8b, #0x3 + ext v0.16b, v1.16b, v2.16b, #0x3 + +// CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 // encoding: [0x20,0x18,0x02,0x2e] +// CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 // encoding: [0x20,0x18,0x02,0x6e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index c320d7da099..225bb16212f 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -2042,3 +2042,12 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | 0x00,0x80,0x81,0x4c 0xef,0x45,0x82,0x4c 0xff,0x0b,0x9f,0x4c + +#---------------------------------------------------------------------- +# Bitwise extract +#---------------------------------------------------------------------- +0x20,0x18,0x02,0x2e +0x20,0x18,0x02,0x6e +# CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 +# CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 +
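
For reference, the core of the new lowering is the sequential-mask check added to LowerVECTOR_SHUFFLE: when the shuffle mask over the concatenation {Vn, Vm} is a run of consecutive lane indices, the shuffle becomes a single NEON_VEXTRACT node, which selects to EXT with the starting lane converted to a byte offset. The following is a minimal standalone C++ sketch of that check, with illustrative names (matchExtShuffle is hypothetical, not an LLVM API), not the committed code:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Given a shuffle mask over two vectors of EltNum elements of EltBits bits
    // each, decide whether the shuffle can be lowered to a single EXT and, if
    // so, compute the byte-offset immediate.
    static bool matchExtShuffle(const std::vector<int> &Mask, unsigned EltNum,
                                unsigned EltBits, unsigned &ByteIndex) {
      if (Mask.size() != EltNum)
        return false;
      // The mask must be sequential: Mask[I] == Mask[0] + I.
      for (unsigned I = 0; I != Mask.size(); ++I)
        if (Mask[I] != Mask[0] + (int)I)
          return false;
      assert(EltBits % 8 == 0 && "element size must be a whole number of bytes");
      // EXT only exists for 64-bit and 128-bit vectors.
      unsigned VecBits = EltBits * EltNum;
      if (VecBits != 64 && VecBits != 128)
        return false;
      // The immediate is a byte offset into the concatenation {Vn, Vm}.
      ByteIndex = (EltBits / 8) * Mask[0];
      return true;
    }

    int main() {
      // Matches test_vext_s16: a <4 x i16> shuffle with mask <3, 4, 5, 6>
      // starts at lane 3, so the byte offset is 2 * 3 = 6 -> "ext ..., #0x6".
      unsigned ByteIndex = 0;
      bool OK = matchExtShuffle({3, 4, 5, 6}, /*EltNum=*/4, /*EltBits=*/16,
                                ByteIndex);
      std::printf("match=%d byte-index=%u\n", OK, ByteIndex); // match=1 byte-index=6
      return 0;
    }

The <1 x i64> and <1 x double> tests above check only the function label, with no ext line: a one-element shuffle selects a whole register and needs no EXT, so those cases fall through to the existing lowering.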