From: Chad Rosier
Date: Thu, 30 Jan 2014 21:46:54 +0000 (+0000)
Subject: [AArch64] Custom lower concat_vector patterns with v4i16, v4i32, v8i8, v8i16, v16i8...
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=dafea158a7223491febdaedb969b1ec0e8680f56;p=oota-llvm.git

[AArch64] Custom lower concat_vector patterns with v4i16, v4i32, v8i8, v8i16, v16i8 types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200491 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f101fffa1d..769a2bcb629 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -332,6 +332,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
 
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
   setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
   setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
   setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
@@ -2259,6 +2265,52 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(Opc, dl, VT, Vec);
 }
 
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+  // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
+  // same operand and of type v1* using the DUP instruction.
+  unsigned NumOps = Op->getNumOperands();
+  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+    return Op;
+
+  // Must be a single value for VDUP.
+  bool isConstant = true;
+  SDValue Op0 = Op.getOperand(0);
+  for (unsigned i = 1; i < NumOps; ++i) {
+    SDValue OpN = Op.getOperand(i);
+    if (Op0 != OpN)
+      return Op;
+
+    if (!isa<ConstantSDNode>(OpN->getOperand(0)))
+      isConstant = false;
+  }
+
+  // Verify the value type.
+  EVT EltVT = Op0.getValueType();
+  switch (NumOps) {
+  default: llvm_unreachable("Unexpected number of operands");
+  case 4:
+    if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
+      return Op;
+    break;
+  case 8:
+    if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
+      return Op;
+    break;
+  case 16:
+    if (EltVT != MVT::v1i8)
+      return Op;
+    break;
+  }
+
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  // VDUP produces better code for constants.
+  if (isConstant)
+    return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
+  return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
+                     DAG.getConstant(0, MVT::i64));
+}
+
 SDValue
 AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                       bool IsSigned) const {
@@ -3241,6 +3293,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::VASTART: return LowerVASTART(Op, DAG);
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
+  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   }
 
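
Illustrative note (not part of the patch): the eligibility test in LowerCONCAT_VECTORS above reduces to a small table of shapes. The standalone C++ sketch below models only that check; eligibleForDup is an invented name for illustration and none of this is LLVM API.

    // Standalone sketch, not LLVM code: models the shape test performed by
    // LowerCONCAT_VECTORS. A concat_vectors is custom lowered only when it has
    // 4, 8, or 16 identical v1 operands whose element width matches the result
    // type; everything else is left to the default handling.
    #include <cstdio>

    // eligibleForDup is a made-up helper name used only for illustration.
    static bool eligibleForDup(unsigned numOps, unsigned eltBits) {
      switch (numOps) {
      case 4:  return eltBits == 16 || eltBits == 32; // v4i16, v4i32
      case 8:  return eltBits == 8  || eltBits == 16; // v8i8,  v8i16
      case 16: return eltBits == 8;                   // v16i8
      default: return false;                          // other operand counts are rejected earlier
      }
    }

    int main() {
      std::printf("v4i16: %d\n", eligibleForDup(4, 16));  // 1: gets the DUP lowering
      std::printf("v16i8: %d\n", eligibleForDup(16, 8));  // 1: gets the DUP lowering
      std::printf("v2i32: %d\n", eligibleForDup(2, 32));  // 0: not covered by this patch
      return 0;
    }

For the shapes that pass this check, the code above emits NEON_VDUP of the constant when every operand is the same constant, and NEON_VDUPLANE of lane 0 otherwise.
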
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 1309bf12b5c..2cf27b861b8 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -7106,6 +7106,20 @@ def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
             (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
             (i64 0)))>;
 
+multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
+                             ValueType OpTy, RegisterClass OpRC,
+                             Operand OpNImm, SubRegIndex SubIndex> {
+def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
+          (ResTy (DUPELT
+            (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
+}
+
+defm : NeonI_DUP_pattern;
+defm : NeonI_DUP_pattern;
+defm : NeonI_DUP_pattern;
+defm : NeonI_DUP_pattern;
+defm : NeonI_DUP_pattern;
+
 class NeonI_DUP
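
Illustrative note (not part of the patch): the NeonI_DUP_pattern rules above select a SUBREG_TO_REG that places the single v1 element into lane 0 of a wider register (remaining lanes unspecified), followed by a DUP-element instruction that replicates one lane across the whole result. The sketch below is a plain C++ value-level model of that behaviour, treating lanes as array slots; broadcastLane is an invented helper, not an LLVM or AArch64 interface.

    // Rough value-level model, not LLVM or target code: replicate one source
    // lane into every lane of the destination, as the selected DUP-element
    // instruction does.
    #include <array>
    #include <cstdint>
    #include <cstdio>

    template <typename T, std::size_t N>
    static std::array<T, N> broadcastLane(const std::array<T, N> &src, unsigned lane) {
      std::array<T, N> dst{};
      dst.fill(src[lane]); // every destination lane receives src[lane]
      return dst;
    }

    int main() {
      // Mirrors concat_vector_v4i16 below: the v1i16 value sits in lane 0 of a
      // wider register (other lanes unspecified), then lane 0 is broadcast.
      std::array<std::uint16_t, 4> widened = {42, 0, 0, 0};
      std::array<std::uint16_t, 4> out = broadcastLane(widened, 0);
      std::printf("%u %u %u %u\n",
                  static_cast<unsigned>(out[0]), static_cast<unsigned>(out[1]),
                  static_cast<unsigned>(out[2]), static_cast<unsigned>(out[3]));
      // prints: 42 42 42 42
      return 0;
    }

The tests that follow check exactly the two expected forms: dup from a general register (wzr or a w-register) for the constant splats, and dup from element 0 of a vector register for the variable splats.
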
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index eb864211260..8d4a3388bb6 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -1301,3 +1301,89 @@ entry:
   %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a)
   ret <1 x i16> %vsqadd2.i
 }
+
+define <4 x i16> @concat_vector_v4i16_const() {
+; CHECK-LABEL: concat_vector_v4i16_const:
+; CHECK: dup {{v[0-9]+}}.4h, wzr
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i16> @concat_vector_v4i16_const_one() {
+; CHECK-LABEL: concat_vector_v4i16_const_one:
+; CHECK: movz {{w[0-9]+}}, #1
+; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32_const() {
+; CHECK-LABEL: concat_vector_v4i32_const:
+; CHECK: dup {{v[0-9]+}}.4s, wzr
+ %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8_const() {
+; CHECK-LABEL: concat_vector_v8i8_const:
+; CHECK: dup {{v[0-9]+}}.8b, wzr
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const() {
+; CHECK-LABEL: concat_vector_v8i16_const:
+; CHECK: dup {{v[0-9]+}}.8h, wzr
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const_one() {
+; CHECK-LABEL: concat_vector_v8i16_const_one:
+; CHECK: movz {{w[0-9]+}}, #1
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8_const() {
+; CHECK-LABEL: concat_vector_v16i8_const:
+; CHECK: dup {{v[0-9]+}}.16b, wzr
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}
+
+define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v4i16:
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
+; CHECK-LABEL: concat_vector_v4i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+ %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v8i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0]
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v8i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v16i8:
+; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0]
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}