From 3706eda52c4565016959902a3f5aaf7271516286 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 14:53:25 +0000 Subject: [PATCH] [mips][msa] Added support for matching pckev, and pckod from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191306 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/Mips/MipsISelLowering.h | 2 + lib/Target/Mips/MipsMSAInstrInfo.td | 18 ++-- lib/Target/Mips/MipsSEISelLowering.cpp | 70 +++++++++++++ test/CodeGen/Mips/msa/shuffle.ll | 132 +++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 8 deletions(-) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 6f5918d23fb..8fb1efe38f8 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -230,6 +230,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::ILVOD: return "MipsISD::ILVOD"; case MipsISD::ILVL: return "MipsISD::ILVL"; case MipsISD::ILVR: return "MipsISD::ILVR"; + case MipsISD::PCKEV: return "MipsISD::PCKEV"; + case MipsISD::PCKOD: return "MipsISD::PCKOD"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2fece8252ef..20211146e24 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -179,6 +179,8 @@ namespace llvm { ILVOD, // Interleave odd elements ILVL, // Interleave left elements ILVR, // Interleave right elements + PCKEV, // Pack even elements + PCKOD, // Pack odd elements // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index a9c421edc90..3fd52c684f4 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -48,6 +48,8 @@ def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>; def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>; def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>; def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>; +def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>; +def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -2060,15 +2062,15 @@ class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128B>; -class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; -class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; -class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, MSA128W>; -class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, MSA128D>; +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128B>; +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128H>; +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128W>; +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128D>; -class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, MSA128B>; -class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H>; -class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W>; -class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D>; +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128B>; +class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128H>; +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128W>; +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128D>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128B>; class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 4710e6a5a60..e86f4cb2396 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1447,6 +1447,18 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ori_b: return lowerMSABinaryImmIntr(Op, DAG, ISD::OR, lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_pckev_b: + case Intrinsic::mips_pckev_h: + case Intrinsic::mips_pckev_w: + case Intrinsic::mips_pckev_d: + return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_pckod_b: + case Intrinsic::mips_pckod_h: + case Intrinsic::mips_pckod_w: + case Intrinsic::mips_pckod_d: + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_pcnt_b: case Intrinsic::mips_pcnt_h: case Intrinsic::mips_pcnt_w: @@ -1951,6 +1963,58 @@ static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, Op->getOperand(1)); } +// Lower VECTOR_SHUFFLE into PCKEV (if possible). +// +// PCKEV copies the even elements of each vector into the result vector. +// +// It is possible to lower into PCKEV when the mask takes the form: +// <0, 2, 4, ..., n, n+2, n+4, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int Idx = 0; + + for (unsigned i = 0; i < Indices.size(); ++i) { + if (Indices[i] != -1 && Indices[i] != Idx) + return SDValue(); + Idx += 2; + } + + return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + +// Lower VECTOR_SHUFFLE into PCKOD (if possible). +// +// PCKOD copies the odd elements of each vector into the result vector. +// +// It is possible to lower into PCKOD when the mask takes the form: +// <1, 3, 5, ..., n+1, n+3, n+5, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int Idx = 1; + + for (unsigned i = 0; i < Indices.size(); ++i) { + if (Indices[i] != -1 && Indices[i] != Idx) + return SDValue(); + Idx += 2; + } + + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + // Lower VECTOR_SHUFFLE into VSHF. // // This mostly consists of converting the shuffle indices in Indices into a @@ -2031,6 +2095,12 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, if (Result.getNode()) return Result; Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); if (Result.getNode()) return Result; return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll index a4c68600e8d..129efe4f3d9 100644 --- a/test/CodeGen/Mips/msa/shuffle.ll +++ b/test/CodeGen/Mips/msa/shuffle.ll @@ -616,3 +616,135 @@ define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind ret void ; CHECK: .size ilvr_v2i64_0 } + +define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: pckev_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> + ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v16i8_0 +} + +define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: pckev_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> + ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v8i16_0 +} + +define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: pckev_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> + ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v4i32_0 +} + +define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: pckev_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> + ; pckev.d and ilvev.d are equivalent for v2i64 + ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v2i64_0 +} + +define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: pckod_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> + ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v16i8_0 +} + +define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: pckod_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> + ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v8i16_0 +} + +define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: pckod_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> + ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v4i32_0 +} + +define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: pckod_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> + ; pckod.d and ilvod.d are equivalent for v2i64 + ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v2i64_0 +} -- 2.34.1