From ffa6c40ecf502281ea171969b42a38c2eef0493f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 16 Apr 2012 07:13:00 +0000 Subject: [PATCH] Replace vpermd/vpermps intrinic patterns with custom lowering to target specific nodes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154801 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++++ lib/Target/X86/X86InstrSSE.td | 26 ++++++++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5bd89eae7bd..c0e935df584 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9597,6 +9597,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_vpermil_pd_256: return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/intruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5cdfb10ef14..65e3c1e19fa 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7735,24 +7735,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), // multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, - Intrinsic Int> { + ValueType OpVT> { def Yrr : AVX28I, VEX_4V; + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V; def Yrm : AVX28I, + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, + (bitconvert (mem_frag addr:$src2)))))]>, VEX_4V; } -defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>; +defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; +defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>; multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, ValueType OpVT> { @@ -7775,18 +7777,6 @@ defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W; let ExeDomain = SSEPackedDouble in defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; -let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86VPermv VR256:$src1, VR256:$src2)), - (VPERMDYrr VR256:$src1, VR256:$src2)>; -def : Pat<(v8f32 (X86VPermv VR256:$src1, VR256:$src2)), - (VPERMPSYrr VR256:$src1, VR256:$src2)>; - -def : Pat<(v8i32 (X86VPermv VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VPERMDYrm VR256:$src1, addr:$src2)>; -def : Pat<(v8f32 (X86VPermv VR256:$src1, (memopv8f32 addr:$src2))), - (VPERMPSYrm VR256:$src1, addr:$src2)>; -} - //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // -- 2.34.1