From da85caae7a2db9ce0d6fd957ae313df6b29dc2ce Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Nov 2015 00:13:24 +0000 Subject: [PATCH] [AVX512] The vpermi2 instructions require an integer vector for the index vector. This is reflected correctly in the intrinsics, but was not refelected in the isel patterns. For the floating point types, this requires adding a bitcast to the index vector when its passed through to the output. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254277 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- lib/Target/X86/X86InstrAVX512.td | 89 ++++++++++++++++--------- lib/Target/X86/X86InstrFragmentsSIMD.td | 10 ++- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 59350ab2184..0174779223c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16354,7 +16354,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget if (IntrData->Type == VPERM_3OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else - PassThru = Src2; + PassThru = DAG.getBitcast(VT, Src2); // Swap Src1 and Src2 in the node creation return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5ba6075d283..16b1f3b59b0 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -276,6 +276,22 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +// Similar to AVX512_maskable_3rc but in this case the input VT for the tied +// operand differs from the output VT. This requires a bitconvert on +// the preserved vector going into the vselect. +multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, + X86VectorVTInfo InVT, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, @@ -1136,71 +1152,82 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", avx512vl_i64_info, VK8>, VEX_W; //===----------------------------------------------------------------------===// -// -- VPERM2I - 3 source operands form -- +// -- VPERMI2 - 3 source operands form -- multiclass avx512_perm_i opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst" in { - defm rr: AVX512_maskable_3src, EVEX_4V, + (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, AVX5128IBase; let mayLoad = 1 in - defm rm: AVX512_maskable_3src, EVEX_4V, AVX5128IBase; } } multiclass avx512_perm_i_mb opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let mayLoad = 1, Constraints = "$src1 = $dst" in - defm rmb: AVX512_maskable_3src, AVX5128IBase, EVEX_4V, EVEX_B; } multiclass avx512_perm_i_sizes opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { - defm NAME: avx512_perm_i, - avx512_perm_i_mb, EVEX_V512; + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_i, - avx512_perm_i_mb, EVEX_V128; - defm NAME#256: avx512_perm_i, - avx512_perm_i_mb, EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } multiclass avx512_perm_i_sizes_w opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { let Predicates = [HasBWI] in - defm NAME: avx512_perm_i, EVEX_V512; + defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm NAME#128: avx512_perm_i, EVEX_V128; - defm NAME#256: avx512_perm_i, EVEX_V256; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; } } -defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", X86VPermi2X, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", X86VPermi2X, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", X86VPermi2X, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", X86VPermi2X, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", X86VPermi2X, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -// VPERMT +// VPERMT2 multiclass avx512_perm_t opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst" in { diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 8d7d48bd61a..25f247e9d62 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -292,8 +292,6 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI", def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; -def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameSizeAs<0,2>, @@ -389,7 +387,13 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTCisSameSizeAs<0,2>, SDTCisSameAs<0,3>]>, []>; -def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; +def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, + SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; -- 2.34.1