X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=463ab6232702374a785d0c4047bfedd2ee108a5e;hb=dfe88a08c7f595801e733d3e6f4504d2695d5309;hp=04dc681432a88397304ca255c8ba201914fb2ce5;hpb=59cb03d3298bc830dfda5b4ca4981d179c31b0fe;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 04dc681432a..463ab623270 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2,9 +2,11 @@ // EltVT). These are things like the register class for the writemask, etc. // The idea is to pass one of these as the template argument rather than the // individual arguments. -class X86VectorVTInfo { RegisterClass RC = rc; + ValueType EltVT = eltvt; int NumElts = numelts; // Corresponding mask register class. @@ -23,7 +25,13 @@ class X86VectorVTInfo(VTName); @@ -57,9 +65,11 @@ class X86VectorVTInfo("memopfsf32"), + !if (!eq (NumElts#EltTypeName, "1f64"), !cast("memopfsf64"), !if (!eq (TypeVariantName, "f"), !cast("memop" # VTName), !if (!eq (EltTypeName, "i64"), !cast("memop" # VTName), - !if (!eq (VTName, "v16i32"), !cast("memop" # VTName), ?))); + !if (!eq (VTName, "v16i32"), !cast("memop" # VTName), ?))))); // The corresponding float type, e.g. v16f32 for v16i32 // Note: For EltSize < 32, FloatVT is illegal and TableGen @@ -114,6 +124,11 @@ def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; +// We map scalar types to the smallest (128-bit) vector type +// with the appropriate element type. This allows to use the same masking logic. +def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; +def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; + class AVX512VLVectorVTInfo { X86VectorVTInfo info512 = i512; @@ -129,6 +144,10 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo; def avx512vl_i64_info : AVX512VLVectorVTInfo; +def avx512vl_f32_info : AVX512VLVectorVTInfo; +def avx512vl_f64_info : AVX512VLVectorVTInfo; // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. @@ -142,20 +161,21 @@ multiclass AVX512_maskable_custom O, Format F, list Pattern, list MaskingPattern, list ZeroMaskingPattern, + string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, EVEX_K { // In case of the 3src subclass this is overridden with a let. @@ -163,8 +183,8 @@ multiclass AVX512_maskable_custom O, Format F, } let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> def NAME#kz: AVX512, EVEX_KZ; @@ -178,6 +198,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, + SDNode Select = vselect, string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : @@ -186,23 +207,39 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, RHS)], [(set _.RC:$dst, MaskingRHS)], [(set _.RC:$dst, - (vselect _.KRCWM:$mask, RHS, _.ImmAllZerosV))], - MaskingConstraint, NoItinerary, IsCommutable>; + (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], + Round, MaskingConstraint, NoItinerary, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and -// the zero-masking variant of the instruction. In the masking case, the +// the zero-masking variant of the vector instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, InstrItinClass itin = NoItinerary, + dag RHS, string Round = "", + InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common; + (vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect, + Round, "$src0 = $dst", itin, IsCommutable>; + +// This multiclass generates the unconditional/non-masking, the masking and +// the zero-masking variant of the scalar instruction. +multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, string Round = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_maskable_common; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -228,7 +265,7 @@ multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, AVX512_maskable_custom; // Bitcasts between 512-bit vector types. Return the original type since @@ -573,105 +610,175 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, string OpcodeStr, - RegisterClass DestRC, - RegisterClass SrcRC, X86MemOperand x86memop> { - def rr : AVX5128I, EVEX; - def rm : AVX5128I, EVEX; +multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, + ValueType svt, X86VectorVTInfo _> { + defm r : AVX512_maskable, + T8PD, EVEX; + + let mayLoad = 1 in { + defm m : AVX512_maskable, + T8PD, EVEX; + } +} + +multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, + AVX512VLVectorVTInfo _> { + defm Z : avx512_fp_broadcast, + EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : avx512_fp_broadcast, + EVEX_V256; + } } + let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512, - VR128X, f32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, + avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + let Predicates = [HasVLX] in { + defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, + v4f32, v4f32x_info>, EVEX_V128, + EVEX_CD8<32, CD8VT1>; + } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512, - VR128X, f64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +// avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. +// Later, we can canonize broadcast instructions before ISel phase and +// eliminate additional patterns on ISel. +// SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar +// representations of source +multiclass avx512_broadcast_pat { + def : Pat<(_.VT (OpNode (_.EltVT SrcRC_s:$src))), + (!cast(InstName##"r") + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + let AddedComplexity = 30 in { + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (_.EltVT SrcRC_s:$src)), _.RC:$src0)), + (!cast(InstName##"rk") _.RC:$src0, _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + def : Pat<(_.VT(vselect _.KRCWM:$mask, + (OpNode (_.EltVT SrcRC_s:$src)), _.ImmAllZerosV)), + (!cast(InstName##"rkz") _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + } +} + +defm : avx512_broadcast_pat<"VBROADCASTSSZ", X86VBroadcast, v16f32_info, + VR128X, FR32X>; +defm : avx512_broadcast_pat<"VBROADCASTSDZ", X86VBroadcast, v8f64_info, + VR128X, FR64X>; + +let Predicates = [HasVLX] in { + defm : avx512_broadcast_pat<"VBROADCASTSSZ256", X86VBroadcast, + v8f32x_info, VR128X, FR32X>; + defm : avx512_broadcast_pat<"VBROADCASTSSZ128", X86VBroadcast, + v4f32x_info, VR128X, FR32X>; + defm : avx512_broadcast_pat<"VBROADCASTSDZ256", X86VBroadcast, + v4f64x_info, VR128X, FR64X>; } def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSZrm addr:$src)>; + (VBROADCASTSSZm addr:$src)>; def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDZrm addr:$src)>; + (VBROADCASTSDZm addr:$src)>; def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src), - (VBROADCASTSSZrm addr:$src)>; + (VBROADCASTSSZm addr:$src)>; def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src), - (VBROADCASTSDZrm addr:$src)>; - -multiclass avx512_int_broadcast_reg opc, string OpcodeStr, - RegisterClass SrcRC, RegisterClass KRC> { - def Zrr : AVX5128I, EVEX, EVEX_V512; - def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; -} - -defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>; -defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>, - VEX_W; - + (VBROADCASTSDZm addr:$src)>; + +multiclass avx512_int_broadcast_reg opc, X86VectorVTInfo _, + RegisterClass SrcRC> { + defm r : AVX512_maskable_in_asm, T8PD, EVEX; +} + +multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, + RegisterClass SrcRC, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcast_reg, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcast_reg, EVEX_V256; + defm Z128 : avx512_int_broadcast_reg, EVEX_V128; + } +} + +defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR32, + HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR32, + HasBWI>; +defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32, + HasAVX512>; +defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64, + HasAVX512>, VEX_W; + def : Pat <(v16i32 (X86vzext VK16WM:$mask)), - (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>; + (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>; def : Pat <(v8i64 (X86vzext VK8WM:$mask)), - (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>; + (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>; def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), - (VPBROADCASTDrZrr GR32:$src)>; + (VPBROADCASTDrZr GR32:$src)>; def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))), - (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>; + (VPBROADCASTDrZrkz VK16WM:$mask, GR32:$src)>; def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), - (VPBROADCASTQrZrr GR64:$src)>; + (VPBROADCASTQrZr GR64:$src)>; def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), - (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>; + (VPBROADCASTQrZrkz VK8WM:$mask, GR64:$src)>; def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))), - (VPBROADCASTDrZrr GR32:$src)>; + (VPBROADCASTDrZr GR32:$src)>; def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))), - (VPBROADCASTQrZrr GR64:$src)>; + (VPBROADCASTQrZr GR64:$src)>; def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src), (v16i32 immAllZerosV), (i16 GR16:$mask))), - (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>; + (VPBROADCASTDrZrkz (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>; def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), - (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; + (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; multiclass avx512_int_broadcast_rm opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, RegisterClass KRC> { def rr : AVX5128I, EVEX; def krr : AVX5128I, EVEX, EVEX_KZ; let mayLoad = 1 in { def rm : AVX5128I, EVEX; def krm : AVX5128I, EVEX, EVEX_KZ; } } @@ -688,12 +795,12 @@ multiclass avx512_int_subvec_broadcast_rm opc, string OpcodeStr, RegisterClass KRC> { let mayLoad = 1 in { def rm : AVX5128I, EVEX; def krm : AVX5128I, EVEX, EVEX_KZ; } } @@ -710,15 +817,10 @@ def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), (VPBROADCASTQZrr VR128X:$src)>; -def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), - (VBROADCASTSSZrr VR128X:$src)>; -def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), - (VBROADCASTSDZrr VR128X:$src)>; - def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), - (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; + (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), - (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; + (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; @@ -726,21 +828,21 @@ def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), - (VBROADCASTSSZrr VR128X:$src)>; + (VBROADCASTSSZr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), - (VBROADCASTSDZrr VR128X:$src)>; - + (VBROADCASTSDZr VR128X:$src)>; + // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), - (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>; + (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>; def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), - (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; + (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; let Predicates = [HasAVX512] in { def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), addr:$src)), sub_ymm)>; } @@ -752,15 +854,15 @@ multiclass avx512_mask_broadcast opc, string OpcodeStr, RegisterClass KRC> { let Predicates = [HasCDI] in def Zrr : AVX512XS8I, EVEX, EVEX_V512; - + let Predicates = [HasCDI, HasVLX] in { def Z128rr : AVX512XS8I, EVEX, EVEX_V128; def Z256rr : AVX512XS8I, EVEX, EVEX_V256; } } @@ -782,14 +884,14 @@ multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, def ri : AVX512AIi8, EVEX; def mi : AVX512AIi8, @@ -804,7 +906,7 @@ multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, def rr : AVX5128I, @@ -812,7 +914,7 @@ multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, def rm : AVX5128I, @@ -836,20 +938,20 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; // -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, +multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { def rr : AVX5128I, EVEX_4V; def rm : AVX5128I, EVEX_4V; @@ -857,13 +959,13 @@ multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, +defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let ExeDomain = SSEPackedSingle in defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, +defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // -- VPERM2I - 3 source operands form -- @@ -874,7 +976,7 @@ let Constraints = "$src1 = $dst" in { def rr : AVX5128I, EVEX_4V; @@ -882,7 +984,7 @@ let Constraints = "$src1 = $dst" in { def rrk : AVX5128I, EVEX_4V; @@ -914,7 +1016,7 @@ let Constraints = "$src1 = $dst" in { def rmk : AVX5128I opc, string OpcodeStr, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, PatFrag mem_frag, - SDNode OpNode, ValueType vt> { - def rr : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain in { + def rr : AVX5128I, EVEX_4V, EVEX_K; - let mayLoad = 1 in - def rm : AVX5128I, EVEX_4V; + def rrk : AVX5128I, EVEX_4V, EVEX_K; + "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), + [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1), + (_.VT _.RC:$src2)))]>, EVEX_4V, EVEX_K; + def rrkz : AVX5128I, EVEX_4V, EVEX_KZ; + let mayLoad = 1 in { + def rm : AVX5128I, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + def rmk : AVX5128I, + EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>; + def rmkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>; + } + } } +multiclass avx512_blendmask_rmb opc, string OpcodeStr, X86VectorVTInfo _> { + + def rmbk : AVX5128I, + EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + + def rmb : AVX5128I, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + +} + +multiclass blendmask_dq opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo> { + defm Z : avx512_blendmask , + avx512_blendmask_rmb , EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : avx512_blendmask, + avx512_blendmask_rmb , EVEX_V256; + defm Z128 : avx512_blendmask, + avx512_blendmask_rmb , EVEX_V128; + } +} + +multiclass blendmask_bw opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasBWI] in + defm Z : avx512_blendmask , EVEX_V512; + + let Predicates = [HasBWI, HasVLX] in { + defm Z256 : avx512_blendmask , EVEX_V256; + defm Z128 : avx512_blendmask , EVEX_V128; + } +} + + +defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>; +defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W; +defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>; +defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W; +defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>; +defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W; -let ExeDomain = SSEPackedSingle in -defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", - VK16WM, VR512, f512mem, - memopv16f32, vselect, v16f32>, - EVEX_CD8<32, CD8VF>, EVEX_V512; -let ExeDomain = SSEPackedDouble in -defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", - VK8WM, VR512, f512mem, - memopv8f64, vselect, v8f64>, - VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; - -def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), (i16 GR16:$mask))), - (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), - VR512:$src1, VR512:$src2)>; - -def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), (i8 GR8:$mask))), - (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), - VR512:$src1, VR512:$src2)>; - -defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", - VK16WM, VR512, f512mem, - memopv16i32, vselect, v16i32>, - EVEX_CD8<32, CD8VF>, EVEX_V512; - -defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", - VK8WM, VR512, f512mem, - memopv8i64, vselect, v8i64>, - VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; - -def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (i16 GR16:$mask))), - (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16), - VR512:$src1, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (i8 GR8:$mask))), - (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8), - VR512:$src1, VR512:$src2)>; let Predicates = [HasAVX512] in { def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), (v8f32 VR256X:$src2))), - (EXTRACT_SUBREG - (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (EXTRACT_SUBREG + (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (EXTRACT_SUBREG - (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (EXTRACT_SUBREG + (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; } @@ -1391,17 +1526,17 @@ multiclass avx512_cmp_packed; def rrib: AVX512PIi8<0xC2, MRMSrcReg, (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", suffix, - " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), + "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), [], d>, EVEX_B; def rmi : AVX512PIi8<0xC2, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", suffix, - " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set KRC:$dst, (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; @@ -1410,11 +1545,11 @@ multiclass avx512_cmp_packed; + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), !strconcat("vcmp", suffix, - " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; } } @@ -1442,25 +1577,25 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), imm:$cc), VK8)>; def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), imm:$cc, (i16 -1), + (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR16)>; - + def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), imm:$cc, (i8 -1), + (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR8)>; def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), imm:$cc, (i16 -1), + (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1), FROUND_CURRENT)), (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR16)>; def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), imm:$cc, (i8 -1), + (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1), FROUND_CURRENT)), (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR8)>; @@ -1475,14 +1610,14 @@ multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, ValueType vvt, ValueType ivt, X86MemOperand x86memop> { let hasSideEffects = 0 in { def kk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; let mayLoad = 1 in def km : I; let mayStore = 1 in def mk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; } } @@ -1491,9 +1626,9 @@ multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, RegisterClass KRC, RegisterClass GRC> { let hasSideEffects = 0 in { def kr : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; def rk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; } } @@ -1643,7 +1778,7 @@ multiclass avx512_mask_unop opc, string OpcodeStr, Predicate prd> { let Predicates = [prd] in def rr : I; } @@ -1697,7 +1832,7 @@ multiclass avx512_mask_binop opc, string OpcodeStr, let Predicates = [prd] in def rr : I; } @@ -1773,7 +1908,7 @@ multiclass avx512_mask_unpck opc, string OpcodeStr, let Predicates = [HasAVX512] in def rr : I; + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { @@ -1802,7 +1937,7 @@ multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode> { let Predicates = [HasAVX512], Defs = [EFLAGS] in def rr : I; } @@ -1823,7 +1958,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, let Predicates = [HasAVX512] in def ri : Ii8; } @@ -1978,7 +2113,7 @@ multiclass avx512_load_vl opc, string OpcodeStr, string ld_pat, multiclass avx512_store opc, string OpcodeStr, PatFrag st_frag, ValueType OpVT, RegisterClass KRC, RegisterClass RC, X86MemOperand memop, Domain d> { - let isAsmParserOnly = 1, hasSideEffects = 0 in { + let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_alt : AVX512PI, EVEX; @@ -2074,6 +2209,46 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), + (VMOVUPSZmrk addr:$ptr, + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; + +def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; + +def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)), + (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; + +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)), + (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)), + (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, + (bc_v16f32 (v16i32 immAllZerosV)))), + (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))), + (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, + (bc_v8f64 (v16i32 immAllZerosV)))), + (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))), + (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), + (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; + defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>, avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", @@ -2148,6 +2323,46 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))), + (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)), + (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))), + (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, + (bc_v8i64 (v16i32 immAllZerosV)))), + (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))), + (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; + +def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)), + (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; + +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)), + (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; + +// SKX replacement +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), + (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>; + +// KNL replacement +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), + (VMOVDQU32Zmrk addr:$ptr, + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; + +def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; + + // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), @@ -2254,12 +2469,12 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), + def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128X:$dst, (vt (OpNode VR128X:$src1, (scalar_to_vector RC:$src2))))], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; @@ -2267,19 +2482,19 @@ multiclass avx512_move_scalar , EVEX_4V, VEX_LIG, EVEX_K; def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, " \t{$src, $dst|$dst, $src}"), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, EVEX, VEX_LIG; let mayStore = 1 in { def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, " \t{$src, $dst|$dst, $src}"), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, EVEX, VEX_LIG; def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), [], IIC_SSE_MOV_S_MR>, EVEX, VEX_LIG, EVEX_K; } // mayStore @@ -2336,7 +2551,7 @@ let Predicates = [HasAVX512] in { // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSZrr (v4f32 (V_SET0)), + (VMOVSSZrr (v4f32 (V_SET0)), (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), (SUBREG_TO_REG (i32 0), @@ -2465,7 +2680,7 @@ let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set VR128X:$dst, (v2i64 (X86vzmovl + [(set VR128X:$dst, (v2i64 (X86vzmovl (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; @@ -2487,7 +2702,7 @@ let Predicates = [HasAVX512] in { (VMOV64toPQIZrr GR64:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIZrr GR32:$src)>; - + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -2601,7 +2816,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), - itins.rr, IsCommutable>, + "", itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in @@ -2610,7 +2825,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))), - itins.rm>, + "", itins.rm>, AVX512BIBase, EVEX_4V; } @@ -2626,7 +2841,7 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - itins.rm>, + "", itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; } @@ -2728,48 +2943,48 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, { def rr : AVX512BI, EVEX_4V; def rrk : AVX512BI, EVEX_4V, EVEX_K; def rrkz : AVX512BI, EVEX_4V, EVEX_KZ; } let mayLoad = 1 in { def rm : AVX512BI, EVEX_4V; def rmk : AVX512BI, EVEX_4V, EVEX_K; def rmkz : AVX512BI, EVEX_4V, EVEX_KZ; def rmb : AVX512BI, EVEX_4V, EVEX_B; def rmbk : AVX512BI, EVEX_4V, EVEX_B, EVEX_K; def rmbkz : AVX512BI, EVEX_4V, EVEX_B, EVEX_KZ; @@ -2897,12 +3112,12 @@ multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, X86MemOperand x86memop> { def rr : AVX512BI, EVEX_4V; def rm : AVX512BI, EVEX_4V; @@ -2924,19 +3139,19 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, // multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, + SDNode OpNode, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { def ri : AVX512Ii8, EVEX; def mi : AVX512Ii8, EVEX; @@ -2984,118 +3199,58 @@ defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; } multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, - RegisterClass KRC, - RegisterClass RC, ValueType vt, - X86MemOperand x86memop, PatFrag mem_frag, - X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, - Domain d, OpndItins itins, bit commutable> { - let isCommutable = commutable in { - def rr : PI, - EVEX_4V; - - def rrk: PI, EVEX_4V, EVEX_K; - - def rrkz: PI, EVEX_4V, EVEX_KZ; - } - + X86VectorVTInfo _, bit IsCommutable> { + defm rr: AVX512_maskable, EVEX_4V; let mayLoad = 1 in { - def rm : PI, EVEX_4V; - - def rmb : PI, EVEX_4V, EVEX_B; - - def rmk : PI, EVEX_4V, EVEX_K; - - def rmkz : PI, EVEX_4V, EVEX_KZ; - - def rmbk : PI, EVEX_4V, EVEX_B, EVEX_K; - - def rmbkz : PI, EVEX_4V, EVEX_B, EVEX_KZ; + defm rm: AVX512_maskable, EVEX_4V; + defm rmb: AVX512_maskable, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} + +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, + bit IsCommutable = 0> { + defm PSZ : avx512_fp_packed, EVEX_V512, PS, + EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_packed, EVEX_V512, PD, VEX_W, + EVEX_CD8<64, CD8VF>; + + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_packed, EVEX_V128, PS, + EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_packed, EVEX_V256, PS, + EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_packed, EVEX_V128, PD, VEX_W, + EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_packed, EVEX_V256, PD, VEX_W, + EVEX_CD8<64, CD8VF>; } } -defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - -defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; -defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; -defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - -defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; -defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - -defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; +defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>; +defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>; +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>; +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>; def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), @@ -3120,18 +3275,18 @@ def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1), // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// -multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, +multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, SDNode OpNode, ValueType vt> { def rr : AVX512PI, EVEX_4V; def rm : AVX512PI, EVEX_4V; } @@ -3158,154 +3313,122 @@ def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1), def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))), (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>; + //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, RegisterClass RC, - ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, - RegisterClass KRC> { - def ri : AVX512BIi8, EVEX_4V; - def rik : AVX512BIi8, EVEX_4V, EVEX_K; - def mi: AVX512BIi8, EVEX_4V; - def mik: AVX512BIi8, EVEX_4V, EVEX_K; + string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + defm ri : AVX512_maskable, AVX512BIi8Base, EVEX_4V; + defm mi : AVX512_maskable, AVX512BIi8Base, EVEX_4V; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType vt, ValueType SrcVT, - PatFrag bc_frag, RegisterClass KRC> { - // src2 is always 128-bit - def rr : AVX512BI, EVEX_4V; - def rrk : AVX512BI, EVEX_4V, EVEX_K; - def rm : AVX512BI, EVEX_4V; - def rmk : AVX512BI, EVEX_4V, EVEX_K; + ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { + // src2 is always 128-bit + defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V; + defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V; +} + +multiclass avx512_shift_sizes opc, string OpcodeStr, SDNode OpNode, + ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { + defm Z : avx512_shift_rrm, EVEX_V512; +} + +multiclass avx512_shift_types opcd, bits<8> opcq, string OpcodeStr, + SDNode OpNode> { + defm D : avx512_shift_sizes, EVEX_CD8<32, CD8VQ>; + defm Q : avx512_shift_sizes, EVEX_CD8<64, CD8VQ>, VEX_W; } defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, - VR512, v16i32, i512mem, memopv16i32, VK16WM>, + v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, - VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VQ>; - defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, - VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + v8i64_info>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, - VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, - EVEX_CD8<64, CD8VQ>, VEX_W; defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, - VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512, + v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, - VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VQ>; - defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, - VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + v8i64_info>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, - VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, - EVEX_CD8<64, CD8VQ>, VEX_W; defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, - VR512, v16i32, i512mem, memopv16i32, VK16WM>, + v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, - VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VQ>; - defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, - VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + v8i64_info>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, - VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, - EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>; +defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>; +defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>; //===-------------------------------------------------------------------===// // Variable Bit Shifts //===-------------------------------------------------------------------===// multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType vt, - X86MemOperand x86memop, PatFrag mem_frag> { - def rr : AVX5128I, - EVEX_4V; - def rm : AVX5128I, - EVEX_4V; + X86VectorVTInfo _> { + defm rr : AVX512_maskable, AVX5128IBase, EVEX_4V; + defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V; +} + +multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + defm Z : avx512_var_shift, EVEX_V512; } -defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, - i512mem, memopv16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, - i512mem, memopv8i64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; -defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, - i512mem, memopv16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, - i512mem, memopv8i64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; -defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, - i512mem, memopv16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, - i512mem, memopv8i64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; +multiclass avx512_var_shift_types opc, string OpcodeStr, + SDNode OpNode> { + defm D : avx512_var_shift_sizes, EVEX_CD8<32, CD8VQ>; + defm Q : avx512_var_shift_sizes, EVEX_CD8<64, CD8VQ>, VEX_W; +} + +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// -multiclass avx512_movddup { def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX; def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; } @@ -3322,11 +3445,11 @@ multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, ValueType vt, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop> { def rr : AVX512XSI, EVEX; let mayLoad = 1 in def rm : AVX512XSI, EVEX; } @@ -3385,73 +3508,70 @@ multiclass avx512_fma3p_rm opc, string OpcodeStr, X86VectorVTInfo _, AVX512FMA3Base; let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; -} + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B; + } } // Constraints = "$src1 = $dst" multiclass avx512_fma3p_forms opc213, bits<8> opc231, string OpcodeStr, X86VectorVTInfo VTI, SDPatternOperator OpNode> { - defm v213 : avx512_fma3p_rm, - EVEX_V512, EVEX_CD8; + defm v213r : avx512_fma3p_rm, EVEX_CD8; - defm v231 : avx512_fma3p_rm, - EVEX_V512, EVEX_CD8; + defm v231r : avx512_fma3p_rm, EVEX_CD8; } +multiclass avx512_fma3p opc213, bits<8> opc231, + string OpcodeStr, + SDPatternOperator OpNode> { let ExeDomain = SSEPackedSingle in { - defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd", - v16f32_info, X86Fmadd>; - defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub", - v16f32_info, X86Fmsub>; - defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub", - v16f32_info, X86Fmaddsub>; - defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd", - v16f32_info, X86Fmsubadd>; - defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd", - v16f32_info, X86Fnmadd>; - defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub", - v16f32_info, X86Fnmsub>; -} + defm NAME##PSZ : avx512_fma3p_forms, EVEX_V512; + defm NAME##PSZ256 : avx512_fma3p_forms, EVEX_V256; + defm NAME##PSZ128 : avx512_fma3p_forms, EVEX_V128; + } let ExeDomain = SSEPackedDouble in { - defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd", - v8f64_info, X86Fmadd>, VEX_W; - defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub", - v8f64_info, X86Fmsub>, VEX_W; - defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub", - v8f64_info, X86Fmaddsub>, VEX_W; - defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd", - v8f64_info, X86Fmsubadd>, VEX_W; - defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd", - v8f64_info, X86Fnmadd>, VEX_W; - defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub", - v8f64_info, X86Fnmsub>, VEX_W; + defm NAME##PDZ : avx512_fma3p_forms, EVEX_V512, VEX_W; + defm NAME##PDZ256 : avx512_fma3p_forms, EVEX_V256, VEX_W; + defm NAME##PDZ128 : avx512_fma3p_forms, EVEX_V128, VEX_W; + } } +defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>; +defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>; +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>; +defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>; +defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>; +defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>; + let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let mayLoad = 1 in def m: AVX512FMA3; def mb: AVX512FMA3 opc, string OpcodeStr, SDNode OpNode, } // Constraints = "$src1 = $dst" +multiclass avx512_fma3p_m132_f opc, + string OpcodeStr, + SDNode OpNode> { + let ExeDomain = SSEPackedSingle in { - defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -} + defm NAME##PSZ : avx512_fma3p_m132, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm NAME##PSZ256 : avx512_fma3p_m132, EVEX_V256, EVEX_CD8<32, CD8VF>; + defm NAME##PSZ128 : avx512_fma3p_m132, EVEX_V128, EVEX_CD8<32, CD8VF>; + } let ExeDomain = SSEPackedDouble in { - defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm NAME##PDZ : avx512_fma3p_m132, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; + defm NAME##PDZ256 : avx512_fma3p_m132, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>; + defm NAME##PDZ128 : avx512_fma3p_m132, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>; + } } +defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; +defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; +defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; +defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; +defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; +defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; + + // Scalar FMA let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, +multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag> { let isCommutable = 1 in def r : AVX512FMA3; let mayLoad = 1 in def m : AVX512FMA3; @@ -3552,12 +3661,12 @@ multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let hasSideEffects = 0 in { def rr : SI, + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; let mayLoad = 1 in def rm : SI, + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; } // hasSideEffects = 0 } @@ -3625,12 +3734,12 @@ multiclass avx512_cvt_s_int opc, RegisterClass SrcRC, RegisterClass DstR string asm> { let hasSideEffects = 0 in { def rr : SI, EVEX, VEX_LIG, Requires<[HasAVX512]>; let mayLoad = 1 in def rm : SI, EVEX, VEX_LIG, + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG, Requires<[HasAVX512]>; } // hasSideEffects = 0 } @@ -3728,10 +3837,10 @@ multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm> { def rr : SI, EVEX; def rm : SI, EVEX; } @@ -3804,21 +3913,21 @@ def : Pat<(extloadf32 addr:$src), def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, Requires<[HasAVX512]>; -multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, +multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT, ValueType InVT, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 @@ -3830,12 +3939,12 @@ multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 @@ -3852,7 +3961,7 @@ defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; - + def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), (VCVTPD2PSZrr VR512:$src)>; @@ -3881,7 +3990,7 @@ defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, + memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; @@ -3899,7 +4008,7 @@ defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uin memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>; - + // cvttpd2udq (src, 0, mask-all-ones, sae-current) def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), @@ -3909,16 +4018,16 @@ defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, memopv4i64, f256mem, v8f64, v8i32, SSEPackedDouble>, EVEX_V512, XS, EVEX_CD8<32, CD8VH>; - + defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, memopv16i32, f512mem, v16f32, v16i32, SSEPackedSingle>, EVEX_V512, XD, EVEX_CD8<32, CD8VF>; def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - + def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; @@ -3926,7 +4035,7 @@ def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - + def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; @@ -3953,14 +4062,14 @@ multiclass avx512_vcvt_fp2int opc, string asm, RegisterClass SrcRC, X86MemOperand x86memop, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 } @@ -4019,12 +4128,12 @@ multiclass avx512_cvtps2ph { def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst), (ins srcRC:$src1, i32i8imm:$src2), - "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; let hasSideEffects = 0, mayStore = 1 in def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2), - "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; } defm VCVTPH2PSZ : avx512_cvtph2ps, EVEX_V512, @@ -4071,7 +4180,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } - + /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop> { @@ -4079,12 +4188,12 @@ multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, def rr : AVX5128I, EVEX_4V; + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; let mayLoad = 1 in { def rm : AVX5128I, EVEX_4V; + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; } } } @@ -4179,126 +4288,100 @@ def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src), (VRCP14PDZr VR512:$src)>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd -multiclass avx512_fp28_s opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - let hasSideEffects = 0, Predicates = [HasERI] in { - def rr : AVX5128I, EVEX_4V; - def rrb : AVX5128I, EVEX_4V, EVEX_B; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX_4V; - } +multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, + SDNode OpNode> { + + defm r : AVX512_maskable_scalar; + + defm rb : AVX512_maskable_scalar, EVEX_B; + + defm m : AVX512_maskable_scalar; } + +multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode> { + defm SS : avx512_fp28_s, + EVEX_CD8<32, CD8VT1>; + defm SD : avx512_fp28_s, + EVEX_CD8<64, CD8VT1>, VEX_W; } -defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; -defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; +let hasSideEffects = 0, Predicates = [HasERI] in { + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; +} +/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd -def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; +multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { -def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; + defm r : AVX512_maskable; -def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; + defm rb : AVX512_maskable, EVEX_B; -def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; + defm m : AVX512_maskable; -/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd -multiclass avx512_fp28_p opc, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop> { - let hasSideEffects = 0, Predicates = [HasERI] in { - def r : AVX5128I, EVEX; - def rb : AVX5128I, EVEX, EVEX_B; - def m : AVX5128I, EVEX; - } + defm mb : AVX512_maskable, EVEX_B; } -defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), - (VRSQRT28PSZrb VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), - (VRSQRT28PDZrb VR512:$src)>; - -def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), - (VRCP28PSZrb VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), - (VRCP28PDZrb VR512:$src)>; - -multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, - OpndItins itins_s, OpndItins itins_d> { - def PSZrr :AVX512PSI, - EVEX, EVEX_V512; - let mayLoad = 1 in - def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; +multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { + defm PS : avx512_fp28_p, + EVEX_CD8<32, CD8VF>; + defm PD : avx512_fp28_p, + VEX_W, EVEX_CD8<32, CD8VF>; +} - def PDZrr : AVX512PDI, - EVEX, EVEX_V512; +let Predicates = [HasERI], hasSideEffects = 0 in { - let mayLoad = 1 in - def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; +} + +multiclass avx512_sqrt_packed opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm r: AVX512_maskable, EVEX; + let mayLoad = 1 in { + defm m: AVX512_maskable, EVEX; + defm mb: AVX512_maskable, + EVEX, EVEX_B; + } } multiclass avx512_sqrt_scalar opc, string OpcodeStr, @@ -4314,7 +4397,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, VR128X:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2))], itins_s.rr>, XS, EVEX_4V; let mayLoad = 1 in { @@ -4328,7 +4411,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2))], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; } @@ -4342,7 +4425,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, VR128X:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2))], itins_s.rr>, XD, EVEX_4V, VEX_W; let mayLoad = 1 in { @@ -4356,27 +4439,51 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, - (F64Int VR128X:$src1, sse_load_f64:$src2))]>, + [(set VR128X:$dst, + (F64Int VR128X:$src1, sse_load_f64:$src2))]>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; } } +multiclass avx512_sqrt_packed_all opc, string OpcodeStr, + SDNode OpNode> { + defm PSZ : avx512_sqrt_packed, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed, + EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_sqrt_packed, + EVEX_V128, PS, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_sqrt_packed, + EVEX_V256, PS, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_sqrt_packed, + EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_sqrt_packed, + EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; + } +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>, - avx512_sqrt_packed<0x51, "vsqrt", fsqrt, - SSE_SQRTPS, SSE_SQRTPD>; +defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", + int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, + SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZrr VR512:$src1)>; + (VSQRTPSZr VR512:$src1)>; def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZrr VR512:$src1)>; - + (VSQRTPDZr VR512:$src1)>; + def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -4485,7 +4592,7 @@ let ExeDomain = GenericDomain in { (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128X:$dst, (F32Int VR128X:$src1, + [(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>, EVEX_CD8<32, CD8VT1>; @@ -4526,14 +4633,14 @@ let ExeDomain = d in { def r : AVX512AIi8, EVEX; // Vector intrinsic operation, mem def m : AVX512AIi8, EVEX; } // ExeDomain } @@ -4564,20 +4671,20 @@ let ExeDomain = d in { def r : AVX512AIi8, EVEX_4V; def m : AVX512AIi8, EVEX_4V; } // ExeDomain } defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X, SSEPackedSingle>, EVEX_CD8<32, CD8VT1>; - + defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X, SSEPackedDouble>, EVEX_CD8<64, CD8VT1>; @@ -4633,32 +4740,32 @@ multiclass avx512_trunc_sat opc, string OpcodeStr, RegisterClass KRC, X86MemOperand x86memop> { def rr : AVX512XS8I, EVEX; def rrk : AVX512XS8I, EVEX, EVEX_K; def rrkz : AVX512XS8I, EVEX, EVEX_KZ; def mr : AVX512XS8I, EVEX; def mrk : AVX512XS8I, EVEX, EVEX_K; } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, +defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; @@ -4712,36 +4819,36 @@ multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, def rr : AVX5128I, EVEX; def rrk : AVX5128I, EVEX, EVEX_K; def rrkz : AVX5128I, EVEX, EVEX_KZ; let mayLoad = 1 in { def rm : AVX5128I, EVEX; def rmk : AVX5128I, EVEX, EVEX_K; def rmkz : AVX5128I, EVEX, EVEX_KZ; } @@ -4789,7 +4896,7 @@ let mayLoad = 1, def rm : AVX5128I, EVEX, EVEX_K; } @@ -4806,7 +4913,7 @@ defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; } - + defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, @@ -4823,7 +4930,7 @@ let mayStore = 1, Constraints = "$mask = $mask_wb" in def mr : AVX5128I, EVEX, EVEX_K; } @@ -4856,7 +4963,7 @@ multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeSt RegisterClass KRC, X86MemOperand memop> { let Predicates = [HasPFI], hasSideEffects = 1 in def m : AVX5128I, EVEX, EVEX_K; } @@ -4871,7 +4978,7 @@ defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; - + defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; @@ -4916,14 +5023,14 @@ multiclass avx512_shufp, EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), !strconcat(OpcodeStr, - " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, EVEX_4V, Sched<[WriteShuffle]>; @@ -4963,7 +5070,7 @@ multiclass avx512_valign { def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3), !strconcat("valign"##_.Suffix, - " \t{$src3, $src2, $src1, $dst|" + "\t{$src3, $src2, $src1, $dst|" "$dst, $src1, $src2, $src3}"), []>, EVEX_4V; } @@ -4979,43 +5086,43 @@ multiclass avx512_vpabs opc, string OpcodeStr, ValueType OpVT, X86MemOperand x86memop, X86MemOperand x86scalar_mop, string BrdcstStr> { def rr : AVX5128I, EVEX; def rrk : AVX5128I, EVEX, EVEX_K; def rrkz : AVX5128I, EVEX, EVEX_KZ; let mayLoad = 1 in { def rm : AVX5128I, EVEX; def rmk : AVX5128I, EVEX, EVEX_K; def rmkz : AVX5128I, EVEX, EVEX_KZ; def rmb : AVX5128I, EVEX, EVEX_B; def rmbk : AVX5128I, EVEX, EVEX_B, EVEX_K; def rmbkz : AVX5128I, EVEX, EVEX_B, EVEX_KZ; @@ -5045,54 +5152,54 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), (VPABSQZrr VR512:$src)>; -multiclass avx512_conflict opc, string OpcodeStr, +multiclass avx512_conflict opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, X86MemOperand x86scalar_mop, string BrdcstStr> { def rr : AVX5128I, EVEX; def rm : AVX5128I, EVEX; def rmb : AVX5128I, EVEX, EVEX_B; def rrkz : AVX5128I, EVEX, EVEX_KZ; def rmkz : AVX5128I, EVEX, EVEX_KZ; def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; - + let Constraints = "$src1 = $dst" in { def rrk : AVX5128I, EVEX, EVEX_K; def rmk : AVX5128I, EVEX, EVEX_K; def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; } @@ -5168,10 +5275,10 @@ def : Pat<(truncstorei1 GR8:$src, addr:$dst), multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { def rr : AVX512XS8I, EVEX; } - + multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in @@ -5193,5 +5300,108 @@ multiclass avx512_convert_mask_to_vector { defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr, HasDQI>, VEX_W; } - + defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; + +//===----------------------------------------------------------------------===// +// AVX-512 - COMPRESS and EXPAND +// +multiclass compress_by_vec_width opc, X86VectorVTInfo _, + string OpcodeStr> { + def rrkz : AVX5128I, EVEX_KZ; + + let Constraints = "$src0 = $dst" in + def rrk : AVX5128I, EVEX_K; + + let mayStore = 1 in { + def mrk : AVX5128I, + EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + } +} + +multiclass compress_by_elt_width opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo> { + defm Z : compress_by_vec_width, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : compress_by_vec_width, EVEX_V256; + defm Z128 : compress_by_vec_width, EVEX_V128; + } +} + +defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>, + EVEX; +defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>, + EVEX, VEX_W; +defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>, + EVEX; +defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>, + EVEX, VEX_W; + +// expand +multiclass expand_by_vec_width opc, X86VectorVTInfo _, + string OpcodeStr> { + def rrkz : AVX5128I, EVEX_KZ; + + let Constraints = "$src0 = $dst" in + def rrk : AVX5128I, EVEX_K; + + let mayLoad = 1, Constraints = "$src0 = $dst" in + def rmk : AVX5128I, + EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + + let mayLoad = 1 in + def rmkz : AVX5128I, + EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; + +} + +multiclass expand_by_elt_width opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo> { + defm Z : expand_by_vec_width, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : expand_by_vec_width, EVEX_V256; + defm Z128 : expand_by_vec_width, EVEX_V128; + } +} + +defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>, + EVEX; +defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>, + EVEX, VEX_W; +defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, + EVEX; +defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, + EVEX, VEX_W;