// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
-class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
+class X86VectorVTInfo<int numelts, ValueType EltVT, RegisterClass rc,
string suffix = ""> {
RegisterClass RC = rc;
+ int NumElts = numelts;
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
VTName)), VTName));
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+ // Load patterns used for memory operands. We only have this defined in
+ // case of i64 element types for sub-512 integer vectors. For now, keep
+ // MemOpFrag undefined in these cases.
+ PatFrag MemOpFrag =
+ !if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)));
+
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
// fails to compile, so we choose FloatVT = VT
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
+ // 8-bit compressed displacement tuple/subvector format. This is only
+ // defined for NumElts <= 8.
+ CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
+ !cast<CD8VForm>("CD8VT" # NumElts), ?);
+
SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
!if (!eq (Size, 256), sub_ymm, ?));
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
SSEPackedInt));
+
+ // A vector type of the same width with element type i32. This is used to
+ // create the canonical constant zero node ImmAllZerosV.
+ ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
+ dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
}
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
+def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
+def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
+def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
X86VectorVTInfo i128> {
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
-
-
-// Common base class of AVX512_masking and AVX512_masking_3src.
-multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
- dag MaskingIns, dag ZeroMaskingIns,
- string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag MaskingRHS, ValueType OpVT,
- RegisterClass RC, RegisterClass KRC,
- string MaskingConstraint = ""> {
- def NAME: AVX512<O, F, Outs, Ins,
- OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
- "$dst, "#IntelSrcAsm#"}",
- [(set RC:$dst, RHS)]>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+ v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+ v2f64x_info>;
+
+// This multiclass generates the masking variants from the non-masking
+// variant. It only provides the assembly pieces for the masking variants.
+// It assumes custom ISel patterns for masking which can be provided as
+// template arguments.
+multiclass AVX512_maskable_custom<bits<8> O, Format F,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern,
+ list<dag> MaskingPattern,
+ list<dag> ZeroMaskingPattern,
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def NAME: AVX512<O, F, Outs, Ins,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
+ "$dst, "#IntelSrcAsm#"}",
+ Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
let AddedComplexity = 20 in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
- "$dst {${mask}}, "#IntelSrcAsm#"}",
- [(set RC:$dst, MaskingRHS)]>,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
+ MaskingPattern, itin>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = MaskingConstraint;
}
let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
- OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
- "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
- [(set RC:$dst,
- (vselect KRC:$mask, RHS,
- (OpVT (bitconvert
- (v16i32 immAllZerosV)))))]>,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
+ ZeroMaskingPattern,
+ itin>,
EVEX_KZ;
}
+
+// Common base class of AVX512_maskable and AVX512_maskable_3src.
+multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS,
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst, MaskingRHS)],
+ [(set _.RC:$dst,
+ (vselect _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
+ MaskingConstraint, NoItinerary, IsCommutable>;
+
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the instruction. In the masking case, the
// perserved vector elements come from a new dummy input operand tied to $dst.
-multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
- string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS, ValueType OpVT, RegisterClass RC,
- RegisterClass KRC> :
- AVX512_masking_common<O, F, Outs,
- Ins,
- !con((ins RC:$src0, KRC:$mask), Ins),
- !con((ins KRC:$mask), Ins),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
- "$src0 = $dst">;
-
-// Similar to AVX512_masking but in this case one of the source operands
+multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_common<O, F, _, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src0),
+ "$src0 = $dst", itin, IsCommutable>;
+
+// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
-multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
- string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS, ValueType OpVT,
- RegisterClass RC, RegisterClass KRC> :
- AVX512_masking_common<O, F, Outs,
- !con((ins RC:$src1), NonTiedIns),
- !con((ins RC:$src1), !con((ins KRC:$mask),
- NonTiedIns)),
- !con((ins RC:$src1), !con((ins KRC:$mask),
- NonTiedIns)),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
+multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, _, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+
+
+multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern> :
+ AVX512_maskable_custom<O, F, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
+ "$src0 = $dst">;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-// -- 32x8 form --
-let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
-def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-}
-
-// -- 64x4 fp form --
-let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
-def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+
+multiclass vinsert_for_size_no_alt<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> {
+ let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src1, From.RC:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm)))]>,
+ EVEX_4V, EVEX_V512;
+
+ let mayLoad = 1 in
+ def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
+ (ins VR512:$src1, From.MemOp:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ []>,
+ EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ }
}
-// -- 32x4 integer form --
-let hasSideEffects = 0 in {
-def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
+
+multiclass vinsert_for_size<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> :
+ vinsert_for_size_no_alt<Opcode, From, To,
+ vinsert_insert, INSERT_get_vinsert_imm> {
+ // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
+ // vinserti32x4. Only add this if 64x2 and friends are not supported
+ // natively via AVX512DQ.
+ let Predicates = [NoDQI] in
+ def : Pat<(vinsert_insert:$ins
+ (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
+ (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
+ VR512:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm VR512:$ins)))>;
+}
+
+multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
+ ValueType EltVT64, int Opcode256> {
+ defm NAME # "32x4" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>;
+ let Predicates = [HasDQI] in
+ defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>, VEX_W;
+ defm NAME # "64x4" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 8, EltVT32, VR256>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>, VEX_W;
+ let Predicates = [HasDQI] in
+ defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>;
}
-let hasSideEffects = 0 in {
-// -- 64x4 form --
-def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
- (bc_v4i32 (loadv2i64 addr:$src2)),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
- (bc_v8i32 (loadv4i64 addr:$src2)),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
+defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
+defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
- def rr : AVX512AIi8<Opcode, MRMDestReg, (outs To.RC:$dst),
- (ins VR512:$src1, i8imm:$idx),
- "vextract" # To.EltTypeName # "x4\t{$idx, $src1, $dst|"
- "$dst, $src1, $idx}",
- [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
- (iPTR imm)))]>,
- EVEX, EVEX_V512;
+ defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
+ (ins VR512:$src1, i8imm:$idx),
+ "vextract" # To.EltTypeName # "x4",
+ "$idx, $src1", "$src1, $idx",
+ [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
+ (iPTR imm)))]>,
+ AVX512AIi8Base, EVEX, EVEX_V512;
let mayStore = 1 in
def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
(AltTo.VT
(EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+
+ // Intrinsic call with masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call with zero-masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call without masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+ VR512:$src1, imm:$idx)>;
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DestRC,
- RegisterClass SrcRC, X86MemOperand x86memop> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
+multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ ValueType svt, X86VectorVTInfo _> {
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
+ "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
+ T8PD, EVEX;
+
+ let mayLoad = 1 in {
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
+ (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX;
+ }
}
+
+multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+ EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
+ EVEX_V256;
+ }
+}
+
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
- VR128X, f32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ let Predicates = [HasVLX] in {
+ defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
+ v4f32, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VT1>;
+ }
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
- VR128X, f64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
}
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
(VPBROADCASTQZrr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+ (VBROADCASTSDZr VR128X:$src)>;
+
+def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
+ (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
+ (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+
+def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
+ (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
+def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
+ (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+ (VBROADCASTSDZr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
- (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+ (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
- (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+ (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
let Predicates = [HasAVX512] in {
//---
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DstRC, RegisterClass KRC,
- ValueType OpVT, ValueType SrcVT> {
-def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
+ RegisterClass KRC> {
+let Predicates = [HasCDI] in
+def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
+ []>, EVEX, EVEX_V512;
+
+let Predicates = [HasCDI, HasVLX] in {
+def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V128;
+def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V256;
+}
}
let Predicates = [HasCDI] in {
-defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
- VK16, v16i32, v16i1>, EVEX_V512;
-defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
- VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
+ VK16>;
+defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
+ VK8>, VEX_W;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
//
// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT> {
- def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
+ def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ [(set _.RC:$dst,
+ (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
EVEX;
- def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, EVEX;
+ [(set _.RC:$dst,
+ (_.VT (OpNode (_.MemOpFrag addr:$src1),
+ (i8 imm:$src2))))]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VF>;
+}
}
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
- i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
- f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
+ X86VectorVTInfo Ctrl> :
+ avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
+ let ExeDomain = _.ExeDomain in {
+ def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT Ctrl.RC:$src2))))]>,
+ EVEX_4V;
+ def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.MemOp:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT (Ctrl.MemOpFrag addr:$src2)))))]>,
+ EVEX_4V;
+ }
+}
+
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>,
+ EVEX_V512, VEX_W;
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>,
+ EVEX_V512, VEX_W;
+
+defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
+ EVEX_V512;
+defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
+ EVEX_V512, VEX_W;
+
+def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM - register form --
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
+ def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
+ def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
+}
+
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass KRC,
- RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, PatFrag scalar_mfrag,
- X86MemOperand x86scalar_mop, string BrdcstStr,
- OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
- itins.rr>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- RC:$src0)))],
- itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- (OpVT immAllZerosV))))],
- itins.rr>, EVEX_4V, EVEX_KZ;
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src2)))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V;
+}
+
+multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> :
+ avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
+}
+
+multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
+}
- let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
- itins.rm>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_KZ;
- }
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm>, EVEX_4V, EVEX_B;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
- }
+multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
}
+multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ itins, prd, IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+ itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
+ itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+ bits<8> opc_d, bits<8> opc_q,
+ string OpcodeStr, SDNode OpNode,
+ OpndItins itins, bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+ itins, HasAVX512, IsCommutable>,
+ avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+ itins, HasBWI, IsCommutable>;
+}
+
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
}
}
-defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
-
-defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
+ SSE_INTALU_ITINS_P, 1>;
+defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
+ SSE_INTALU_ITINS_P, 0>;
+defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPMULDQZrr VR512:$src1, VR512:$src2)>;
-defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi,
- memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi,
- memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
}
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass KRC,
- RegisterClass RC, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_frag,
- X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr,
- Domain d, OpndItins itins, bit commutable> {
- let isCommutable = commutable in {
- def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
- EVEX_4V;
-
- def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_K;
-
- def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_KZ;
- }
-
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V;
let mayLoad = 1 in {
- def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>, EVEX_4V;
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))>,
+ EVEX_4V, EVEX_B;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0> {
+ defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
+ IsCommutable>, EVEX_V512, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
+ IsCommutable>, EVEX_V512, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
- def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm, d>, EVEX_4V, EVEX_B;
-
- def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_K;
-
- def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_KZ;
-
- def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
-
- def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
+ IsCommutable>, EVEX_V128, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
+ IsCommutable>, EVEX_V256, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
+ IsCommutable>, EVEX_V128, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
+ IsCommutable>, EVEX_V256, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
}
}
-defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>;
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>;
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>;
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
+
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT,
- RegisterClass KRC> {
- defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src2, RC:$src3),
+// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDPatternOperator OpNode = null_frag> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, _.MemOp:$src3),
!strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3))))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
- !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
- ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.MemOpFrag addr:$src3))))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, _.ScalarMemOp:$src3),
+ !strconcat(OpcodeStr, " \t{${src3}", _.BroadcastStr,
+ ", $src2, $dst|$dst, $src2, ${src3}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))]>, EVEX_B;
}
} // Constraints = "$src1 = $dst"
+multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, X86VectorVTInfo VTI,
+ SDPatternOperator OpNode> {
+ defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
+ VTI, OpNode>,
+ EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
+
+ defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
+ VTI>,
+ EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
+}
+
let ExeDomain = SSEPackedSingle in {
- defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
+ defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
+ v16f32_info, X86Fmadd>;
+ defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
+ v16f32_info, X86Fmsub>;
+ defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
+ v16f32_info, X86Fmaddsub>;
+ defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
+ v16f32_info, X86Fmsubadd>;
+ defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
+ v16f32_info, X86Fnmadd>;
+ defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
+ v16f32_info, X86Fnmsub>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64, VK8WM>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
+ v8f64_info, X86Fmadd>, VEX_W;
+ defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
+ v8f64_info, X86Fmsub>, VEX_W;
+ defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
+ v8f64_info, X86Fmaddsub>, VEX_W;
+ defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
+ v8f64_info, X86Fmsubadd>, VEX_W;
+ defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
+ v8f64_info, X86Fnmadd>, VEX_W;
+ defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
+ v8f64_info, X86Fnmsub>, VEX_W;
}
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86memop:$src2),
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2),
+ _.RC:$src3)))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, " \t{${src2}", _.BroadcastStr,
+ ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))),
+ _.RC:$src3))]>, EVEX_B;
}
} // Constraints = "$src1 = $dst"
let ExeDomain = SSEPackedSingle in {
- defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
+ defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
// Scalar FMA
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, ValueType OpVt> {
- def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
- EVEX;
- def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
- EVEX;
-}
-defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, T8PD, EVEX_B;
+ }
+}
+
+multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
+defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
(VRCP28PDZrb VR512:$src)>;
-multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins_s, OpndItins itins_d> {
- def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
- EVEX, EVEX_V512;
-
- let mayLoad = 1 in
- def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
- itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
- def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
- EVEX, EVEX_V512;
-
- let mayLoad = 1 in
- def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (OpNode
- (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
- itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX;
+
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, EVEX_B;
+ }
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
}
}
+multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ v16f32_info>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ v8f64_info>,
+ EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
- SSE_SQRTSS, SSE_SQRTSD>,
- avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
- SSE_SQRTPS, SSE_SQRTPD>;
+ SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
- (VSQRTPSZrr VR512:$src1)>;
+ (VSQRTPSZr VR512:$src1)>;
def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
- (VSQRTPDZrr VR512:$src1)>;
+ (VSQRTPDZr VR512:$src1)>;
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
multiclass avx512_valign<X86VectorVTInfo _> {
- defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
+ defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
"valign"##_.Suffix,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
- (i8 imm:$src3))),
- _.VT, _.RC, _.KRCWM>,
+ (i8 imm:$src3)))>,
AVX512AIi8Base, EVEX_4V;
// Also match valign of packed floats.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
+def rr : AVX512XS8I<opc, MRMDestReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+ !strconcat(OpcodeStr##Vec.Suffix, " \t{$src, $dst|$dst, $src}"),
+ [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
+}
+
+multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
+ string OpcodeStr, Predicate prd> {
+let Predicates = [prd] in
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
+multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
+ defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
+ HasBWI>;
+ defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
+ HasBWI>, VEX_W;
+ defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
+ HasDQI>;
+ defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
+ HasDQI>, VEX_W;
+}
+
+defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;