1 // Group template arguments that can be derived from the vector type (EltNum x
2 // EltVT). These are things like the register class for the writemask, etc.
3 // The idea is to pass one of these as the template argument rather than the
4 // individual arguments.
// Bundle of facts derivable from (NumElts x EltVT): mask register classes,
// VT/memory-operand/load-fragment lookups, and assembly strings.  Pass one of
// these as a single template argument instead of each field individually.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
  // Corresponding mask register class, e.g. VK16 for 16 elements.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
  // Corresponding write-mask register class, e.g. VK16WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
  // The GPR register class that can hold the write mask. Use GR8 for fewer
  // than 8 elements. Use shift-right and equal to work around the lack of
    !cast<RegisterClass>("GR" #
         !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;
  // The vector VT name, e.g. "v16i32"; used to look up the ValueType record.
  string VTName = "v" # NumElts # EltVT;
  ValueType VT = !cast<ValueType>(VTName);
  // Element type name, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;
  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
  // Size of RC in bits, e.g. 512 for VR512.
  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  // due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                   !if (!eq (TypeVariantName, "i"),
                        !if (!eq (Size, 128), "v2i64",
                        !if (!eq (Size, 256), "v4i64",
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  // The corresponding float type, e.g. v16f32 for v16i32
  // Note: For EltSize < 32, FloatVT is illegal and TableGen
  // fails to compile, so we choose FloatVT = VT
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";
// Concrete VT-info records for the 512-bit register class.
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
// 128-bit (VR128X) variants.
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
// Groups the 512/256/128-bit VT-info records for one element type so
// AVX512VL multiclasses can instantiate all three widths from one argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
// Common base class of AVX512_masking and AVX512_masking_3src.
// Emits three records: NAME (unmasked), NAME#k (merge-masking, selects
// between RHS and the tied destination via vselect) and NAME#kz
// (zero-masking, selects between RHS and an all-zeros vector).
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unmasked form.
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
                             "$dst, "#IntelSrcAsm#"}",
                   [(set RC:$dst, RHS)]>;
  // Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
                               "$dst {${mask}}, "#IntelSrcAsm#"}",
                     [(set RC:$dst, MaskingRHS)]>,
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  let AddedComplexity = 30 in  // Prefer over VMOV*rrkz Pat<>
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      (vselect KRC:$mask, RHS,
                      (v16i32 immAllZerosV)))))]>,
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
  AVX512_masking_common<O, F, Outs,
                        // Masked ins: prepend the $src0 pass-through operand
                        // and the mask register to the caller's ins dag.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
// Similar to AVX512_masking but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        !con((ins RC:$src1), NonTiedIns),
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        // Merge-masking preserves the tied $src1 elements.
                        (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
// To v8f64 from every other 512-bit type.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
// To v16f32.
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
// To v8i64.
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
// To v16i32.
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
// To v32i16 from every other 512-bit type.
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
// Fix: the v16f32 pattern appeared twice (copy/paste duplicate); keep one.
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
// To v64i8 from every other 512-bit type.
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
// Bitcasts between 128-bit vector types on the EVEX (VR128X) register class;
// free conversions, no instruction emitted.
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
// AVX-512: the VPXOR instruction writes zero to its upper part, so it's safe
// to use it to build zeros.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in {
// Pseudo producing an all-zeros 512-bit register; matched on the v16f32
// form here, and the other 512-bit zero VTs are mapped to it below.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16f32 immAllZerosV))]>;
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
286 //===----------------------------------------------------------------------===//
287 // AVX-512 - VECTOR INSERT
290 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
291 def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
292 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
293 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
294 []>, EVEX_4V, EVEX_V512;
296 def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
297 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
298 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
299 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
302 // -- 64x4 fp form --
303 let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
304 def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
305 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
306 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
307 []>, EVEX_4V, EVEX_V512, VEX_W;
309 def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
310 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
311 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
312 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
314 // -- 32x4 integer form --
315 let hasSideEffects = 0 in {
316 def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
317 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
318 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
319 []>, EVEX_4V, EVEX_V512;
321 def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
322 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
323 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
324 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
327 let hasSideEffects = 0 in {
329 def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
330 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
331 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
332 []>, EVEX_4V, EVEX_V512, VEX_W;
334 def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
335 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
336 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
337 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// Select 128-bit subvector register inserts onto the 32x4 insert
// instructions (note: the f64/i64 element types also go through the x4
// 32-bit forms here; the bit pattern is identical).
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
// 128-bit subvector inserts from memory.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                (bc_v4i32 (loadv2i64 addr:$src2)),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
// 256-bit subvector register inserts (fp element types).
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector register inserts (integer element types).
// Fix: these insert 256-bit subvectors (VR256X sources, VINSERTI64x4rr,
// INSERT_get_vinsert256_imm), so they must match the vinsert256_insert
// fragment — not vinsert128_insert as previously written, which made the
// pattern inconsistent with the imm-extraction helper and the fp patterns
// directly above.
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector inserts from memory.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                (bc_v8i32 (loadv4i64 addr:$src2)),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
407 //===----------------------------------------------------------------------===//
408 // AVX-512 VECTOR EXTRACT
410 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
412 def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
413 (ins VR512:$src1, i8imm:$src2),
414 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
415 []>, EVEX, EVEX_V512;
416 def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
417 (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
418 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
419 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
422 def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
423 (ins VR512:$src1, i8imm:$src2),
424 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
425 []>, EVEX, EVEX_V512, VEX_W;
427 def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
428 (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
429 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
430 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
433 let hasSideEffects = 0 in {
435 def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
436 (ins VR512:$src1, i8imm:$src2),
437 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
438 []>, EVEX, EVEX_V512;
439 def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
440 (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
441 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
442 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
445 def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
446 (ins VR512:$src1, i8imm:$src2),
447 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
448 []>, EVEX, EVEX_V512, VEX_W;
450 def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
451 (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
452 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
453 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// Select 128-bit subvector extracts onto the 32x4 extract instructions.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// Fix: give $src1 an explicit source type (v16i32) like every neighboring
// extract pattern — untyped here, this pattern relied on ambiguous type
// inference over VR512's multiple VTs — and use the integer-domain
// VEXTRACTI32x4rr for an integer extract, matching the v8i64/VEXTRACTI
// pattern below.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// Select 256-bit subvector extracts onto the 64x4 extract instructions.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
// A 256-bit subvector extract from the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
// Likewise for a 128-bit subvector extract from position 0 (sub_xmm copy).
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// Likewise for a 256-bit subvector insert at position 0 (sub_ymm copy).
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, i32i8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
552 //===---------------------------------------------------------------------===//
555 multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
556 RegisterClass DestRC,
557 RegisterClass SrcRC, X86MemOperand x86memop> {
558 def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
559 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
561 def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
562 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
564 let ExeDomain = SSEPackedSingle in {
565 defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
567 EVEX_V512, EVEX_CD8<32, CD8VT1>;
570 let ExeDomain = SSEPackedDouble in {
571 defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
573 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
576 def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
577 (VBROADCASTSSZrm addr:$src)>;
578 def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
579 (VBROADCASTSDZrm addr:$src)>;
581 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
582 (VBROADCASTSSZrm addr:$src)>;
583 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
584 (VBROADCASTSDZrm addr:$src)>;
// Integer broadcast from a GPR: unmasked (Zrr) and zero-masked (Zkrr) forms.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                          RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     []>, EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
                      (ins KRC:$mask, SrcRC:$src),
                      !strconcat(OpcodeStr,
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                      []>, EVEX, EVEX_V512, EVEX_KZ;
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
// Lower a mask zero-extension as a masked broadcast of the constant 1.
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
           (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
// Intrinsic forms; the masked gpr intrinsics move the i16/i8 mask into the
// corresponding mask register class first.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                   (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
// Integer broadcast from an XMM register or memory: unmasked (rr/rm) and
// zero-masked (krr/krm) forms, with selection patterns attached.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
  def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
                  !strconcat(OpcodeStr,
                    " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                  (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
  def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
                  !strconcat(OpcodeStr,
                    " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                  [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
                                           (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
                      loadi32, VR512, v16i32, v4i32, VK16WM>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
                      loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
                      EVEX_CD8<64, CD8VT1>;
// Subvector (128/256-bit) broadcast from memory, with zero-masked variant.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
  def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
  def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
                  !strconcat(OpcodeStr,
                    " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       i128mem, loadv2i64, VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// Fix: the 64x4 broadcast produces a v8i64 result (8 elements), so its
// write-mask register class is VK8WM — not VK16WM, which belongs to the
// 16-element (dword) forms such as VBROADCASTI32X4 above.
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       i256mem, loadv4i64, VK8WM>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Select the register-form broadcasts for the 512-bit pbroadcast/vbroadcast
// intrinsics and for the X86VBroadcast node on 128-bit register sources.
687 def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
688 (VPBROADCASTDZrr VR128X:$src)>;
689 def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
690 (VPBROADCASTQZrr VR128X:$src)>;
692 def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
693 (VBROADCASTSSZrr VR128X:$src)>;
694 def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
695 (VBROADCASTSDZrr VR128X:$src)>;
697 def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
698 (VBROADCASTSSZrr VR128X:$src)>;
699 def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
700 (VBROADCASTSDZrr VR128X:$src)>;
702 // Provide fallback in case the load node that is used in the patterns above
703 // is used by additional users, which prevents the pattern selection.
// Scalar FP registers are moved into VR128X first so the rr broadcast form
// can be used.
704 def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
705 (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
706 def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
707 (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// 256-bit masked broadcast has no native instruction: widen the v8i1 mask to
// VK16WM, use the 512-bit zero-masked VPBROADCASTDZkrm, and extract the low
// YMM half (sub_ymm).
710 let Predicates = [HasAVX512] in {
711 def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
713 (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
714 addr:$src)), sub_ymm)>;
716 //===----------------------------------------------------------------------===//
717 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a mask register (k-reg) into a vector register (VPBROADCASTM*);
// register-to-register form only. Requires the CDI feature (see below).
720 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
721 RegisterClass DstRC, RegisterClass KRC,
722 ValueType OpVT, ValueType SrcVT> {
723 def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
724 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// VPBROADCASTMW2D: v16i1 -> v16i32; VPBROADCASTMB2Q: v8i1 -> v8i64 (VEX_W).
728 let Predicates = [HasCDI] in {
729 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
730 VK16, v16i32, v16i1>, EVEX_V512;
731 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
732 VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
735 //===----------------------------------------------------------------------===//
738 // -- immediate form --
// Permute with an 8-bit immediate control (VPERMQ/VPERMPD $imm form):
// register source (ri) and memory source (mi, EVEX).
739 multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
740 SDNode OpNode, PatFrag mem_frag,
741 X86MemOperand x86memop, ValueType OpVT> {
742 def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
743 (ins RC:$src1, i8imm:$src2),
744 !strconcat(OpcodeStr,
745 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
747 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
749   def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
750 (ins x86memop:$src1, i8imm:$src2),
751 !strconcat(OpcodeStr,
752 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
754 (OpVT (OpNode (mem_frag addr:$src1),
755 (i8 imm:$src2))))]>, EVEX;
// Both are qword-granular (VEX_W); VPERMPD is tagged SSEPackedDouble.
758 defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
759 i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
760 let ExeDomain = SSEPackedDouble in
761 defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
762 f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
764 // -- VPERM - register form --
// Variable permute with the control in a vector register (X86VPermv):
// rr and rm (memory second operand) forms, both EVEX_4V.
765 multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
766 PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
768   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
769 (ins RC:$src1, RC:$src2),
770 !strconcat(OpcodeStr,
771 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
773 (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
775   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
776 (ins RC:$src1, x86memop:$src2),
777 !strconcat(OpcodeStr,
778 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
780 (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
// VPERMD and VPERMQ share opcode 0x36; VEX_W selects the qword form.
// Likewise VPERMPS/VPERMPD share 0x16.
784 defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
785 v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
786 defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
787 v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
788 let ExeDomain = SSEPackedSingle in
789 defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
790 v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
791 let ExeDomain = SSEPackedDouble in
792 defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
793 v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
795 // -- VPERM2I - 3 source operands form --
// Three-source permute (VPERMI2*/VPERMT2*): $src1 is tied to $dst.
// Variants: rr, merge-masked rrk, zero-masked rrkz, and the memory
// counterparts rm/rmk/rmkz.
796 multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
797 PatFrag mem_frag, X86MemOperand x86memop,
798 SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
799 let Constraints = "$src1 = $dst" in {
800   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
801 (ins RC:$src1, RC:$src2, RC:$src3),
802 !strconcat(OpcodeStr,
803 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
805 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
// Merge-masking: result lanes where the mask is clear keep their old value
// (vselect against the destination).
808   def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
809 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
810 !strconcat(OpcodeStr,
811 " \t{$src3, $src2, $dst {${mask}}|"
812 "$dst {${mask}}, $src2, $src3}"),
813 [(set RC:$dst, (OpVT (vselect KRC:$mask,
814 (OpNode RC:$src1, RC:$src2,
819 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
// Zero-masking: unselected lanes become zero (immAllZerosV).
820   def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
821 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
822 !strconcat(OpcodeStr,
823 " \t{$src3, $src2, $dst {${mask}} {z} |",
824 "$dst {${mask}} {z}, $src2, $src3}"),
825 [(set RC:$dst, (OpVT (vselect KRC:$mask,
826 (OpNode RC:$src1, RC:$src2,
829 (v16i32 immAllZerosV))))))]>,
832   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
833 (ins RC:$src1, RC:$src2, x86memop:$src3),
834 !strconcat(OpcodeStr,
835 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
837 (OpVT (OpNode RC:$src1, RC:$src2,
838 (mem_frag addr:$src3))))]>, EVEX_4V;
840   def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
841 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
842 !strconcat(OpcodeStr,
843 " \t{$src3, $src2, $dst {${mask}}|"
844 "$dst {${mask}}, $src2, $src3}"),
846 (OpVT (vselect KRC:$mask,
847 (OpNode RC:$src1, RC:$src2,
848 (mem_frag addr:$src3)),
852 let AddedComplexity = 10 in // Prefer over the rrkz variant
853   def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
854 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
855 !strconcat(OpcodeStr,
856 " \t{$src3, $src2, $dst {${mask}} {z}|"
857 "$dst {${mask}} {z}, $src2, $src3}"),
859 (OpVT (vselect KRC:$mask,
860 (OpNode RC:$src1, RC:$src2,
861 (mem_frag addr:$src3)),
863 (v16i32 immAllZerosV))))))]>,
// VPERMI2 instantiations (index operand overwritten, X86VPermiv3).
// NOTE(review): the FP variants below pass i512mem (integer memory operand)
// together with FP mem_frags — confirm against upstream whether f512mem was
// intended; it only affects asm-operand classification.
867 defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
868 i512mem, X86VPermiv3, v16i32, VK16WM>,
869 EVEX_V512, EVEX_CD8<32, CD8VF>;
870 defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
871 i512mem, X86VPermiv3, v8i64, VK8WM>,
872 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
873 defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
874 i512mem, X86VPermiv3, v16f32, VK16WM>,
875 EVEX_V512, EVEX_CD8<32, CD8VF>;
876 defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
877 i512mem, X86VPermiv3, v8f64, VK8WM>,
878 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2* (table-overwritten 3-source permute): derives the instructions
// from avx512_perm_3src with mnemonic "vpermt2"##Suffix, then adds patterns
// mapping the mask_vpermt intrinsics onto them. Note the operand swap: the
// intrinsic's $idx becomes the instruction's second operand.
880 multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
881 PatFrag mem_frag, X86MemOperand x86memop,
882 SDNode OpNode, ValueType OpVT, RegisterClass KRC,
883 ValueType MaskVT, RegisterClass MRC> :
884 avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
// All-ones mask (-1) selects the unmasked rr form.
886   def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
887 VR512:$idx, VR512:$src1, VR512:$src2, -1)),
888 (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
// GPR mask is copied into the write-mask class KRC for the rrk form.
890   def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
891 VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
892 (!cast<Instruction>(NAME#rrk) VR512:$src1,
893 (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
896 defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
897 X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
898 EVEX_V512, EVEX_CD8<32, CD8VF>;
899 defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
900 X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
901 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
902 defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
903 X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
904 EVEX_V512, EVEX_CD8<32, CD8VF>;
905 defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
906 X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
907 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
909 //===----------------------------------------------------------------------===//
910 // AVX-512 - BLEND using mask
// Masked blend (VBLENDM*/VPBLENDM*): rr form carries a pattern; note the
// pattern passes ($src2, $src1) to OpNode (vselect), i.e. mask-set lanes take
// $src2. The rm form has no pattern ([]).
912 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
913 RegisterClass KRC, RegisterClass RC,
914 X86MemOperand x86memop, PatFrag mem_frag,
915 SDNode OpNode, ValueType vt> {
916   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
917 (ins KRC:$mask, RC:$src1, RC:$src2),
918 !strconcat(OpcodeStr,
919 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
920 [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
921 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
923   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
924 (ins KRC:$mask, RC:$src1, x86memop:$src2),
925 !strconcat(OpcodeStr,
926 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
927 []>, EVEX_4V, EVEX_K;
// FP blends: single (VK16WM mask) and double (VK8WM mask, VEX_W).
930 let ExeDomain = SSEPackedSingle in
931 defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
932 VK16WM, VR512, f512mem,
933 memopv16f32, vselect, v16f32>,
934 EVEX_CD8<32, CD8VF>, EVEX_V512;
935 let ExeDomain = SSEPackedDouble in
936 defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
937 VK8WM, VR512, f512mem,
938 memopv8f64, vselect, v8f64>,
939 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Map the masked FP blend intrinsics onto the rr forms; the GPR mask is
// copied into the instruction's write-mask class (VK16WM / VK8WM).
941 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
942 (v16f32 VR512:$src2), (i16 GR16:$mask))),
943 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
944 VR512:$src1, VR512:$src2)>;
946 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
947 (v8f64 VR512:$src2), (i8 GR8:$mask))),
948 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
949 VR512:$src1, VR512:$src2)>;
// Integer masked blends (VPBLENDMD/VPBLENDMQ), sharing opcode 0x64 with
// VEX_W selecting the qword form.
// NOTE(review): both pass f512mem as the memory operand despite integer
// element types — confirm against upstream whether i512mem was intended.
951 defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
952 VK16WM, VR512, f512mem,
953 memopv16i32, vselect, v16i32>,
954 EVEX_CD8<32, CD8VF>, EVEX_V512;
956 defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
957 VK8WM, VR512, f512mem,
958 memopv8i64, vselect, v8i64>,
959 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Map the masked integer blend intrinsics onto the VPBLENDM{D,Q} rr forms.
// The mask operand class of VPBLENDMDZrr / VPBLENDMQZrr is the write-mask
// class (VK16WM / VK8WM — the KRC argument of the avx512_blendmask defm
// instantiations above), so the GPR mask must be copied into the WM class,
// exactly as the VBLENDMPS/VBLENDMPD patterns do. The previous VK16/VK8
// targets also admitted K0, which is not encodable as a writemask.
961 def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
962 (v16i32 VR512:$src2), (i16 GR16:$mask))),
963 (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
964 VR512:$src1, VR512:$src2)>;
966 def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
967 (v8i64 VR512:$src2), (i8 GR8:$mask))),
968 (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
969 VR512:$src1, VR512:$src2)>;
// 256-bit vselect fallback (no native 256-bit blend here): widen both
// sources into ZMM via SUBREG_TO_REG, widen the mask to VK16WM, run the
// 512-bit blend, and take the low YMM half. Sources are swapped to match
// the blend's (mask, true=src2, false=src1) operand order (see rr pattern).
971 let Predicates = [HasAVX512] in {
972 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
973 (v8f32 VR256X:$src2))),
975 (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
976 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
977 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
979 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
980 (v8i32 VR256X:$src2))),
982 (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
983 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
984 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
986 //===----------------------------------------------------------------------===//
987 // Compare Instructions
988 //===----------------------------------------------------------------------===//
990 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a single mask bit (VK1). rr/rm carry
// patterns; the *_alt forms accept an explicit immediate condition code and
// exist only for the asm parser (isAsmParserOnly, no side effects).
991 multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
992 Operand CC, SDNode OpNode, ValueType VT,
993 PatFrag ld_frag, string asm, string asm_alt> {
994   def rr : AVX512Ii8<0xC2, MRMSrcReg,
995 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
996 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
997 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
998   def rm : AVX512Ii8<0xC2, MRMSrcMem,
999 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
1000 [(set VK1:$dst, (OpNode (VT RC:$src1),
1001 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1002 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1003     def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
1004 (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1005 asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
1006     def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
1007 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1008 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// VCMPSS (f32) / VCMPSD (f64) instantiations.
1012 let Predicates = [HasAVX512] in {
1013 defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
1014 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1015 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
1017 defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
1018 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1019 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
// Packed integer compare producing a mask register. Uses the X86VectorVTInfo
// bundle "_" (see file head) for the register/mask classes and load
// fragment. The masked forms (rrk/rmk) model the writemask as an AND of the
// incoming mask with the compare result.
1023 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
1024 X86VectorVTInfo _> {
1025   def rr : AVX512BI<opc, MRMSrcReg,
1026 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
1027 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1028 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
1029 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1031   def rm : AVX512BI<opc, MRMSrcMem,
1032 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
1033 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1034 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1035 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
1036 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1037   def rrk : AVX512BI<opc, MRMSrcReg,
1038 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1039 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1040 "$dst {${mask}}, $src1, $src2}"),
1041 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1042 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
1043 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1045   def rmk : AVX512BI<opc, MRMSrcMem,
1046 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1047 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1048 "$dst {${mask}}, $src1, $src2}"),
1049 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1050 (OpNode (_.VT _.RC:$src1),
1052 (_.LdFrag addr:$src2))))))],
1053 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Extends avx512_icmp_packed with EVEX embedded-broadcast memory forms
// (rmb / rmbk): the scalar at $src2 is broadcast (X86VBroadcast of
// _.ScalarLdFrag) before the compare. Only valid for 32/64-bit elements,
// hence used for the D/Q compares below.
1056 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
1057 X86VectorVTInfo _> :
1058 avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
1059 let mayLoad = 1 in {
1060   def rmb : AVX512BI<opc, MRMSrcMem,
1061 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1062 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
1063 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1064 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1065 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
1066 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1067   def rmbk : AVX512BI<opc, MRMSrcMem,
1068 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1069 _.ScalarMemOp:$src2),
1070 !strconcat(OpcodeStr,
1071 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1072 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1073 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1074 (OpNode (_.VT _.RC:$src1),
1076 (_.ScalarLdFrag addr:$src2)))))],
1077 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Vector-length wrappers: instantiate the 512-bit form under the base
// predicate, and the 256/128-bit forms additionally under HasVLX.
1081 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
1082 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1083 let Predicates = [prd] in
1084   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
1087 let Predicates = [prd, HasVLX] in {
1088     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
1090     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
// Same, for the broadcast-capable (rmb) variant.
1095 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
1096 SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
1098 let Predicates = [prd] in
1099   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
1102 let Predicates = [prd, HasVLX] in {
1103     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
1105     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
// VPCMPEQ/VPCMPGT: byte/word forms need BWI and have no broadcast form;
// dword/qword forms need only AVX512F and use the rmb-capable multiclass.
1110 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
1111 avx512vl_i8_info, HasBWI>,
1114 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
1115 avx512vl_i16_info, HasBWI>,
1116 EVEX_CD8<16, CD8VF>;
1118 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1119 avx512vl_i32_info, HasAVX512>,
1120 EVEX_CD8<32, CD8VF>;
1122 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1123 avx512vl_i64_info, HasAVX512>,
1124 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1126 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
1127 avx512vl_i8_info, HasBWI>,
1130 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
1131 avx512vl_i16_info, HasBWI>,
1132 EVEX_CD8<16, CD8VF>;
1134 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1135 avx512vl_i32_info, HasAVX512>,
1136 EVEX_CD8<32, CD8VF>;
1138 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1139 avx512vl_i64_info, HasAVX512>,
1140 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// v8i32 compares via the 512-bit instruction: widen both operands with
// SUBREG_TO_REG and narrow the resulting v16i1 mask down to VK8.
1142 def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1143 (COPY_TO_REGCLASS (VPCMPGTDZrr
1144 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1145 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
1147 def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1148 (COPY_TO_REGCLASS (VPCMPEQDZrr
1149 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1150 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
// Integer compare with condition code (VPCMP${cc}*): rri/rmi plus masked
// rrik/rmik (mask ANDed with the result), and asm-parser-only *_alt forms
// that take the condition as an explicit i8 immediate.
1152 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
1153 X86VectorVTInfo _> {
1154   def rri : AVX512AIi8<opc, MRMSrcReg,
1155 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1156 !strconcat("vpcmp${cc}", Suffix,
1157 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1158 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1160 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1162   def rmi : AVX512AIi8<opc, MRMSrcMem,
1163 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
1164 !strconcat("vpcmp${cc}", Suffix,
1165 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1166 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1167 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1169 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1170   def rrik : AVX512AIi8<opc, MRMSrcReg,
1171 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1173 !strconcat("vpcmp${cc}", Suffix,
1174 "\t{$src2, $src1, $dst {${mask}}|",
1175 "$dst {${mask}}, $src1, $src2}"),
1176 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1177 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1179 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1181   def rmik : AVX512AIi8<opc, MRMSrcMem,
1182 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1184 !strconcat("vpcmp${cc}", Suffix,
1185 "\t{$src2, $src1, $dst {${mask}}|",
1186 "$dst {${mask}}, $src1, $src2}"),
1187 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1188 (OpNode (_.VT _.RC:$src1),
1189 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1191 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
1193 // Accept explicit immediate argument form instead of comparison code.
1194 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1195     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
1196 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
1197 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1198 "$dst, $src1, $src2, $cc}"),
1199 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1200     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
1201 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
1202 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1203 "$dst, $src1, $src2, $cc}"),
1204 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1205     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
1206 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1208 !strconcat("vpcmp", Suffix,
1209 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1210 "$dst {${mask}}, $src1, $src2, $cc}"),
1211 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1212     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
1213 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1215 !strconcat("vpcmp", Suffix,
1216 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1217 "$dst {${mask}}, $src1, $src2, $cc}"),
1218 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Adds EVEX embedded-broadcast forms (rmib/rmibk) to avx512_icmp_cc, plus
// their asm-parser-only explicit-immediate variants.
1222 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
1223 X86VectorVTInfo _> :
1224 avx512_icmp_cc<opc, Suffix, OpNode, _> {
1225 let mayLoad = 1 in {
1226   def rmib : AVX512AIi8<opc, MRMSrcMem,
1227 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1229 !strconcat("vpcmp${cc}", Suffix,
1230 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1231 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1232 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1233 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1235 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1236   def rmibk : AVX512AIi8<opc, MRMSrcMem,
1237 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1238 _.ScalarMemOp:$src2, AVXCC:$cc),
1239 !strconcat("vpcmp${cc}", Suffix,
1240 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1241 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1242 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1243 (OpNode (_.VT _.RC:$src1),
1244 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1246 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
1249 // Accept explicit immediate argument form instead of comparison code.
1250 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1251     def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
1252 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1254 !strconcat("vpcmp", Suffix,
1255 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
1256 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1257 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1258     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
1259 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1260 _.ScalarMemOp:$src2, i8imm:$cc),
1261 !strconcat("vpcmp", Suffix,
1262 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1263 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1264 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Vector-length wrappers for the condition-code compares: 512-bit under the
// base predicate, 256/128-bit additionally under HasVLX.
1268 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
1269 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1270 let Predicates = [prd] in
1271   defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
1273 let Predicates = [prd, HasVLX] in {
1274     defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
1275     defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
1279 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
1280 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1281 let Predicates = [prd] in
1282   defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
1285 let Predicates = [prd, HasVLX] in {
1286     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
1288     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
// VPCMP (signed, X86cmpm) and VPCMPU (unsigned, X86cmpmu). Opcodes 0x3F/0x3E
// cover b/w (VEX_W selects w); 0x1F/0x1E cover d/q (VEX_W selects q).
// D/Q use the broadcast-capable variant; B/W require BWI.
1293 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
1294 HasBWI>, EVEX_CD8<8, CD8VF>;
1295 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
1296 HasBWI>, EVEX_CD8<8, CD8VF>;
1298 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
1299 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1300 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
1301 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1303 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1304 HasAVX512>, EVEX_CD8<32, CD8VF>;
1305 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1306 HasAVX512>, EVEX_CD8<32, CD8VF>;
1308 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1309 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1310 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1311 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1313 // avx512_cmp_packed - compare packed instructions
// Packed FP compare (VCMPPS/VCMPPD): rri, rrib ({sae} suppress-all-exceptions
// form), rmi, plus asm-parser-only explicit-immediate variants.
1314 multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
1315 X86MemOperand x86memop, ValueType vt,
1316 string suffix, Domain d> {
1317   def rri : AVX512PIi8<0xC2, MRMSrcReg,
1318 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1319 !strconcat("vcmp${cc}", suffix,
1320 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1321 [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
1322   def rrib: AVX512PIi8<0xC2, MRMSrcReg,
1323 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1324 !strconcat("vcmp${cc}", suffix,
1325 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
1327   def rmi : AVX512PIi8<0xC2, MRMSrcMem,
1328 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
1329 !strconcat("vcmp${cc}", suffix,
1330 " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1332 (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
1334 // Accept explicit immediate argument form instead of comparison code.
1335 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1336     def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
1337 (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1338 !strconcat("vcmp", suffix,
1339 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1340     def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
1341 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1342 !strconcat("vcmp", suffix,
1343 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1347 defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
1348 "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
1349 EVEX_CD8<32, CD8VF>;
1350 defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
1351 "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
1352 EVEX_CD8<64, CD8VF>;
// 256-bit compares via the 512-bit instructions: widen the operands with
// SUBREG_TO_REG, compare, and narrow the resulting mask to VK8.
1354 def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1355 (COPY_TO_REGCLASS (VCMPPSZrri
1356 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1357 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1359 def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1360 (COPY_TO_REGCLASS (VPCMPDZrri
1361 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1362 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1364 def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1365 (COPY_TO_REGCLASS (VPCMPUDZrri
1366 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1367 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
// mask_cmp_ps/pd intrinsics with an all-ones mask: the first pair selects
// the {sae} (rrib) form, the second pair the normal rri form; the mask
// result is copied out to a GPR class.
1370 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1371 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1373 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
1374 (I8Imm imm:$cc)), GR16)>;
1376 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1377 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1379 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
1380 (I8Imm imm:$cc)), GR8)>;
1382 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1383 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1385 (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1386 (I8Imm imm:$cc)), GR16)>;
1388 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1389 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1391 (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1392 (I8Imm imm:$cc)), GR8)>;
1394 // Mask register copy, including
1395 // - copy between mask registers
1396 // - load/store mask registers
1397 // - copy from GPR to mask register and vice versa
// KMOV k<-k (kk), k<-mem (km, with a load pattern), mem<-k (mk).
1399 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1400 string OpcodeStr, RegisterClass KRC,
1401 ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
1402 let hasSideEffects = 0 in {
1403     def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1404 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1406     def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
1407 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1408 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
1410     def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
1411 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOV k<->GPR forms (kr / rk), no patterns.
1415 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1417 RegisterClass KRC, RegisterClass GRC> {
1418 let hasSideEffects = 0 in {
1419     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
1420 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1421     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
1422 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOVB (DQI), KMOVW (AVX512F), KMOVD/KMOVQ (BWI). Note each of KMOVD/KMOVQ
// is built from two defms with the same name — the record suffixes
// (kk/km/mk vs kr/rk) keep them distinct.
1426 let Predicates = [HasDQI] in
1427 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1429 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1432 let Predicates = [HasAVX512] in
1433 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1435 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
1438 let Predicates = [HasBWI] in {
1439 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1440 i32mem>, VEX, PD, VEX_W;
1441 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1445 let Predicates = [HasBWI] in {
1446 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1447 i64mem>, VEX, PS, VEX_W;
1448 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1452 // GR from/to mask register
// With DQI the 8-bit KMOVB forms are used directly; otherwise (below, under
// HasAVX512 only) 8-bit masks go through the 16-bit KMOVW forms.
1453 let Predicates = [HasDQI] in {
1454   def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1455 (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1456   def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1457 (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1459 let Predicates = [HasAVX512] in {
1460   def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1461 (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1462   def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1463 (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
1465 let Predicates = [HasBWI] in {
1466   def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1467   def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1469 let Predicates = [HasBWI] in {
1470   def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1471   def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
// Mask load/store patterns. Without DQI, 8-bit masks are widened to VK16
// and use the word-sized KMOVW memory forms.
1475 let Predicates = [HasDQI] in {
1476   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1477 (KMOVBmk addr:$dst, VK8:$src)>;
1479 let Predicates = [HasAVX512] in {
1480   def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
1481 (KMOVWmk addr:$dst, VK16:$src)>;
1482   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1483 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
1484   def : Pat<(i1 (load addr:$src)),
1485 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
1486   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
1487 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
1489 let Predicates = [HasBWI] in {
1490   def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1491 (KMOVDmk addr:$dst, VK32:$src)>;
1493 let Predicates = [HasBWI] in {
1494   def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1495 (KMOVQmk addr:$dst, VK64:$src)>;
// i1 from GPR truncation: mask off all but bit 0 with AND32ri (narrower
// sources are first widened with SUBREG_TO_REG / EXTRACT_SUBREG), move
// through KMOVW, and retype as VK1. zext of VK1 goes the other way:
// KMOVWrk then AND with 1 at the destination width.
1498 let Predicates = [HasAVX512] in {
1499   def : Pat<(i1 (trunc (i64 GR64:$src))),
1500 (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1503   def : Pat<(i1 (trunc (i32 GR32:$src))),
1504 (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
1506   def : Pat<(i1 (trunc (i8 GR8:$src))),
1508 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1510   def : Pat<(i1 (trunc (i16 GR16:$src))),
1512 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1515   def : Pat<(i32 (zext VK1:$src)),
1516 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
1517   def : Pat<(i8 (zext VK1:$src)),
1520 (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
1521   def : Pat<(i64 (zext VK1:$src)),
1522 (AND64ri8 (SUBREG_TO_REG (i64 0),
1523 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
1524   def : Pat<(i16 (zext VK1:$src)),
1526 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
// scalar_to_vector of an i1: a plain regclass retype, no instruction.
1528   def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1529 (COPY_TO_REGCLASS VK1:$src, VK16)>;
1530   def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1531 (COPY_TO_REGCLASS VK1:$src, VK8)>;
1533 let Predicates = [HasBWI] in {
1534   def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1535 (COPY_TO_REGCLASS VK1:$src, VK32)>;
1536   def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1537 (COPY_TO_REGCLASS VK1:$src, VK64)>;
1541 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1542 let Predicates = [HasAVX512] in {
1543 // GR from/to 8-bit mask without native support
1544 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1546 (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1548 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1550 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
// Extracting element 0 of a mask vector is a register-class retype to VK1.
1553 def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
1554 (COPY_TO_REGCLASS VK16:$src, VK1)>;
1555 def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
1556 (COPY_TO_REGCLASS VK8:$src, VK1)>;
1558 let Predicates = [HasBWI] in {
1559 def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1560 (COPY_TO_REGCLASS VK32:$src, VK1)>;
1561 def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1562 (COPY_TO_REGCLASS VK64:$src, VK1)>;
1565 // Mask unary operation
// avx512_mask_unop: one reg-reg mask instruction (KRC = OpNode KRC), gated
// on the Predicate parameter `prd` so each width requires the right feature.
1567 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
1568 RegisterClass KRC, SDPatternOperator OpNode,
1570 let Predicates = [prd] in
1571 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1572 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1573 [(set KRC:$dst, (OpNode KRC:$src))]>;
// avx512_mask_unop_all: instantiate the B/W/D/Q forms. W is the baseline
// AVX512 form; D/Q require BWI (B's predicate line is missing from this
// extraction — presumably HasDQI; verify against upstream).
1576 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1577 SDPatternOperator OpNode> {
1578 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1580 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1581 HasAVX512>, VEX, PS;
1582 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1583 HasBWI>, VEX, PD, VEX_W;
1584 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1585 HasBWI>, VEX, PS, VEX_W;
1588 defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
// Map the int_x86_avx512_<name>_w intrinsic (i16 mask in a GR16) onto the
// corresponding *Wrr instruction by round-tripping through VK16.
1590 multiclass avx512_mask_unop_int<string IntName, string InstName> {
1591 let Predicates = [HasAVX512] in
1592 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1594 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1595 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1597 defm : avx512_mask_unop_int<"knot", "KNOT">;
// xor with all-ones (i.e. bitwise NOT of a mask) selects to KNOT{B,W,D,Q},
// each gated on the feature that provides that width.
1599 let Predicates = [HasDQI] in
1600 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1601 let Predicates = [HasAVX512] in
1602 def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
1603 let Predicates = [HasBWI] in
1604 def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1605 let Predicates = [HasBWI] in
1606 def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1608 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
1609 let Predicates = [HasAVX512] in {
1610 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
1611 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
1613 def : Pat<(not VK8:$src),
1615 (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
1618 // Mask binary operation
1619 // - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop: one reg-reg-reg mask instruction
// (KRC:$dst = OpNode KRC:$src1, KRC:$src2), gated on `prd`.
1620 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
1621 RegisterClass KRC, SDPatternOperator OpNode,
1623 let Predicates = [prd] in
1624 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1625 !strconcat(OpcodeStr,
1626 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1627 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// Instantiate B/W/D/Q forms: B needs DQI, W is baseline AVX512, D/Q need
// BWI. All mask binops are VEX.L-encoded with a 4V (two-source) prefix.
1630 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1631 SDPatternOperator OpNode> {
1632 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1633 HasDQI>, VEX_4V, VEX_L, PD;
1634 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1635 HasAVX512>, VEX_4V, VEX_L, PS;
1636 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1637 HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1638 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1639 HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
// Pattern fragments for and-not and xnor, used by KANDN / KXNOR below.
1642 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
1643 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
1645 let isCommutable = 1 in {
1646 defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
1647 defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
1648 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1649 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
// KANDN is not commutative: its first operand is the complemented one.
1651 let isCommutable = 0 in
1652 defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
// i1 (VK1) logic ops have no native encoding: widen both operands to VK16,
// use the word-sized mask instruction, and retype the result back to VK1.
1654 def : Pat<(xor VK1:$src1, VK1:$src2),
1655 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1656 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1658 def : Pat<(or VK1:$src1, VK1:$src2),
1659 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1660 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1662 def : Pat<(and VK1:$src1, VK1:$src2),
1663 (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1664 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Map the two-operand int_x86_avx512_<name>_w intrinsics (i16 GR16 masks)
// onto the corresponding *Wrr instruction via VK16 round trips.
1666 multiclass avx512_mask_binop_int<string IntName, string InstName> {
1667 let Predicates = [HasAVX512] in
1668 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1669 (i16 GR16:$src1), (i16 GR16:$src2)),
1670 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1671 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1672 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1675 defm : avx512_mask_binop_int<"kand", "KAND">;
1676 defm : avx512_mask_binop_int<"kandn", "KANDN">;
1677 defm : avx512_mask_binop_int<"kor", "KOR">;
1678 defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1679 defm : avx512_mask_binop_int<"kxor", "KXOR">;
1681 // With AVX-512, 8-bit mask is promoted to 16-bit mask.
// avx512_binop_pat: select a v8i1 binop via the W-sized instruction `Inst`,
// promoting both operands through VK16 (no byte-sized mask op on KNL).
1682 multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1683 let Predicates = [HasAVX512] in
1684 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1686 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1687 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1690 defm : avx512_binop_pat<and, KANDWrr>;
1691 defm : avx512_binop_pat<andn, KANDNWrr>;
1692 defm : avx512_binop_pat<or, KORWrr>;
1693 defm : avx512_binop_pat<xnor, KXNORWrr>;
1694 defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpack (KUNPCKBW): asm-only reg-reg form, no ISel pattern attached.
1697 multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
1698 RegisterClass KRC> {
1699 let Predicates = [HasAVX512] in
1700 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1701 !strconcat(OpcodeStr,
1702 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
1705 multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
1706 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
1710 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
// concat of two v8i1 masks selects to KUNPCKBW; note the operands are
// swapped (src2 first) relative to the concat_vectors order.
1711 def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1712 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1713 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
// Intrinsic form of kunpck, mirroring avx512_mask_binop_int above.
1716 multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1717 let Predicates = [HasAVX512] in
1718 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1719 (i16 GR16:$src1), (i16 GR16:$src2)),
1720 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1721 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1722 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1724 defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// Mask test (KORTEST): compares two mask registers and writes EFLAGS only
// (no mask destination), hence Defs = [EFLAGS] and empty outs.
1727 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1729 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1730 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
1731 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
1732 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1735 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1736 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1740 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
// Compare an i1 against 0 by or-testing the (VK16-widened) mask with itself.
1742 def : Pat<(X86cmp VK1:$src1, (i1 0)),
1743 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1744 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
// Mask shift by immediate (KSHIFTL/KSHIFTR), reg + 8-bit immediate form.
1747 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1749 let Predicates = [HasAVX512] in
1750 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1751 !strconcat(OpcodeStr,
1752 " \t{$imm, $src, $dst|$dst, $src, $imm}"),
1753 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
1756 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1758 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1762 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1763 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
1765 // Mask setting all 0s or 1s
// Pseudo-instruction that materializes an all-zeros / all-ones mask; cheap
// and rematerializable so the register allocator can recreate it freely.
1766 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1767 let Predicates = [HasAVX512] in
1768 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1769 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1770 [(set KRC:$dst, (VT Val))]>;
1773 multiclass avx512_mask_setop_w<PatFrag Val> {
1774 defm B : avx512_mask_setop<VK8, v8i1, Val>;
1775 defm W : avx512_mask_setop<VK16, v16i1, Val>;
1778 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1779 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1781 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1782 let Predicates = [HasAVX512] in {
1783 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1784 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1785 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1786 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1787 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
// Mask subvector extract/insert at index 0 are pure register retypes;
// extracting the high half (index 8) needs a KSHIFTRW by 8 first.
1789 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1790 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1792 def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1793 (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
1795 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1796 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
// v8i1 shifts are widened to the W-sized kshift and narrowed back.
1798 def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
1799 (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1801 def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
1802 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1803 //===----------------------------------------------------------------------===//
1804 // AVX-512 - Aligned and unaligned load and store
// avx512_load: one EVEX load/move family for a single vector width —
// rr (move), rrkz (zero-masked move), rm (load), rrk/rmk (merge-masked,
// tied to $src0), rmkz (zero-masked load). Masked forms select vselect
// patterns; plain loads go through a bitconvert of ld_frag.
// NOTE(review): several interior lines (pattern tails, closing braces) are
// missing from this extraction; code is left byte-identical.
1807 multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
1808 RegisterClass KRC, RegisterClass RC,
1809 ValueType vt, ValueType zvt, X86MemOperand memop,
1810 Domain d, bit IsReMaterializable = 1> {
1811 let hasSideEffects = 0 in {
1812 def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1813 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
1815 def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
1816 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1817 "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
1819 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
1820 SchedRW = [WriteLoad] in
1821 def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
1822 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1823 [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
1826 let AddedComplexity = 20 in {
1827 let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
1828 let hasSideEffects = 0 in
1829 def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
1830 (ins RC:$src0, KRC:$mask, RC:$src1),
1831 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1832 "${dst} {${mask}}, $src1}"),
1833 [(set RC:$dst, (vt (vselect KRC:$mask,
1837 let mayLoad = 1, SchedRW = [WriteLoad] in
1838 def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1839 (ins RC:$src0, KRC:$mask, memop:$src1),
1840 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1841 "${dst} {${mask}}, $src1}"),
1844 (vt (bitconvert (ld_frag addr:$src1))),
1848 let mayLoad = 1, SchedRW = [WriteLoad] in
1849 def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1850 (ins KRC:$mask, memop:$src),
1851 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1852 "${dst} {${mask}} {z}, $src}"),
1855 (vt (bitconvert (ld_frag addr:$src))),
1856 (vt (bitconvert (zvt immAllZerosV))))))],
// avx512_load_vl: instantiate Z (512-bit, baseline `prd`) plus Z256/Z128
// (gated additionally on HasVLX). All register classes, value types, memory
// operands and load fragments are derived by string concatenation from the
// element type/size and vector-size parameters; 128/256-bit integer loads
// use v2i64/v4i64 fragments (load promotion during legalization).
1861 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
1862 string elty, string elsz, string vsz512,
1863 string vsz256, string vsz128, Domain d,
1864 Predicate prd, bit IsReMaterializable = 1> {
1865 let Predicates = [prd] in
1866 defm Z : avx512_load<opc, OpcodeStr,
1867 !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
1868 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1869 !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
1870 !cast<X86MemOperand>(elty##"512mem"), d,
1871 IsReMaterializable>, EVEX_V512;
1873 let Predicates = [prd, HasVLX] in {
1874 defm Z256 : avx512_load<opc, OpcodeStr,
1875 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1876 "v"##vsz256##elty##elsz, "v4i64")),
1877 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1878 !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
1879 !cast<X86MemOperand>(elty##"256mem"), d,
1880 IsReMaterializable>, EVEX_V256;
1882 defm Z128 : avx512_load<opc, OpcodeStr,
1883 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1884 "v"##vsz128##elty##elsz, "v2i64")),
1885 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1886 !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
1887 !cast<X86MemOperand>(elty##"128mem"), d,
1888 IsReMaterializable>, EVEX_V128;
// avx512_store: the store side of a move family — asm-parser-only reversed
// register forms (rr_alt/rrk_alt/rrkz_alt, no patterns) plus real memory
// stores: mr (plain, selects st_frag) and mrk (merge-masked, no pattern).
1893 multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
1894 ValueType OpVT, RegisterClass KRC, RegisterClass RC,
1895 X86MemOperand memop, Domain d> {
1896 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1897 def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
1898 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
1900 let Constraints = "$src1 = $dst" in
1901 def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1902 (ins RC:$src1, KRC:$mask, RC:$src2),
1903 !strconcat(OpcodeStr,
1904 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1906 def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1907 (ins KRC:$mask, RC:$src),
1908 !strconcat(OpcodeStr,
1909 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
1910 [], d>, EVEX, EVEX_KZ;
1912 let mayStore = 1 in {
1913 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
1914 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1915 [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
1916 def mrk : AVX512PI<opc, MRMDestMem, (outs),
1917 (ins memop:$dst, KRC:$mask, RC:$src),
1918 !strconcat(OpcodeStr,
1919 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
1920 [], d>, EVEX, EVEX_K;
// avx512_store_vl: 512/256/128-bit instantiations mirroring avx512_load_vl;
// the st_suff_* strings pick the store fragment name per width.
1925 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
1926 string st_suff_512, string st_suff_256,
1927 string st_suff_128, string elty, string elsz,
1928 string vsz512, string vsz256, string vsz128,
1929 Domain d, Predicate prd> {
1930 let Predicates = [prd] in
1931 defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
1932 !cast<ValueType>("v"##vsz512##elty##elsz),
1933 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1934 !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
1936 let Predicates = [prd, HasVLX] in {
1937 defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
1938 !cast<ValueType>("v"##vsz256##elty##elsz),
1939 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1940 !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
1942 defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
1943 !cast<ValueType>("v"##vsz128##elty##elsz),
1944 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1945 !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
// FP vector moves: VMOVAPS/VMOVAPD (aligned) and VMOVUPS/VMOVUPD
// (unaligned), each a combined load_vl + store_vl instantiation.
// VMOVUPD's load side passes IsReMaterializable = 0.
1949 defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
1950 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1951 avx512_store_vl<0x29, "vmovaps", "alignedstore",
1952 "512", "256", "", "f", "32", "16", "8", "4",
1953 SSEPackedSingle, HasAVX512>,
1954 PS, EVEX_CD8<32, CD8VF>;
1956 defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
1957 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1958 avx512_store_vl<0x29, "vmovapd", "alignedstore",
1959 "512", "256", "", "f", "64", "8", "4", "2",
1960 SSEPackedDouble, HasAVX512>,
1961 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1963 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
1964 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1965 avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
1966 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1967 PS, EVEX_CD8<32, CD8VF>;
1969 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
1970 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
1971 avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
1972 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1973 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked loadu/storeu intrinsics with a zero passthru map onto the
// zero-masking (rmkz) / masked-store (mrk) forms; the GR8/GR16 mask is
// retyped into the matching write-mask class first.
1975 def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
1976 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
1977 (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1979 def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
1980 (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
1981 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1983 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
1985 (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1987 def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
1989 (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Integer vector moves: VMOVDQA32/64 (aligned, AVX512) and VMOVDQU8/16
// (unaligned, BWI) / VMOVDQU32/64 (unaligned, AVX512), combined
// load_vl + store_vl instantiations.
1992 defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
1993 "16", "8", "4", SSEPackedInt, HasAVX512>,
1994 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
1995 "512", "256", "", "i", "32", "16", "8", "4",
1996 SSEPackedInt, HasAVX512>,
1997 PD, EVEX_CD8<32, CD8VF>;
1999 defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
2000 "8", "4", "2", SSEPackedInt, HasAVX512>,
2001 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
2002 "512", "256", "", "i", "64", "8", "4", "2",
2003 SSEPackedInt, HasAVX512>,
2004 PD, VEX_W, EVEX_CD8<64, CD8VF>;
2006 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
2007 "64", "32", "16", SSEPackedInt, HasBWI>,
2008 avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
2009 "i", "8", "64", "32", "16", SSEPackedInt,
2010 HasBWI>, XD, EVEX_CD8<8, CD8VF>;
2012 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
2013 "32", "16", "8", SSEPackedInt, HasBWI>,
2014 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
2015 "i", "16", "32", "16", "8", SSEPackedInt,
2016 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
2018 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
2019 "16", "8", "4", SSEPackedInt, HasAVX512>,
2020 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
2021 "i", "32", "16", "8", "4", SSEPackedInt,
2022 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
2024 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
2025 "8", "4", "2", SSEPackedInt, HasAVX512>,
2026 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
2027 "i", "64", "8", "4", "2", SSEPackedInt,
2028 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked integer loadu/storeu intrinsics, mirroring the FP patterns above.
2030 def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
2031 (v16i32 immAllZerosV), GR16:$mask)),
2032 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
2034 def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
2035 (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
2036 (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
2038 def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
2040 (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
2042 def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
2044 (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// vselect with a zero vector on either side maps to the zero-masking move;
// when the zero vector is the true operand the mask is inverted with KNOT.
2047 let AddedComplexity = 20 in {
2048 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
2049 (bc_v8i64 (v16i32 immAllZerosV)))),
2050 (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
2052 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
2053 (v8i64 VR512:$src))),
2054 (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
2057 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
2058 (v16i32 immAllZerosV))),
2059 (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
2061 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
2062 (v16i32 VR512:$src))),
2063 (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
2066 // Move Int Doubleword to Packed Double Int
// EVEX-encoded VMOVD/VMOVQ: GR32/GR64 (or a 32/64-bit load) into the low
// element of an XMM register, plus isCodeGenOnly bitconvert forms that move
// a GR64 into/out of an FR64 scalar.
2068 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
2069 "vmovd\t{$src, $dst|$dst, $src}",
2071 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
2073 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
2074 "vmovd\t{$src, $dst|$dst, $src}",
2076 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
2077 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2078 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
2079 "vmovq\t{$src, $dst|$dst, $src}",
2081 (v2i64 (scalar_to_vector GR64:$src)))],
2082 IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
2083 let isCodeGenOnly = 1 in {
2084 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
2085 "vmovq\t{$src, $dst|$dst, $src}",
2086 [(set FR64:$dst, (bitconvert GR64:$src))],
2087 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2088 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
2089 "vmovq\t{$src, $dst|$dst, $src}",
2090 [(set GR64:$dst, (bitconvert FR64:$src))],
2091 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2093 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
2094 "vmovq\t{$src, $dst|$dst, $src}",
2095 [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
2096 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
2097 EVEX_CD8<64, CD8VT1>;
2099 // Move Int Doubleword to Single Scalar
// isCodeGenOnly bitconvert moves between GR32 (or a 32-bit load) and FR32X.
2101 let isCodeGenOnly = 1 in {
2102 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
2103 "vmovd\t{$src, $dst|$dst, $src}",
2104 [(set FR32X:$dst, (bitconvert GR32:$src))],
2105 IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
2107 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
2108 "vmovd\t{$src, $dst|$dst, $src}",
2109 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
2110 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2113 // Move doubleword from xmm register to r/m32
// Extract element 0 of a v4i32 to GR32 or store it directly to memory.
2115 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
2116 "vmovd\t{$src, $dst|$dst, $src}",
2117 [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
2118 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
2120 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2121 (ins i32mem:$dst, VR128X:$src),
2122 "vmovd\t{$src, $dst|$dst, $src}",
2123 [(store (i32 (vector_extract (v4i32 VR128X:$src),
2124 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
2125 EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2127 // Move quadword from xmm1 register to r/m64
// 64-bit extracts require 64-bit mode (In64BitMode predicate).
2129 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
2130 "vmovq\t{$src, $dst|$dst, $src}",
2131 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
2133 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
2134 Requires<[HasAVX512, In64BitMode]>;
2136 def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
2137 (ins i64mem:$dst, VR128X:$src),
2138 "vmovq\t{$src, $dst|$dst, $src}",
2139 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
2140 addr:$dst)], IIC_SSE_MOVDQ>,
2141 EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
2142 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2144 // Move Scalar Single to Double Int
2146 let isCodeGenOnly = 1 in {
2147 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
2149 "vmovd\t{$src, $dst|$dst, $src}",
2150 [(set GR32:$dst, (bitconvert FR32X:$src))],
2151 IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
2152 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2153 (ins i32mem:$dst, FR32X:$src),
2154 "vmovd\t{$src, $dst|$dst, $src}",
2155 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
2156 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2159 // Move Quadword Int to Packed Quadword Int
// Load a 64-bit integer from memory into the low qword of an XMM register.
2161 def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
2163 "vmovq\t{$src, $dst|$dst, $src}",
2165 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
2166 EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2168 //===----------------------------------------------------------------------===//
2169 // AVX-512 MOVSS, MOVSD
2170 //===----------------------------------------------------------------------===//
// avx512_move_scalar: scalar move family — rr (merge low element via
// OpNode), rrk (masked merge, asm-only pattern-less, tied to $src1),
// rm/mr (scalar load/store), mrk (masked store, pattern-less).
2172 multiclass avx512_move_scalar <string asm, RegisterClass RC,
2173 SDNode OpNode, ValueType vt,
2174 X86MemOperand x86memop, PatFrag mem_pat> {
2175 let hasSideEffects = 0 in {
2176 def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
2177 !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2178 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
2179 (scalar_to_vector RC:$src2))))],
2180 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
2181 let Constraints = "$src1 = $dst" in
2182 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
2183 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
2185 " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
2186 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
2187 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2188 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2189 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
2191 let mayStore = 1 in {
2192 def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2193 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2194 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
2196 def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
2197 !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
2198 [], IIC_SSE_MOV_S_MR>,
2199 EVEX, VEX_LIG, EVEX_K;
2201 } //hasSideEffects = 0
2204 let ExeDomain = SSEPackedSingle in
2205 defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
2206 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
2208 let ExeDomain = SSEPackedDouble in
2209 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
2210 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar select on an i1 mask lowers to the masked (rrk) scalar move,
// shuttling the FR32X/FR64X values through VR128X.
2212 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
2213 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
2214 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
2216 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
2217 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
2218 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
2220 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
2221 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
2222 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2224 // For the disassembler
2225 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
2226 def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2227 (ins VR128X:$src1, FR32X:$src2),
2228 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2230 XS, EVEX_4V, VEX_LIG;
2231 def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2232 (ins VR128X:$src1, FR64X:$src2),
2233 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2235 XD, EVEX_4V, VEX_LIG, VEX_W;
2238 let Predicates = [HasAVX512] in {
2239 let AddedComplexity = 15 in {
2240 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2241 // MOVS{S,D} to the lower bits.
2242 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2243 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2244 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2245 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2246 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2247 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2248 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2249 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2251 // Move low f32 and clear high bits.
2252 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2253 (SUBREG_TO_REG (i32 0),
2254 (VMOVSSZrr (v4f32 (V_SET0)),
2255 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2256 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2257 (SUBREG_TO_REG (i32 0),
2258 (VMOVSSZrr (v4i32 (V_SET0)),
2259 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2262 let AddedComplexity = 20 in {
2263 // MOVSSrm zeros the high parts of the register; represent this
2264 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2265 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2266 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2267 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2268 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2269 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2270 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2272 // MOVSDrm zeros the high parts of the register; represent this
2273 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2274 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2275 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2276 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2277 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2278 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2279 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2280 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2281 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2282 def : Pat<(v2f64 (X86vzload addr:$src)),
2283 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2285 // Represent the same patterns above but in the form they appear for
2287 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2288 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
2289 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
2290 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2291 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2292 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2293 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2294 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2295 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2297 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2298 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2299 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2300 FR32X:$src)), sub_xmm)>;
2301 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2302 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2303 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2304 FR64X:$src)), sub_xmm)>;
2305 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2306 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
2307 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
2309 // Move low f64 and clear high bits.
// vzmovl of a 256-bit register: merge the low f64 into a zero vector with
// VMOVSDZrr on the low xmm half, then widen the result back with
// SUBREG_TO_REG (the upper 128 bits are zeroed by the 128-bit write).
2310 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2311 (SUBREG_TO_REG (i32 0),
2312 (VMOVSDZrr (v2f64 (V_SET0)),
2313 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
// Same for the integer v4i64 type, reusing the FP move (domain crossing is
// acceptable here; see the sibling SSE/AVX patterns elsewhere in the file).
2315 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2316 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2317 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2319 // Extract and store.
// Storing element 0 of a 128-bit vector is just a scalar EVEX store; the
// COPY_TO_REGCLASS reinterprets the xmm register as a scalar FR32X/FR64X.
2320 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2322 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2323 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2325 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2327 // Shuffle with VMOVSS
// X86Movss keeps elements 1-3 of $src1 and takes element 0 from $src2;
// that is exactly VMOVSSZrr's semantics, so select it directly for both the
// integer and FP 128-bit types.
2328 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2329 (VMOVSSZrr (v4i32 VR128X:$src1),
2330 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2331 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2332 (VMOVSSZrr (v4f32 VR128X:$src1),
2333 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit Movss: operate on the low xmm halves and widen the result.
// NOTE(review): widening with SUBREG_TO_REG leaves the upper 128 bits
// implicitly defined by the 128-bit instruction's zeroing write — matches the
// sibling AVX patterns; confirm against those if editing.
2336 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2337 (SUBREG_TO_REG (i32 0),
2338 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2339 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2341 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2342 (SUBREG_TO_REG (i32 0),
2343 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2344 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2347 // Shuffle with VMOVSD
// X86Movsd: low f64/i64 from $src2, high from $src1 — VMOVSDZrr directly.
// The v4f32/v4i32 forms treat the vector as two 64-bit lanes.
2348 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2349 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2350 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2351 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2352 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2353 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2354 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2355 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit Movsd variants, low-half + widen as for Movss above.
2358 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2359 (SUBREG_TO_REG (i32 0),
2360 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2361 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2363 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2364 (SUBREG_TO_REG (i32 0),
2365 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2366 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
// X86Movlpd/X86Movlps (replace the low 64 bits) have the same effect as
// Movsd on register operands, so they map to VMOVSDZrr as well.
2369 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2370 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2371 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2372 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2373 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2374 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2375 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2376 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// VMOVZPQILo2PQI: move the low quadword and zero the upper one (selected
// from X86vzmovl on v2i64).  Register and load forms; AddedComplexity biases
// selection over more generic patterns.
2379 let AddedComplexity = 15 in
2380 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
2382 "vmovq\t{$src, $dst|$dst, $src}",
2383 [(set VR128X:$dst, (v2i64 (X86vzmovl
2384 (v2i64 VR128X:$src))))],
2385 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
2387 let AddedComplexity = 20 in
2388 def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
2390 "vmovq\t{$src, $dst|$dst, $src}",
2391 [(set VR128X:$dst, (v2i64 (X86vzmovl
2392 (loadv2i64 addr:$src))))],
2393 IIC_SSE_MOVDQ>, EVEX, VEX_W,
// NOTE(review): EVEX_CD8<8, CD8VT8> on a 64-bit quadword load looks unusual
// next to the CD8<64, ...> used by neighboring defs — confirm the intended
// compressed-displacement scale.
2394 EVEX_CD8<8, CD8VT8>;
// Selection patterns gated on AVX-512 that route scalar-to-vector
// zero-extensions through the EVEX movd/movq instructions.
2396 let Predicates = [HasAVX512] in {
2397 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
2398 let AddedComplexity = 20 in {
2399 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
2400 (VMOVDI2PDIZrm addr:$src)>;
2401 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
2402 (VMOV64toPQIZrr GR64:$src)>;
2403 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
2404 (VMOVDI2PDIZrr GR32:$src)>;
// Bitcasted-load forms: a vzmovl over a bitcast of a wider vector load still
// only needs the 32-bit scalar load.
2406 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
2407 (VMOVDI2PDIZrm addr:$src)>;
2408 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
2409 (VMOVDI2PDIZrm addr:$src)>;
2410 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
2411 (VMOVZPQILo2PQIZrm addr:$src)>;
2412 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
2413 (VMOVZPQILo2PQIZrr VR128X:$src)>;
2414 def : Pat<(v2i64 (X86vzload addr:$src)),
2415 (VMOVZPQILo2PQIZrm addr:$src)>;
2418 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
2419 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2420 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
2421 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
2422 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2423 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
2424 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
// X86Vinsert of a GPR at element 0 of an all-zero (or undef) 512-bit vector
// is just a zero-extending movd/movq widened into the zmm register.
2427 def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
2428 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2430 def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
2431 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2433 def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
2434 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2436 def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
2437 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2439 //===----------------------------------------------------------------------===//
2440 // AVX-512 - Non-temporals
2441 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads.  The 512-bit form selects from the
// movntdqa intrinsic; the 128/256-bit VLX forms carry no selection pattern
// (empty []) and exist for encoding/assembly only here.
2442 let SchedRW = [WriteLoad] in {
2443 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
2444 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
2445 [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
2446 SSEPackedInt>, EVEX, T8PD, EVEX_V512,
2447 EVEX_CD8<64, CD8VF>;
2449 let Predicates = [HasAVX512, HasVLX] in {
2450 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
2452 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2453 SSEPackedInt>, EVEX, T8PD, EVEX_V256,
2454 EVEX_CD8<64, CD8VF>;
2456 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
2458 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2459 SSEPackedInt>, EVEX, T8PD, EVEX_V128,
2460 EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction (memory destination only, mayStore).
// High AddedComplexity so the NT store wins over a regular store pattern.
//   st_frag - the non-temporal store PatFrag to match
//   OpVT/RC - vector type and register class of the stored value
//   memop   - memory operand for the destination
2464 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2465 ValueType OpVT, RegisterClass RC, X86MemOperand memop,
2466 Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
2467 let SchedRW = [WriteStore], mayStore = 1,
2468 AddedComplexity = 400 in
2469 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
2470 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2471 [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
// Instantiate the Z (512-bit) variant plus, under VLX, Z256/Z128 variants.
// The vector type and memory operand names are assembled from the element
// type letter (elty), element size (elsz) and per-width element counts
// (vsz512/vsz256/vsz128) via !cast, e.g. "v" # "8" # "i" # "64" -> v8i64.
2474 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2475 string elty, string elsz, string vsz512,
2476 string vsz256, string vsz128, Domain d,
2477 Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
2478 let Predicates = [prd] in
2479 defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
2480 !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
2481 !cast<X86MemOperand>(elty##"512mem"), d, itin>,
2484 let Predicates = [prd, HasVLX] in {
2485 defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
2486 !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
2487 !cast<X86MemOperand>(elty##"256mem"), d, itin>,
2490 defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
2491 !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
2492 !cast<X86MemOperand>(elty##"128mem"), d, itin>,
// The three AVX-512 NT stores: integer qwords, packed double, packed single.
2497 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
2498 "i", "64", "8", "4", "2", SSEPackedInt,
2499 HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
2501 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
2502 "f", "64", "8", "4", "2", SSEPackedDouble,
2503 HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2505 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
2506 "f", "32", "16", "8", "4", SSEPackedSingle,
2507 HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2509 //===----------------------------------------------------------------------===//
2510 // AVX-512 - Integer arithmetic
// Integer binop with the full EVEX form matrix.  Suffix scheme:
//   rr/rm   - reg-reg / reg-mem, unmasked (carry the selection pattern)
//   *k      - merge-masking: vselect with a $src0 passthru tied to $dst
//   *kz     - zero-masking: vselect against immAllZerosV (EVEX_KZ)
//   *b      - embedded broadcast of a scalar memory operand (EVEX_B),
//             matched via X86VBroadcast with BrdcstStr (e.g. "{1to16}")
// KRC is the write-mask register class; memop_frag/scalar_mfrag are the
// vector and scalar load fragments.
2512 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2513 ValueType OpVT, RegisterClass KRC,
2514 RegisterClass RC, PatFrag memop_frag,
2515 X86MemOperand x86memop, PatFrag scalar_mfrag,
2516 X86MemOperand x86scalar_mop, string BrdcstStr,
2517 OpndItins itins, bit IsCommutable = 0> {
2518 let isCommutable = IsCommutable in
2519 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2520 (ins RC:$src1, RC:$src2),
2521 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2522 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2524 let AddedComplexity = 30 in {
2525 let Constraints = "$src0 = $dst" in
2526 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2527 (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
2528 !strconcat(OpcodeStr,
2529 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2530 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2531 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2533 itins.rr>, EVEX_4V, EVEX_K;
2534 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2535 (ins KRC:$mask, RC:$src1, RC:$src2),
2536 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2537 "|$dst {${mask}} {z}, $src1, $src2}"),
2538 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2539 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2540 (OpVT immAllZerosV))))],
2541 itins.rr>, EVEX_4V, EVEX_KZ;
// Memory-operand forms mirror the register forms above.
2544 let mayLoad = 1 in {
2545 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2546 (ins RC:$src1, x86memop:$src2),
2547 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2548 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
2550 let AddedComplexity = 30 in {
2551 let Constraints = "$src0 = $dst" in
2552 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2553 (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
2554 !strconcat(OpcodeStr,
2555 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2556 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2557 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2559 itins.rm>, EVEX_4V, EVEX_K;
2560 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2561 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2562 !strconcat(OpcodeStr,
2563 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2564 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2565 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2566 (OpVT immAllZerosV))))],
2567 itins.rm>, EVEX_4V, EVEX_KZ;
// Broadcast forms: a single scalar load splat across all elements (EVEX_B).
2569 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2570 (ins RC:$src1, x86scalar_mop:$src2),
2571 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2572 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2573 [(set RC:$dst, (OpNode RC:$src1,
2574 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2575 itins.rm>, EVEX_4V, EVEX_B;
2576 let AddedComplexity = 30 in {
2577 let Constraints = "$src0 = $dst" in
2578 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2579 (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2580 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2581 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2583 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2584 (OpNode (OpVT RC:$src1),
2585 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2587 itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2588 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2589 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2590 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2591 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2593 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2594 (OpNode (OpVT RC:$src1),
2595 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2596 (OpVT immAllZerosV))))],
2597 itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// Variant of avx512_binop_rm for ops whose destination type differs from the
// source type (e.g. vpmuldq: v16i32 sources, v8i64 result).  All forms have
// empty pattern lists ([]); selection is done by explicit Pat<>s below.
// Same rr/rm/k/kz/b suffix matrix as avx512_binop_rm.
2602 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
2603 ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
2604 PatFrag memop_frag, X86MemOperand x86memop,
2605 PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
2606 string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
2607 let isCommutable = IsCommutable in
2609 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2610 (ins RC:$src1, RC:$src2),
2611 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2613 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2614 (ins KRC:$mask, RC:$src1, RC:$src2),
2615 !strconcat(OpcodeStr,
2616 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2617 [], itins.rr>, EVEX_4V, EVEX_K;
2618 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2619 (ins KRC:$mask, RC:$src1, RC:$src2),
2620 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2621 "|$dst {${mask}} {z}, $src1, $src2}"),
2622 [], itins.rr>, EVEX_4V, EVEX_KZ;
2624 let mayLoad = 1 in {
2625 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2626 (ins RC:$src1, x86memop:$src2),
2627 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2629 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2630 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2631 !strconcat(OpcodeStr,
2632 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2633 [], itins.rm>, EVEX_4V, EVEX_K;
2634 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2635 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2636 !strconcat(OpcodeStr,
2637 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2638 [], itins.rm>, EVEX_4V, EVEX_KZ;
2639 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2640 (ins RC:$src1, x86scalar_mop:$src2),
2641 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2642 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2643 [], itins.rm>, EVEX_4V, EVEX_B;
2644 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2645 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2646 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2647 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2649 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2650 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2651 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2652 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2653 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2655 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit integer add/sub/mul instantiations (d = 32-bit elems with {1to16}
// broadcast, q = 64-bit elems with {1to8}).  Last template arg is
// IsCommutable: 1 for add/mul, 0 for sub.
2659 defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
2660 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2661 SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2663 defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
2664 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2665 SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2667 defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
2668 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2669 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2671 defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
2672 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2673 SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
2675 defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
2676 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2677 SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Widening 32x32->64 multiplies use the rm2 (DstVT != SrcVT) multiclass;
// the selection patterns for them follow below.
2679 defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
2680 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2681 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
2682 EVEX_CD8<64, CD8VF>, VEX_W;
2684 defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
2685 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2686 SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// Select the DAG node and the unmasked (zero passthru, all-ones mask)
// intrinsic forms onto the rr instructions defined above.
2688 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
2689 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2691 def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
2692 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2693 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2694 def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
2695 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2696 (VPMULDQZrr VR512:$src1, VR512:$src2)>;
// Packed integer min/max: signed (s) and unsigned (u), dword and qword.
// Dword forms are marked commutable; qword forms are not (IsCommutable = 0).
2698 defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
2699 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2700 SSE_INTALU_ITINS_P, 1>,
2701 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2702 defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
2703 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2704 SSE_INTALU_ITINS_P, 0>,
2705 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2707 defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
2708 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2709 SSE_INTALU_ITINS_P, 1>,
2710 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2711 defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
2712 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2713 SSE_INTALU_ITINS_P, 0>,
2714 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2716 defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
2717 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2718 SSE_INTALU_ITINS_P, 1>,
2719 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2720 defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
2721 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2722 SSE_INTALU_ITINS_P, 0>,
2723 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2725 defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
2726 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2727 SSE_INTALU_ITINS_P, 1>,
2728 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2729 defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
2730 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2731 SSE_INTALU_ITINS_P, 0>,
2732 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Map the unmasked intrinsic calls (zero passthru, all-ones mask: i16 -1 for
// 16 dword lanes, i8 -1 for 8 qword lanes) onto the plain rr instructions.
2734 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
2735 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2736 (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
2737 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
2738 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2739 (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
2740 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
2741 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2742 (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
2743 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
2744 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2745 (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
2746 def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
2747 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2748 (VPMINSDZrr VR512:$src1, VR512:$src2)>;
2749 def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
2750 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2751 (VPMINUDZrr VR512:$src1, VR512:$src2)>;
2752 def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
2753 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2754 (VPMINSQZrr VR512:$src1, VR512:$src2)>;
2755 def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
2756 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2757 (VPMINUQZrr VR512:$src1, VR512:$src2)>;
2758 //===----------------------------------------------------------------------===//
2759 // AVX-512 - Unpack Instructions
2760 //===----------------------------------------------------------------------===//
// FP unpack (unpckh/unpckl): rr and rm forms; the rm form bitcasts the
// loaded memory to the operation's vector type.
2762 multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
2763 PatFrag mem_frag, RegisterClass RC,
2764 X86MemOperand x86memop, string asm,
2766 def rr : AVX512PI<opc, MRMSrcReg,
2767 (outs RC:$dst), (ins RC:$src1, RC:$src2),
2769 (vt (OpNode RC:$src1, RC:$src2)))],
2771 def rm : AVX512PI<opc, MRMSrcMem,
2772 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2774 (vt (OpNode RC:$src1,
2775 (bitconvert (mem_frag addr:$src2)))))],
// 512-bit FP unpack instantiations.  Note all four pass memopv8f64 as the
// memory fragment even for the v16f32 ops; the rm pattern bitconverts, so
// only the load width matters here.
2779 defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
2780 VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2781 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2782 defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
2783 VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2784 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2785 defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
2786 VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2787 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2788 defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
2789 VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2790 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer unpack (punpckl/punpckh, dword and qword): rr and rm forms, the
// rm form bitcasting the loaded value to OpVT.
2792 multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
2793 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2794 X86MemOperand x86memop> {
2795 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2796 (ins RC:$src1, RC:$src2),
2797 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2798 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2799 IIC_SSE_UNPCK>, EVEX_4V;
2800 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2801 (ins RC:$src1, x86memop:$src2),
2802 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2803 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
2804 (bitconvert (memop_frag addr:$src2)))))],
2805 IIC_SSE_UNPCK>, EVEX_4V;
// 512-bit integer unpack instantiations.
2807 defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
2808 VR512, memopv16i32, i512mem>, EVEX_V512,
2809 EVEX_CD8<32, CD8VF>;
2810 defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
2811 VR512, memopv8i64, i512mem>, EVEX_V512,
2812 VEX_W, EVEX_CD8<64, CD8VF>;
2813 defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
2814 VR512, memopv16i32, i512mem>, EVEX_V512,
2815 EVEX_CD8<32, CD8VF>;
2816 defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
2817 VR512, memopv8i64, i512mem>, EVEX_V512,
2818 VEX_W, EVEX_CD8<64, CD8VF>;
2819 //===----------------------------------------------------------------------===//
// Immediate-controlled shuffles (pshufd / vpermilps / vpermilpd style):
// ri takes a register source + 8-bit immediate, mi shuffles directly from
// memory with an immediate.
2823 multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
2824 SDNode OpNode, PatFrag mem_frag,
2825 X86MemOperand x86memop, ValueType OpVT> {
2826 def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
2827 (ins RC:$src1, i8imm:$src2),
2828 !strconcat(OpcodeStr,
2829 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2831 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
2833 def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
2834 (ins x86memop:$src1, i8imm:$src2),
2835 !strconcat(OpcodeStr,
2836 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2838 (OpVT (OpNode (mem_frag addr:$src1),
2839 (i8 imm:$src2))))]>, EVEX;
2842 defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
2843 i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2845 let ExeDomain = SSEPackedSingle in
2846 defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi,
2847 memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
2848 EVEX_CD8<32, CD8VF>;
2849 let ExeDomain = SSEPackedDouble in
2850 defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi,
2851 memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
// NOTE(review): EVEX_CD8<32, ...> on the f64/VEX_W vpermilpd looks
// inconsistent with the CD8<64, ...> used by other f64 defs in this file —
// confirm the intended compressed-displacement element size.
2852 VEX_W, EVEX_CD8<32, CD8VF>;
// Integer-typed VPermilpi nodes reuse the FP permil instructions.
2854 def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
2855 (VPERMILPSZri VR512:$src1, imm:$imm)>;
2856 def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
2857 (VPERMILPDZri VR512:$src1, imm:$imm)>;
2859 //===----------------------------------------------------------------------===//
2860 // AVX-512 Logical Instructions
2861 //===----------------------------------------------------------------------===//
// Bitwise and/or/xor/andn in both dword (d, {1to16} broadcast) and qword
// (q, {1to8} broadcast) element flavors; andn is non-commutative (bit 0).
2863 defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
2864 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2865 EVEX_V512, EVEX_CD8<32, CD8VF>;
2866 defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
2867 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2868 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2869 defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
2870 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2871 EVEX_V512, EVEX_CD8<32, CD8VF>;
2872 defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
2873 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2874 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2875 defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
2876 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2877 EVEX_V512, EVEX_CD8<32, CD8VF>;
2878 defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
2879 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2880 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2881 defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
2882 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2883 SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2884 defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
2885 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2886 SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2888 //===----------------------------------------------------------------------===//
2889 // AVX-512 FP arithmetic
2890 //===----------------------------------------------------------------------===//
// Scalar FP binop: instantiate both the ss (f32, XS prefix) and sd (f64,
// XD + VEX_W) EVEX forms from the shared sse12_fp_scalar multiclass.
2892 multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2894 defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
2895 f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2896 EVEX_CD8<32, CD8VT1>;
2897 defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
2898 f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2899 EVEX_CD8<64, CD8VT1>;
// Scalar add/mul/min/max are commutable; sub/div are not.
2902 let isCommutable = 1 in {
2903 defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
2904 defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
2905 defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
2906 defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
2908 let isCommutable = 0 in {
2909 defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
2910 defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
// Packed FP binop with the EVEX form matrix.  Same suffix scheme as
// avx512_binop_rm (rr/rm unmasked with patterns; rrk/rmk merge-masked,
// rrkz/rmkz zero-masked, rmb* broadcast — the masked/broadcast-masked forms
// carry empty [] pattern lists here and are encoding-only).
2913 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2915 RegisterClass RC, ValueType vt,
2916 X86MemOperand x86memop, PatFrag mem_frag,
2917 X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2919 Domain d, OpndItins itins, bit commutable> {
2920 let isCommutable = commutable in {
2921 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2922 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2923 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
2926 def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2927 !strconcat(OpcodeStr,
2928 " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2929 [], itins.rr, d>, EVEX_4V, EVEX_K;
2931 def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2932 !strconcat(OpcodeStr,
2933 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2934 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2937 let mayLoad = 1 in {
2938 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2939 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2940 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
2941 itins.rm, d>, EVEX_4V;
2943 def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2944 (ins RC:$src1, x86scalar_mop:$src2),
2945 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2946 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2947 [(set RC:$dst, (OpNode RC:$src1,
2948 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2949 itins.rm, d>, EVEX_4V, EVEX_B;
2951 def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2952 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2953 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2954 [], itins.rm, d>, EVEX_4V, EVEX_K;
2956 def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2957 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2958 "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2959 [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2961 def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2962 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2963 " \t{${src2}", BrdcstStr,
2964 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2965 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2967 def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2968 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2969 " \t{${src2}", BrdcstStr,
2970 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2972 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit packed FP add/mul/min/max/sub/div: ps (v16f32, {1to16} broadcast)
// and pd (v8f64, {1to8} broadcast, VEX_W) flavors.  Last template arg marks
// commutability (1 for add/mul/min/max, 0 for sub/div).
2976 defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
2977 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2978 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2980 defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
2981 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2982 SSE_ALU_ITINS_P.d, 1>,
2983 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2985 defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
2986 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2987 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2988 defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
2989 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2990 SSE_ALU_ITINS_P.d, 1>,
2991 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2993 defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
2994 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2995 SSE_ALU_ITINS_P.s, 1>,
2996 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2997 defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
2998 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2999 SSE_ALU_ITINS_P.s, 1>,
3000 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3002 defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
3003 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3004 SSE_ALU_ITINS_P.d, 1>,
3005 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3006 defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
3007 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3008 SSE_ALU_ITINS_P.d, 1>,
3009 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3011 defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
3012 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3013 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3014 defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
3015 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3016 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3018 defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
3019 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3020 SSE_ALU_ITINS_P.d, 0>,
3021 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3022 defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
3023 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3024 SSE_ALU_ITINS_P.d, 0>,
3025 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Unmasked min/max intrinsic calls (zero passthru, all-ones mask, current
// rounding mode FROUND_CURRENT) select the plain rr instructions.
3027 def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
3028 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3029 (i16 -1), FROUND_CURRENT)),
3030 (VMAXPSZrr VR512:$src1, VR512:$src2)>;
3032 def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
3033 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3034 (i8 -1), FROUND_CURRENT)),
3035 (VMAXPDZrr VR512:$src1, VR512:$src2)>;
3037 def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
3038 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3039 (i16 -1), FROUND_CURRENT)),
3040 (VMINPSZrr VR512:$src1, VR512:$src2)>;
3042 def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
3043 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3044 (i8 -1), FROUND_CURRENT)),
3045 (VMINPDZrr VR512:$src1, VR512:$src2)>;
3046 //===----------------------------------------------------------------------===//
3047 // AVX-512 VPTESTM instructions
3048 //===----------------------------------------------------------------------===//
// VPTESTM-style instructions: AND-test two vectors and write the per-element
// result into a mask register (KRC), not a vector register.
//   rr - both operands in vector registers.
//   rm - second operand loaded from memory (bitconverted to the element VT).
3050 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3051 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
3052 SDNode OpNode, ValueType vt> {
3053 def rr : AVX512PI<opc, MRMSrcReg,
3054 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
3055 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3056 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
3057 SSEPackedInt>, EVEX_4V;
3058 def rm : AVX512PI<opc, MRMSrcMem,
3059 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
3060 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3061 [(set KRC:$dst, (OpNode (vt RC:$src1),
3062 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
// 512-bit vptestm/vptestnm instantiations.
// NOTE(review): the memory operand is f512mem for these integer ops —
// presumably i512mem was intended; asm text is the same either way. Confirm.
3065 defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
3066 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
3067 EVEX_CD8<32, CD8VF>;
3068 defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
3069 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
3070 EVEX_CD8<64, CD8VF>;
// The "not" variants (test-not-mask) are gated on the CDI feature and use
// the XS prefix instead of PD.
3072 let Predicates = [HasCDI] in {
3073 defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
3074 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
3075 EVEX_CD8<32, CD8VF>;
3076 defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
3077 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
3078 EVEX_CD8<64, CD8VF>;
// Select the all-ones-mask form of the ptestm intrinsics to the rr
// instruction, copying the mask register result into a GPR.
3081 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
3082 (v16i32 VR512:$src2), (i16 -1))),
3083 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
3085 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
3086 (v8i64 VR512:$src2), (i8 -1))),
3087 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
3088 //===----------------------------------------------------------------------===//
3089 // AVX-512 Shift instructions
3090 //===----------------------------------------------------------------------===//
// Shift-by-immediate forms (reg,imm8 and mem,imm8), plus masked variants.
//   ri  - register source, immediate count, with selection pattern.
//   rik - same, with a writemask operand; no pattern (asm/encoding only).
//   mi  - memory source, immediate count, with selection pattern.
//   mik - masked memory form; no pattern.
3091 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
3092 string OpcodeStr, SDNode OpNode, RegisterClass RC,
3093 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
3094 RegisterClass KRC> {
3095 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3096 (ins RC:$src1, i8imm:$src2),
3097 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3098 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
3099 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3100 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3101 (ins KRC:$mask, RC:$src1, i8imm:$src2),
3102 !strconcat(OpcodeStr,
3103 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3104 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3105 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3106 (ins x86memop:$src1, i8imm:$src2),
3107 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3108 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
3109 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3110 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3111 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
3112 !strconcat(OpcodeStr,
3113 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3114 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift-by-vector-count forms: the count comes from the low element of a
// 128-bit register/memory operand (SrcVT), regardless of the vector width.
//   rr/rm  - with selection patterns.
//   rrk/rmk - writemask variants, asm/encoding only (empty pattern lists).
3117 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3118 RegisterClass RC, ValueType vt, ValueType SrcVT,
3119 PatFrag bc_frag, RegisterClass KRC> {
3120 // src2 is always 128-bit
3121 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3122 (ins RC:$src1, VR128X:$src2),
3123 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3124 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
3125 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3126 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3127 (ins KRC:$mask, RC:$src1, VR128X:$src2),
3128 !strconcat(OpcodeStr,
3129 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3130 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
// Memory count is always loaded as v2i64 and bitconverted via bc_frag.
3131 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3132 (ins RC:$src1, i128mem:$src2),
3133 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3134 [(set RC:$dst, (vt (OpNode RC:$src1,
3135 (bc_frag (memopv2i64 addr:$src2)))))],
3136 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3137 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3138 (ins KRC:$mask, RC:$src1, i128mem:$src2),
3139 !strconcat(OpcodeStr,
3140 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3141 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// 512-bit shift instantiations. Each mnemonic is defined twice under the
// same defm name: once for the immediate forms (avx512_shift_rmi) and once
// for the xmm-count forms (avx512_shift_rrm); the per-variant suffixes
// (ri/rik/mi/mik vs rr/rrk/rm/rmk) keep the record names distinct.
// Immediate forms use CD8VF memory tuple scaling; xmm-count forms use CD8VQ.
3144 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
3145 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3146 EVEX_V512, EVEX_CD8<32, CD8VF>;
3147 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
3148 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3149 EVEX_CD8<32, CD8VQ>;
3151 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
3152 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3153 EVEX_CD8<64, CD8VF>, VEX_W;
3154 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
3155 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3156 EVEX_CD8<64, CD8VQ>, VEX_W;
3158 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
3159 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
3160 EVEX_CD8<32, CD8VF>;
3161 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
3162 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3163 EVEX_CD8<32, CD8VQ>;
3165 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
3166 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3167 EVEX_CD8<64, CD8VF>, VEX_W;
3168 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
3169 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3170 EVEX_CD8<64, CD8VQ>, VEX_W;
3172 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
3173 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3174 EVEX_V512, EVEX_CD8<32, CD8VF>;
3175 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
3176 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3177 EVEX_CD8<32, CD8VQ>;
// vpsraq is AVX-512-only (no SSE/AVX q-sized arithmetic right shift).
3179 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
3180 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3181 EVEX_CD8<64, CD8VF>, VEX_W;
3182 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
3183 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3184 EVEX_CD8<64, CD8VQ>, VEX_W;
3186 //===-------------------------------------------------------------------===//
3187 // Variable Bit Shifts
3188 //===-------------------------------------------------------------------===//
// Variable (per-element count) shifts: the shift amounts come from a second
// full-width vector operand rather than an immediate or xmm low element.
// NOTE(review): some pattern/adjunct lines appear to have been dropped from
// this excerpt (gaps in the embedded line numbers); kept verbatim as shown.
3189 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
3190 RegisterClass RC, ValueType vt,
3191 X86MemOperand x86memop, PatFrag mem_frag> {
3192 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3193 (ins RC:$src1, RC:$src2),
3194 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3196 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
3198 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3199 (ins RC:$src1, x86memop:$src2),
3200 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3202 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
// 512-bit variable-shift instantiations, selected directly from the generic
// shl/srl/sra ISD nodes; q-sized forms add VEX_W.
3206 defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
3207 i512mem, memopv16i32>, EVEX_V512,
3208 EVEX_CD8<32, CD8VF>;
3209 defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
3210 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3211 EVEX_CD8<64, CD8VF>;
3212 defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
3213 i512mem, memopv16i32>, EVEX_V512,
3214 EVEX_CD8<32, CD8VF>;
3215 defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
3216 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3217 EVEX_CD8<64, CD8VF>;
3218 defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
3219 i512mem, memopv16i32>, EVEX_V512,
3220 EVEX_CD8<32, CD8VF>;
3221 defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
3222 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3223 EVEX_CD8<64, CD8VF>;
3225 //===----------------------------------------------------------------------===//
3226 // AVX-512 - MOVDDUP
3227 //===----------------------------------------------------------------------===//
// MOVDDUP (duplicate even-indexed f64 elements), selected from X86Movddup.
3229 multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
3230 X86MemOperand x86memop, PatFrag memop_frag> {
3231 def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3232 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3233 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
3234 def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3235 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3237 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
3240 defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
3241 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Fold movddup-of-a-broadcast-scalar-load into the memory form.
3242 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
3243 (VMOVDDUPZrm addr:$src)>;
3245 //===---------------------------------------------------------------------===//
3246 // Replicate Single FP - MOVSHDUP and MOVSLDUP
3247 //===---------------------------------------------------------------------===//
// MOVSHDUP / MOVSLDUP (replicate odd/even single-precision elements),
// parameterized over the SDNode, VT and memory fragment.
3248 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
3249 ValueType vt, RegisterClass RC, PatFrag mem_frag,
3250 X86MemOperand x86memop> {
3251 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3252 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3253 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
3255 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3256 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3257 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
3260 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3261 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3262 EVEX_CD8<32, CD8VF>;
3263 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3264 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3265 EVEX_CD8<32, CD8VF>;
// Also select the integer-typed (v16i32) forms of the same shuffles.
3267 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3268 def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3269 (VMOVSHDUPZrm addr:$src)>;
3270 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3271 def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3272 (VMOVSLDUPZrm addr:$src)>;
3274 //===----------------------------------------------------------------------===//
3275 // Move Low to High and High to Low packed FP Instructions
3276 //===----------------------------------------------------------------------===//
// VMOVLHPS / VMOVHLPS, 128-bit (xmm) register-register forms only.
3277 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3278 (ins VR128X:$src1, VR128X:$src2),
3279 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3280 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3281 IIC_SSE_MOV_LH>, EVEX_4V;
3282 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3283 (ins VR128X:$src1, VR128X:$src2),
3284 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3285 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3286 IIC_SSE_MOV_LH>, EVEX_4V;
// Reuse the FP instructions for the integer-typed shuffle nodes.
3288 let Predicates = [HasAVX512] in {
3290 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3291 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3292 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3293 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
3296 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3297 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3300 //===----------------------------------------------------------------------===//
3301 // FMA - Fused Multiply Operations
// Packed FMA3 (213-form) multiclass. $src1 is tied to $dst (FMA3 read-write
// accumulator convention).
//   r  - register form, built via AVX512_masking_3src so masked variants
//        are generated alongside.
//   m  - full-width memory operand for $src3.
//   mb - broadcast-from-scalar memory operand (EVEX_B), BrdcstStr is the
//        "{1toN}" asm suffix.
3303 let Constraints = "$src1 = $dst" in {
3304 multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3305 RegisterClass RC, X86MemOperand x86memop,
3306 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3307 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3308 RegisterClass KRC> {
3309 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3310 (ins RC:$src2, RC:$src3),
3311 OpcodeStr, "$src3, $src2", "$src2, $src3",
3312 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3316 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3317 (ins RC:$src1, RC:$src2, x86memop:$src3),
3318 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3319 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3320 (mem_frag addr:$src3))))]>;
3321 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3322 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
3323 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
3324 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3325 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3326 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3328 } // Constraints = "$src1 = $dst"
// 512-bit 213-form packed FMA instantiations. PS variants: v16f32, VK16WM,
// {1to16} broadcast; PD variants: v8f64, VK8WM, {1to8} broadcast, VEX_W.
3330 let ExeDomain = SSEPackedSingle in {
3331 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3332 memopv16f32, f32mem, loadf32, "{1to16}",
3333 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
3334 EVEX_CD8<32, CD8VF>;
3335 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3336 memopv16f32, f32mem, loadf32, "{1to16}",
3337 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
3338 EVEX_CD8<32, CD8VF>;
3339 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3340 memopv16f32, f32mem, loadf32, "{1to16}",
3341 X86Fmaddsub, v16f32, VK16WM>,
3342 EVEX_V512, EVEX_CD8<32, CD8VF>;
3343 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3344 memopv16f32, f32mem, loadf32, "{1to16}",
3345 X86Fmsubadd, v16f32, VK16WM>,
3346 EVEX_V512, EVEX_CD8<32, CD8VF>;
3347 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3348 memopv16f32, f32mem, loadf32, "{1to16}",
3349 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
3350 EVEX_CD8<32, CD8VF>;
3351 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3352 memopv16f32, f32mem, loadf32, "{1to16}",
3353 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
3354 EVEX_CD8<32, CD8VF>;
3356 let ExeDomain = SSEPackedDouble in {
3357 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3358 memopv8f64, f64mem, loadf64, "{1to8}",
3359 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
3360 VEX_W, EVEX_CD8<64, CD8VF>;
3361 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3362 memopv8f64, f64mem, loadf64, "{1to8}",
3363 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3364 EVEX_CD8<64, CD8VF>;
3365 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3366 memopv8f64, f64mem, loadf64, "{1to8}",
3367 X86Fmaddsub, v8f64, VK8WM>,
3368 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3369 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3370 memopv8f64, f64mem, loadf64, "{1to8}",
3371 X86Fmsubadd, v8f64, VK8WM>,
3372 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3373 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3374 memopv8f64, f64mem, loadf64, "{1to8}",
3375 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
3376 EVEX_CD8<64, CD8VF>;
3377 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3378 memopv8f64, f64mem, loadf64, "{1to8}",
3379 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3380 EVEX_CD8<64, CD8VF>;
// Packed FMA3 132-form multiclass: memory-only variants where the loaded
// operand ($src2) is the middle multiplicand. $src1 tied to $dst.
//   m  - full-width memory operand.
//   mb - broadcast memory operand (EVEX_B) with "{1toN}" asm suffix.
3383 let Constraints = "$src1 = $dst" in {
3384 multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3385 RegisterClass RC, X86MemOperand x86memop,
3386 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3387 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3389 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3390 (ins RC:$src1, RC:$src3, x86memop:$src2),
3391 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
3392 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3393 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3394 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
3395 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
3396 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3397 [(set RC:$dst, (OpNode RC:$src1,
3398 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3400 } // Constraints = "$src1 = $dst"
// 512-bit 132-form packed FMA instantiations (memory/broadcast forms only,
// per avx512_fma3p_m132). Same PS/PD parameterization as the 213 forms.
3403 let ExeDomain = SSEPackedSingle in {
3404 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3405 memopv16f32, f32mem, loadf32, "{1to16}",
3406 X86Fmadd, v16f32>, EVEX_V512,
3407 EVEX_CD8<32, CD8VF>;
3408 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3409 memopv16f32, f32mem, loadf32, "{1to16}",
3410 X86Fmsub, v16f32>, EVEX_V512,
3411 EVEX_CD8<32, CD8VF>;
3412 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3413 memopv16f32, f32mem, loadf32, "{1to16}",
3414 X86Fmaddsub, v16f32>,
3415 EVEX_V512, EVEX_CD8<32, CD8VF>;
3416 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3417 memopv16f32, f32mem, loadf32, "{1to16}",
3418 X86Fmsubadd, v16f32>,
3419 EVEX_V512, EVEX_CD8<32, CD8VF>;
3420 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3421 memopv16f32, f32mem, loadf32, "{1to16}",
3422 X86Fnmadd, v16f32>, EVEX_V512,
3423 EVEX_CD8<32, CD8VF>;
3424 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3425 memopv16f32, f32mem, loadf32, "{1to16}",
3426 X86Fnmsub, v16f32>, EVEX_V512,
3427 EVEX_CD8<32, CD8VF>;
3429 let ExeDomain = SSEPackedDouble in {
3430 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3431 memopv8f64, f64mem, loadf64, "{1to8}",
3432 X86Fmadd, v8f64>, EVEX_V512,
3433 VEX_W, EVEX_CD8<64, CD8VF>;
3434 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3435 memopv8f64, f64mem, loadf64, "{1to8}",
3436 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3437 EVEX_CD8<64, CD8VF>;
3438 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3439 memopv8f64, f64mem, loadf64, "{1to8}",
3440 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3441 EVEX_CD8<64, CD8VF>;
3442 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3443 memopv8f64, f64mem, loadf64, "{1to8}",
3444 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3445 EVEX_CD8<64, CD8VF>;
3446 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3447 memopv8f64, f64mem, loadf64, "{1to8}",
3448 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3449 EVEX_CD8<64, CD8VF>;
3450 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3451 memopv8f64, f64mem, loadf64, "{1to8}",
3452 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3453 EVEX_CD8<64, CD8VF>;
// Scalar FMA3 (213-form) multiclass. $src1 tied to $dst; register form is
// commutable in src1/src2.
// NOTE(review): def m hard-codes f128mem:$src3 even though the multiclass
// takes an x86memop parameter — presumably x86memop was intended; verify
// (instantiations below pass f32mem/f64mem).
3457 let Constraints = "$src1 = $dst" in {
3458 multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3459 RegisterClass RC, ValueType OpVT,
3460 X86MemOperand x86memop, Operand memop,
3462 let isCommutable = 1 in
3463 def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
3464 (ins RC:$src1, RC:$src2, RC:$src3),
3465 !strconcat(OpcodeStr,
3466 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3468 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
3470 def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3471 (ins RC:$src1, RC:$src2, f128mem:$src3),
3472 !strconcat(OpcodeStr,
3473 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3475 (OpVT (OpNode RC:$src2, RC:$src1,
3476 (mem_frag addr:$src3))))]>;
3479 } // Constraints = "$src1 = $dst"
// Scalar 213-form FMA instantiations on FR32X/FR64X; sd forms add VEX_W and
// use 64-bit CD8 tuple scaling.
3481 defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
3482 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3483 defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
3484 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3485 defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
3486 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3487 defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
3488 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3489 defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
3490 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3491 defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
3492 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3493 defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
3494 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3495 defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
3496 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3498 //===----------------------------------------------------------------------===//
3499 // AVX-512 Scalar convert from sign integer to float/double
3500 //===----------------------------------------------------------------------===//
// Scalar int-to-FP conversion multiclass (vcvtsi2ss/sd, vcvtusi2ss/sd).
// Both forms are pattern-less (hasSideEffects = 0, empty lists); selection
// is done via the separate Pat<> definitions below.
3502 multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3503 X86MemOperand x86memop, string asm> {
3504 let hasSideEffects = 0 in {
3505 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
3506 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3509 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3510 (ins DstRC:$src1, x86memop:$src),
3511 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3513 } // hasSideEffects = 0
// Signed (0x2A) and unsigned (0x7B) scalar int-to-FP instantiations, plus
// the sint_to_fp / uint_to_fp selection patterns. The first operand of each
// instruction is fed an IMPLICIT_DEF since only the scalar result matters.
3515 let Predicates = [HasAVX512] in {
3516 defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
3517 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3518 defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
3519 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3520 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
3521 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3522 defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
3523 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// sint_to_fp from memory folds directly into the rm forms.
3525 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3526 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3527 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
3528 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3529 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3530 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3531 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
3532 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3534 def : Pat<(f32 (sint_to_fp GR32:$src)),
3535 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3536 def : Pat<(f32 (sint_to_fp GR64:$src)),
3537 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3538 def : Pat<(f64 (sint_to_fp GR32:$src)),
3539 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3540 def : Pat<(f64 (sint_to_fp GR64:$src)),
3541 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned conversions are new with AVX-512 (no VEX/SSE equivalent).
3543 defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
3544 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3545 defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
3546 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3547 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
3548 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3549 defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
3550 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3552 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3553 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3554 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3555 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3556 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3557 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3558 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3559 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3561 def : Pat<(f32 (uint_to_fp GR32:$src)),
3562 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3563 def : Pat<(f32 (uint_to_fp GR64:$src)),
3564 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3565 def : Pat<(f64 (uint_to_fp GR32:$src)),
3566 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3567 def : Pat<(f64 (uint_to_fp GR64:$src)),
3568 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3571 //===----------------------------------------------------------------------===//
3572 // AVX-512 Scalar convert from float/double to integer
3573 //===----------------------------------------------------------------------===//
// Scalar FP-to-int conversion via intrinsic: rr form selects the intrinsic
// directly; rm form is asm/encoding only (empty pattern list).
3574 multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3575 Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
3577 let hasSideEffects = 0 in {
3578 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3579 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3580 [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
3581 Requires<[HasAVX512]>;
3583 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
3584 !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
3585 Requires<[HasAVX512]>;
3586 } // hasSideEffects = 0
// Scalar FP-to-int conversions (rounding): signed forms 0x2D, unsigned
// AVX-512-only forms 0x79; 64-bit destinations add VEX_W.
3588 let Predicates = [HasAVX512] in {
3589 // Convert float/double to signed/unsigned int 32/64
3590 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
3591 ssmem, sse_load_f32, "cvtss2si">,
3592 XS, EVEX_CD8<32, CD8VT1>;
3593 defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
3594 ssmem, sse_load_f32, "cvtss2si">,
3595 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
3596 defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
3597 ssmem, sse_load_f32, "cvtss2usi">,
3598 XS, EVEX_CD8<32, CD8VT1>;
3599 defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3600 int_x86_avx512_cvtss2usi64, ssmem,
3601 sse_load_f32, "cvtss2usi">, XS, VEX_W,
3602 EVEX_CD8<32, CD8VT1>;
3603 defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
3604 sdmem, sse_load_f64, "cvtsd2si">,
3605 XD, EVEX_CD8<64, CD8VT1>;
3606 defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
3607 sdmem, sse_load_f64, "cvtsd2si">,
3608 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3609 defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
3610 sdmem, sse_load_f64, "cvtsd2usi">,
3611 XD, EVEX_CD8<64, CD8VT1>;
3612 defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3613 int_x86_avx512_cvtsd2usi64, sdmem,
3614 sse_load_f64, "cvtsd2usi">, XD, VEX_W,
3615 EVEX_CD8<64, CD8VT1>;
// Intrinsic-selected int-to-FP forms, codegen-only (duplicate encodings of
// the defm VCVT*SI2* instructions above, reusing the SSE multiclass).
3617 let isCodeGenOnly = 1 in {
3618 defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3619 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
3620 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3621 defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3622 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
3623 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3624 defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3625 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
3626 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3627 defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3628 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
3629 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3631 defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3632 int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
3633 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3634 defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3635 int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
3636 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3637 defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3638 int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
3639 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3640 defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3641 int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
3642 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3643 } // isCodeGenOnly = 1
3645 // Convert float/double to signed/unsigned int 32/64 with truncation
// Intrinsic-selected truncating conversions (signed 0x2C, unsigned 0x78),
// codegen-only; the ISD-node-selected forms are defined separately below.
3646 let isCodeGenOnly = 1 in {
3647 defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
3648 ssmem, sse_load_f32, "cvttss2si">,
3649 XS, EVEX_CD8<32, CD8VT1>;
3650 defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3651 int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
3652 "cvttss2si">, XS, VEX_W,
3653 EVEX_CD8<32, CD8VT1>;
3654 defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
3655 sdmem, sse_load_f64, "cvttsd2si">, XD,
3656 EVEX_CD8<64, CD8VT1>;
3657 defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3658 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
3659 "cvttsd2si">, XD, VEX_W,
3660 EVEX_CD8<64, CD8VT1>;
3661 defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3662 int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
3663 "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
3664 defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3665 int_x86_avx512_cvttss2usi64, ssmem,
3666 sse_load_f32, "cvttss2usi">, XS, VEX_W,
3667 EVEX_CD8<32, CD8VT1>;
3668 defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3669 int_x86_avx512_cvttsd2usi,
3670 sdmem, sse_load_f64, "cvttsd2usi">, XD,
3671 EVEX_CD8<64, CD8VT1>;
3672 defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3673 int_x86_avx512_cvttsd2usi64, sdmem,
3674 sse_load_f64, "cvttsd2usi">, XD, VEX_W,
3675 EVEX_CD8<64, CD8VT1>;
3676 } // isCodeGenOnly = 1
// Scalar FP-to-int conversion selected from a generic ISD node (e.g.
// fp_to_sint/fp_to_uint); both register and memory forms carry patterns.
3678 multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3679 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
3681 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3682 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3683 [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
3684 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3685 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3686 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
// Truncating FP-to-int instantiations selected from fp_to_sint/fp_to_uint,
// operating on scalar FR32X/FR64X sources (signed 0x2C, unsigned 0x78).
3689 defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
3690 loadf32, "cvttss2si">, XS,
3691 EVEX_CD8<32, CD8VT1>;
3692 defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
3693 loadf32, "cvttss2usi">, XS,
3694 EVEX_CD8<32, CD8VT1>;
3695 defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
3696 loadf32, "cvttss2si">, XS, VEX_W,
3697 EVEX_CD8<32, CD8VT1>;
3698 defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
3699 loadf32, "cvttss2usi">, XS, VEX_W,
3700 EVEX_CD8<32, CD8VT1>;
3701 defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
3702 loadf64, "cvttsd2si">, XD,
3703 EVEX_CD8<64, CD8VT1>;
3704 defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
3705 loadf64, "cvttsd2usi">, XD,
3706 EVEX_CD8<64, CD8VT1>;
3707 defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
3708 loadf64, "cvttsd2si">, XD, VEX_W,
3709 EVEX_CD8<64, CD8VT1>;
3710 defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
3711 loadf64, "cvttsd2usi">, XD, VEX_W,
3712 EVEX_CD8<64, CD8VT1>;
3714 //===----------------------------------------------------------------------===//
3715 // AVX-512 Convert form float to double and back
3716 //===----------------------------------------------------------------------===//
// Scalar float<->double conversion instructions; pattern-less here,
// selected via the fextend/fround Pat<> definitions below.
3717 let hasSideEffects = 0 in {
3718 def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
3719 (ins FR32X:$src1, FR32X:$src2),
3720 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3721 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
3723 def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
3724 (ins FR32X:$src1, f32mem:$src2),
3725 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3726 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
3727 EVEX_CD8<32, CD8VT1>;
3729 // Convert scalar double to scalar single
3730 def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
3731 (ins FR64X:$src1, FR64X:$src2),
3732 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3733 []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
3735 def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
3736 (ins FR64X:$src1, f64mem:$src2),
3737 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3738 []>, EVEX_4V, VEX_LIG, VEX_W,
3739 Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
3742 def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
3743 Requires<[HasAVX512]>;
3744 def : Pat<(fextend (loadf32 addr:$src)),
3745 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
3747 def : Pat<(extloadf32 addr:$src),
3748 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
3749 Requires<[HasAVX512, OptForSize]>;
3751 def : Pat<(extloadf32 addr:$src),
3752 (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
3753 Requires<[HasAVX512, OptForSpeed]>;
3755 def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
3756 Requires<[HasAVX512]>;
// Packed FP conversion with an extra rounding-control variant:
//   rr  - register source
//   rrb - register source + AVX512RC static-rounding operand (EVEX.b set)
//   rm  - memory source
// OpVT/InVT are the destination/source vector types; d is the exec domain.
3758 multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
3759 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3760 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3762 let hasSideEffects = 0 in {
3763 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3764 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3766 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
// rrb has no pattern; static-rounding intrinsics are matched via Pat<> below.
3767 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3768 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3769 [], d>, EVEX, EVEX_B, EVEX_RC;
3771 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3772 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3774 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3775 } // hasSideEffects = 0
// Same as above but without the rounding-control (rrb) variant.
3778 multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
3779 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3780 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3782 let hasSideEffects = 0 in {
3783 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3784 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3786 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3788 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3789 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3791 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3792 } // hasSideEffects = 0
// 512-bit packed FP<->FP conversions. vcvtpd2ps narrows and supports static
// rounding (with_rc); vcvtps2pd widens (exact) so no rounding variant.
3795 defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
3796 memopv8f64, f512mem, v8f32, v8f64,
3797 SSEPackedSingle>, EVEX_V512, VEX_W, PD,
3798 EVEX_CD8<64, CD8VF>;
3800 defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
3801 memopv4f64, f256mem, v8f64, v8f32,
3802 SSEPackedDouble>, EVEX_V512, PS,
3803 EVEX_CD8<32, CD8VH>;
3804 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3805 (VCVTPS2PDZrm addr:$src)>;
// Intrinsic selection for the unmasked cases (zero passthru, all-ones mask).
3807 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3808 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
3809 (VCVTPD2PSZrr VR512:$src)>;
3811 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3812 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
3813 (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
3815 //===----------------------------------------------------------------------===//
3816 // AVX-512 Vector convert from sign integer to float/double
3817 //===----------------------------------------------------------------------===//
3819 defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
3820 memopv8i64, i512mem, v16f32, v16i32,
3821 SSEPackedSingle>, EVEX_V512, PS,
3822 EVEX_CD8<32, CD8VF>;
3824 defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
3825 memopv4i64, i256mem, v8f64, v8i32,
3826 SSEPackedDouble>, EVEX_V512, XS,
3827 EVEX_CD8<32, CD8VH>;
// Truncating FP->signed/unsigned int conversions.
3829 defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
3830 memopv16f32, f512mem, v16i32, v16f32,
3831 SSEPackedSingle>, EVEX_V512, XS,
3832 EVEX_CD8<32, CD8VF>;
3834 defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
3835 memopv8f64, f512mem, v8i32, v8f64,
3836 SSEPackedDouble>, EVEX_V512, PD, VEX_W,
3837 EVEX_CD8<64, CD8VF>;
3839 defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
3840 memopv16f32, f512mem, v16i32, v16f32,
3841 SSEPackedSingle>, EVEX_V512, PS,
3842 EVEX_CD8<32, CD8VF>;
3844 // cvttps2udq (src, 0, mask-all-ones, sae-current)
3845 def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
3846 (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
3847 (VCVTTPS2UDQZrr VR512:$src)>;
3849 defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
3850 memopv8f64, f512mem, v8i32, v8f64,
3851 SSEPackedDouble>, EVEX_V512, PS, VEX_W,
3852 EVEX_CD8<64, CD8VF>;
3854 // cvttpd2udq (src, 0, mask-all-ones, sae-current)
3855 def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
3856 (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
3857 (VCVTTPD2UDQZrr VR512:$src)>;
3859 defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
3860 memopv4i64, f256mem, v8f64, v8i32,
3861 SSEPackedDouble>, EVEX_V512, XS,
3862 EVEX_CD8<32, CD8VH>;
3864 defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
3865 memopv16i32, f512mem, v16f32, v16i32,
3866 SSEPackedSingle>, EVEX_V512, XD,
3867 EVEX_CD8<32, CD8VF>;
// Narrower (128/256-bit) unsigned conversions have no dedicated AVX-512
// instructions here, so widen into a 512-bit register, convert, and extract
// the low subregister again.
3869 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
3870 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3871 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3873 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
3874 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3875 (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3877 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
3878 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3879 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3881 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
3882 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3883 (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3885 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
3886 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
3887 (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
// Unmasked int->fp intrinsic forms (zero passthru, all-ones mask).
3889 def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
3890 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3891 (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
3892 def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
3893 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3894 (VCVTDQ2PDZrr VR256X:$src)>;
3895 def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
3896 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3897 (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
3898 def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
3899 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3900 (VCVTUDQ2PDZrr VR256X:$src)>;
// Packed FP->int conversion skeleton with no patterns at all (rr, rrb with
// static rounding, rm); selection happens via the intrinsic Pat<>s below.
3902 multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3903 RegisterClass DstRC, PatFrag mem_frag,
3904 X86MemOperand x86memop, Domain d> {
3905 let hasSideEffects = 0 in {
3906 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3907 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3909 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3910 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3911 [], d>, EVEX, EVEX_B, EVEX_RC;
3913 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3914 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3916 } // hasSideEffects = 0
// Rounding (non-truncating) FP->signed/unsigned int conversions.
3919 defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
3920 memopv16f32, f512mem, SSEPackedSingle>, PD,
3921 EVEX_V512, EVEX_CD8<32, CD8VF>;
3922 defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3923 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3924 EVEX_V512, EVEX_CD8<64, CD8VF>;
3926 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3927 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3928 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3930 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3931 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3932 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3934 defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3935 memopv16f32, f512mem, SSEPackedSingle>,
3936 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
3937 defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3938 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
3939 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
3941 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3942 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3943 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3945 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3946 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3947 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
// Memory-folded forms of the FP<->FP conversions defined earlier.
3949 let Predicates = [HasAVX512] in {
3950 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3951 (VCVTPD2PSZrm addr:$src)>;
3952 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3953 (VCVTPS2PDZrm addr:$src)>;
3956 //===----------------------------------------------------------------------===//
3957 // Half precision conversion instructions
3958 //===----------------------------------------------------------------------===//
// f16 -> f32 widening; reg form pattern elided here, mem form is asm-only.
3959 multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3960 X86MemOperand x86memop> {
3961 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3962 "vcvtph2ps\t{$src, $dst|$dst, $src}",
3964 let hasSideEffects = 0, mayLoad = 1 in
3965 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3966 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
// f32 -> f16 narrowing; $src2 is the rounding-mode immediate. The mr form
// stores straight to memory (MRMDestMem) and is asm-only.
3969 multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3970 X86MemOperand x86memop> {
3971 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3972 (ins srcRC:$src1, i32i8imm:$src2),
3973 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3975 let hasSideEffects = 0, mayStore = 1 in
3976 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3977 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
3978 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
3981 defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
3982 EVEX_CD8<32, CD8VH>;
3983 defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
3984 EVEX_CD8<32, CD8VH>;
// Unmasked intrinsic forms (zero passthru, all-ones mask).
3986 def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
3987 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
3988 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
3990 def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
3991 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
3992 (VCVTPH2PSZrr VR256X:$src)>;
// Ordered/unordered scalar FP compares that set EFLAGS. The VCOMI* group
// clears Pattern (asm-only); the Int_* group matches the X86(u)comi nodes
// on full 128-bit vectors and is isCodeGenOnly.
3994 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
3995 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
3996 "ucomiss">, PS, EVEX, VEX_LIG,
3997 EVEX_CD8<32, CD8VT1>;
3998 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
3999 "ucomisd">, PD, EVEX,
4000 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4001 let Pattern = []<dag> in {
4002 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
4003 "comiss">, PS, EVEX, VEX_LIG,
4004 EVEX_CD8<32, CD8VT1>;
4005 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
4006 "comisd">, PD, EVEX,
4007 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4009 let isCodeGenOnly = 1 in {
4010 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
4011 load, "ucomiss">, PS, EVEX, VEX_LIG,
4012 EVEX_CD8<32, CD8VT1>;
4013 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
4014 load, "ucomisd">, PD, EVEX,
4015 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4017 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
4018 load, "comiss">, PS, EVEX, VEX_LIG,
4019 EVEX_CD8<32, CD8VT1>;
4020 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
4021 load, "comisd">, PD, EVEX,
4022 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4026 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations.
// Asm-only defs; the intrinsic Pat<>s below do the selection by copying
// between VR128X and the scalar FR32X/FR64X classes.
4027 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4028 X86MemOperand x86memop> {
4029 let hasSideEffects = 0 in {
4030 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4031 (ins RC:$src1, RC:$src2),
4032 !strconcat(OpcodeStr,
4033 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4034 let mayLoad = 1 in {
4035 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4036 (ins RC:$src1, x86memop:$src2),
4037 !strconcat(OpcodeStr,
4038 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4043 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
4044 EVEX_CD8<32, CD8VT1>;
4045 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
4046 VEX_W, EVEX_CD8<64, CD8VT1>;
4047 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
4048 EVEX_CD8<32, CD8VT1>;
4049 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
4050 VEX_W, EVEX_CD8<64, CD8VT1>;
// Unmasked intrinsic forms (zero passthru, all-ones mask).
4052 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
4053 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4054 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4055 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4057 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
4058 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4059 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4060 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4062 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
4063 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4064 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4065 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4067 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
4068 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4069 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4070 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4072 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations; these DO carry patterns on the
// X86frsqrt/X86frcp nodes (r = register, m = memory source).
4073 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
4074 RegisterClass RC, X86MemOperand x86memop,
4075 PatFrag mem_frag, ValueType OpVt> {
4076 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4077 !strconcat(OpcodeStr,
4078 " \t{$src, $dst|$dst, $src}"),
4079 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
4081 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4082 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4083 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
4086 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
4087 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4088 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
4089 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4090 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
4091 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4092 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
4093 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Unmasked intrinsic forms (zero passthru, all-ones mask).
4095 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
4096 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4097 (VRSQRT14PSZr VR512:$src)>;
4098 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
4099 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4100 (VRSQRT14PDZr VR512:$src)>;
4102 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
4103 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4104 (VRCP14PSZr VR512:$src)>;
4105 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
4106 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4107 (VRCP14PDZr VR512:$src)>;
4109 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations (AVX-512 ERI). rrb is the
// suppress-all-exceptions ({sae}) form, encoded with EVEX.b (EVEX_B).
4110 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4111 X86MemOperand x86memop> {
4112 let hasSideEffects = 0, Predicates = [HasERI] in {
4113 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4114 (ins RC:$src1, RC:$src2),
4115 !strconcat(OpcodeStr,
4116 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4117 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4118 (ins RC:$src1, RC:$src2),
4119 !strconcat(OpcodeStr,
4120 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
4121 []>, EVEX_4V, EVEX_B;
4122 let mayLoad = 1 in {
4123 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4124 (ins RC:$src1, x86memop:$src2),
4125 !strconcat(OpcodeStr,
4126 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4131 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
4132 EVEX_CD8<32, CD8VT1>;
4133 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
4134 VEX_W, EVEX_CD8<64, CD8VT1>;
4135 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
4136 EVEX_CD8<32, CD8VT1>;
4137 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
4138 VEX_W, EVEX_CD8<64, CD8VT1>;
// Unmasked intrinsic forms select the {sae} (rrb) variants.
4140 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
4141 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4143 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4144 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4146 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
4147 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4149 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4150 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4152 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
4153 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4155 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4156 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4158 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
4159 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4161 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4162 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4164 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations (AVX-512 ERI). rb is the {sae}
// variant (EVEX.b); the intrinsic Pat<>s with FROUND_NO_EXC select it.
4165 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
4166 RegisterClass RC, X86MemOperand x86memop> {
4167 let hasSideEffects = 0, Predicates = [HasERI] in {
4168 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4169 !strconcat(OpcodeStr,
4170 " \t{$src, $dst|$dst, $src}"),
4172 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4173 !strconcat(OpcodeStr,
4174 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
4176 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4177 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4181 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
4182 EVEX_V512, EVEX_CD8<32, CD8VF>;
4183 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
4184 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4185 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
4186 EVEX_V512, EVEX_CD8<32, CD8VF>;
4187 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
4188 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4190 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
4191 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4192 (VRSQRT28PSZrb VR512:$src)>;
4193 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
4194 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4195 (VRSQRT28PDZrb VR512:$src)>;
4197 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
4198 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4199 (VRCP28PSZrb VR512:$src)>;
4200 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
4201 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4202 (VRCP28PDZrb VR512:$src)>;
// Packed 512-bit unary FP op (used for sqrt): PS (v16f32) and PD (v8f64)
// forms, each with register and memory sources. OpNode is the DAG node to
// select (e.g. fsqrt); itins_s/itins_d carry the scheduling itineraries.
4204 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
4205 OpndItins itins_s, OpndItins itins_d> {
4206 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4207 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4208 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
EVEX, EVEX_V512;

let mayLoad = 1 in
4212 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4213 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst,
4215 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
4216 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

4218 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4219 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4220 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
EVEX, EVEX_V512;

let mayLoad = 1 in
4224 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4225 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
// BUGFIX: this pattern previously used memopv16f32 (a 16 x f32 load
// fragment) bitconverted to v8f64, which does not match the canonical
// (OpNode (load v8f64)) DAG and so the memory operand was never folded
// for the PD form. Use the type-correct v8f64 load fragment directly.
4226 [(set VR512:$dst, (OpNode
(v8f64 (memopv8f64 addr:$src))))],
4228 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
}
// Scalar sqrt: plain FR32X/FR64X forms (no patterns; matched via Pat<>s in
// the HasAVX512 block below) plus isCodeGenOnly _Int forms that implement
// the 128-bit vector intrinsics directly.
4232 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
4233 Intrinsic F32Int, Intrinsic F64Int,
4234 OpndItins itins_s, OpndItins itins_d> {
4235 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
4236 (ins FR32X:$src1, FR32X:$src2),
4237 !strconcat(OpcodeStr,
4238 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4239 [], itins_s.rr>, XS, EVEX_4V;
4240 let isCodeGenOnly = 1 in
4241 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4242 (ins VR128X:$src1, VR128X:$src2),
4243 !strconcat(OpcodeStr,
4244 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4246 (F32Int VR128X:$src1, VR128X:$src2))],
4247 itins_s.rr>, XS, EVEX_4V;
4248 let mayLoad = 1 in {
4249 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
4250 (ins FR32X:$src1, f32mem:$src2),
4251 !strconcat(OpcodeStr,
4252 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4253 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4254 let isCodeGenOnly = 1 in
4255 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4256 (ins VR128X:$src1, ssmem:$src2),
4257 !strconcat(OpcodeStr,
4258 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4260 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4261 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4263 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4264 (ins FR64X:$src1, FR64X:$src2),
4265 !strconcat(OpcodeStr,
4266 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4268 let isCodeGenOnly = 1 in
4269 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4270 (ins VR128X:$src1, VR128X:$src2),
4271 !strconcat(OpcodeStr,
4272 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4274 (F64Int VR128X:$src1, VR128X:$src2))],
4275 itins_s.rr>, XD, EVEX_4V, VEX_W;
4276 let mayLoad = 1 in {
4277 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4278 (ins FR64X:$src1, f64mem:$src2),
4279 !strconcat(OpcodeStr,
4280 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4281 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4282 let isCodeGenOnly = 1 in
4283 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4284 (ins VR128X:$src1, sdmem:$src2),
4285 !strconcat(OpcodeStr,
4286 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4288 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4289 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
// Instantiate scalar and packed sqrt under the single VSQRT name.
4294 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4295 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4296 SSE_SQRTSS, SSE_SQRTSD>,
4297 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
4298 SSE_SQRTPS, SSE_SQRTPD>;
4300 let Predicates = [HasAVX512] in {
// Unmasked packed sqrt intrinsics (zero passthru, all-ones mask, current
// rounding) map to the plain register forms.
4301 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4302 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4303 (VSQRTPSZrr VR512:$src1)>;
4304 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4305 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4306 (VSQRTPDZrr VR512:$src1)>;
// Scalar fsqrt; load folding only under OptForSize (keeps the load separate
// otherwise).
4308 def : Pat<(f32 (fsqrt FR32X:$src)),
4309 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4310 def : Pat<(f32 (fsqrt (load addr:$src))),
4311 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4312 Requires<[OptForSize]>;
4313 def : Pat<(f64 (fsqrt FR64X:$src)),
4314 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4315 def : Pat<(f64 (fsqrt (load addr:$src))),
4316 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4317 Requires<[OptForSize]>;
4319 def : Pat<(f32 (X86frsqrt FR32X:$src)),
4320 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4321 def : Pat<(f32 (X86frsqrt (load addr:$src))),
4322 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4323 Requires<[OptForSize]>;
4325 def : Pat<(f32 (X86frcp FR32X:$src)),
4326 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4327 def : Pat<(f32 (X86frcp (load addr:$src))),
4328 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4329 Requires<[OptForSize]>;
// NOTE(review): these copy to FR32/FR64 (the SSE classes), not FR32X/FR64X
// as the patterns above do — confirm this asymmetry is intentional.
4331 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4332 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4333 (COPY_TO_REGCLASS VR128X:$src, FR32)),
4335 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4336 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4338 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4339 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4340 (COPY_TO_REGCLASS VR128X:$src, FR64)),
4342 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4343 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
// Packed unary FP op with an immediate control byte ($src2), selected via
// the V4F32Int/V2F64Int intrinsics; PS and PD, register and memory forms.
// VForm parameterizes the compressed-displacement tuple.
4347 multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4348 X86MemOperand x86memop, RegisterClass RC,
4349 PatFrag mem_frag32, PatFrag mem_frag64,
4350 Intrinsic V4F32Int, Intrinsic V2F64Int,
4352 let ExeDomain = SSEPackedSingle in {
4353 // Intrinsic operation, reg.
4354 // Vector intrinsic operation, reg
4355 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4356 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4357 !strconcat(OpcodeStr,
4358 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4359 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4361 // Vector intrinsic operation, mem
4362 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4363 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4364 !strconcat(OpcodeStr,
4365 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4367 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4368 EVEX_CD8<32, VForm>;
4369 } // ExeDomain = SSEPackedSingle
4371 let ExeDomain = SSEPackedDouble in {
4372 // Vector intrinsic operation, reg
4373 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4374 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4375 !strconcat(OpcodeStr,
4376 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4377 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4379 // Vector intrinsic operation, mem
4380 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4381 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4382 !strconcat(OpcodeStr,
4383 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4385 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4386 EVEX_CD8<64, VForm>;
4387 } // ExeDomain = SSEPackedDouble
// Scalar two-source FP op with an immediate control byte ($src3): plain
// FR32X/FR64X asm-only forms plus isCodeGenOnly _Int forms and memory forms
// that implement the F32Int/F64Int intrinsics on 128-bit vectors.
4390 multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4394 let ExeDomain = GenericDomain in {
4396 let hasSideEffects = 0 in
4397 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4398 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4399 !strconcat(OpcodeStr,
4400 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4403 // Intrinsic operation, reg.
4404 let isCodeGenOnly = 1 in
4405 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4406 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4407 !strconcat(OpcodeStr,
4408 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4409 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4411 // Intrinsic operation, mem.
4412 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4413 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4414 !strconcat(OpcodeStr,
4415 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4416 [(set VR128X:$dst, (F32Int VR128X:$src1,
4417 sse_load_f32:$src2, imm:$src3))]>,
4418 EVEX_CD8<32, CD8VT1>;
4421 let hasSideEffects = 0 in
4422 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4423 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4424 !strconcat(OpcodeStr,
4425 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4428 // Intrinsic operation, reg.
4429 let isCodeGenOnly = 1 in
4430 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4431 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4432 !strconcat(OpcodeStr,
4433 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4434 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4437 // Intrinsic operation, mem.
4438 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4439 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4440 !strconcat(OpcodeStr,
4441 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4443 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4444 VEX_W, EVEX_CD8<64, CD8VT1>;
4445 } // ExeDomain = GenericDomain
// Packed round-to-integral with an immediate rounding-mode/scale byte
// ($src2); register and memory source forms.
4448 multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4449 X86MemOperand x86memop, RegisterClass RC,
4450 PatFrag mem_frag, Domain d> {
4451 let ExeDomain = d in {
4452 // Intrinsic operation, reg.
4453 // Vector intrinsic operation, reg
4454 def r : AVX512AIi8<opc, MRMSrcReg,
4455 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4456 !strconcat(OpcodeStr,
4457 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4460 // Vector intrinsic operation, mem
4461 def m : AVX512AIi8<opc, MRMSrcMem,
4462 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4463 !strconcat(OpcodeStr,
4464 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4470 defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4471 memopv16f32, SSEPackedSingle>, EVEX_V512,
4472 EVEX_CD8<32, CD8VF>;
// Unmasked intrinsic (passthru = $src1, all-ones mask) -> plain reg form.
4474 def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
4475 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
4477 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4480 defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4481 memopv8f64, SSEPackedDouble>, EVEX_V512,
4482 VEX_W, EVEX_CD8<64, CD8VF>;
4484 def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
4485 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
4487 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4489 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
4490 Operand x86memop, RegisterClass RC, Domain d> {
4491 let ExeDomain = d in {
4492 def r : AVX512AIi8<opc, MRMSrcReg,
4493 (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
4494 !strconcat(OpcodeStr,
4495 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4498 def m : AVX512AIi8<opc, MRMSrcMem,
4499 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
4500 !strconcat(OpcodeStr,
4501 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// Scalar round-scale instantiations for f32/f64.
4506 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4507 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4509 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4510 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
// Map generic scalar FP rounding nodes onto VRNDSCALESS/SD with the
// corresponding immediate (0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 rint,
// 0xC nearbyint). The pass-through operand is IMPLICIT_DEF since only the
// low element is used. The f32 ffloor pattern is wrapped in an explicit
// (f32 ...) cast for consistency with every sibling pattern below.
4512 def : Pat<(f32 (ffloor FR32X:$src)),
4513 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4514 def : Pat<(f64 (ffloor FR64X:$src)),
4515 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4516 def : Pat<(f32 (fnearbyint FR32X:$src)),
4517 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4518 def : Pat<(f64 (fnearbyint FR64X:$src)),
4519 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4520 def : Pat<(f32 (fceil FR32X:$src)),
4521 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4522 def : Pat<(f64 (fceil FR64X:$src)),
4523 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4524 def : Pat<(f32 (frint FR32X:$src)),
4525 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4526 def : Pat<(f64 (frint FR64X:$src)),
4527 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4528 def : Pat<(f32 (ftrunc FR32X:$src)),
4529 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4530 def : Pat<(f64 (ftrunc FR64X:$src)),
4531 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
// Map packed FP rounding nodes onto VRNDSCALEPS/PD with the matching
// immediate (0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 rint, 0xC nearbyint).
4533 def : Pat<(v16f32 (ffloor VR512:$src)),
4534 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
4535 def : Pat<(v16f32 (fnearbyint VR512:$src)),
4536 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
4537 def : Pat<(v16f32 (fceil VR512:$src)),
4538 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
4539 def : Pat<(v16f32 (frint VR512:$src)),
4540 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
4541 def : Pat<(v16f32 (ftrunc VR512:$src)),
4542 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
// Double-precision equivalents.
4544 def : Pat<(v8f64 (ffloor VR512:$src)),
4545 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
4546 def : Pat<(v8f64 (fnearbyint VR512:$src)),
4547 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
4548 def : Pat<(v8f64 (fceil VR512:$src)),
4549 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
4550 def : Pat<(v8f64 (frint VR512:$src)),
4551 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
4552 def : Pat<(v8f64 (ftrunc VR512:$src)),
4553 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
4555 //===----------------------------------------------------------------------===//
4556 // Integer truncate and extend operations
4557 //===----------------------------------------------------------------------===//
// Down-converting (truncating/saturating) vector moves: VPMOV* family.
// Variants: rr (plain), rrk (merge-masked), rrkz (zero-masked),
// mr (store to memory), mrk (masked store).
4559 multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
4560 RegisterClass dstRC, RegisterClass srcRC,
4561 RegisterClass KRC, X86MemOperand x86memop> {
4562 def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4564 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
// Merge-masking form: elements with a clear mask bit keep $dst's value.
4567 def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4568 (ins KRC:$mask, srcRC:$src),
4569 !strconcat(OpcodeStr,
4570 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
// Zero-masking form ({z}): masked-off elements are zeroed.
4573 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4574 (ins KRC:$mask, srcRC:$src),
4575 !strconcat(OpcodeStr,
4576 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
// Direct store form.
4579 def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
4580 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// Masked store form.
4583 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
4584 (ins x86memop:$dst, KRC:$mask, srcRC:$src),
4585 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
// Instantiations: plain (VPMOV*), signed-saturating (VPMOVS*) and
// unsigned-saturating (VPMOVUS*) down-converts from ZMM. The mask class
// matches the source element count (VK8WM for q-source, VK16WM for d-source).
4589 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4590 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4591 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4592 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4593 defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4594 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4595 defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4596 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4597 defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4598 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4599 defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4600 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4601 defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4602 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4603 defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4604 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4605 defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4606 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4607 defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4608 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4609 defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4610 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4611 defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4612 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4613 defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4614 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4615 defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4616 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4617 defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4618 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// Select plain rr forms for unmasked X86vtrunc...
4620 def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4621 def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4622 def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4623 def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4624 def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
// ...and zero-masking rrkz forms for masked X86vtruncm.
4626 def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4627 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
4628 def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4629 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
4630 def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4631 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
4632 def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4633 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
// Widening vector moves (VPMOVZX*/VPMOVSX*). OpNode is the extension node
// (X86vzext/X86vsext); OpVT is the wide result type, InVT the narrow source.
// rr/rrk/rrkz are register forms; rm/rmk/rmkz load via mem_frag with a
// bitconvert to InVT.
4636 multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4637 RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
4638 PatFrag mem_frag, X86MemOperand x86memop,
4639 ValueType OpVT, ValueType InVT> {
4641 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4643 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4644 [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
// Merge-masked register form (no ISel pattern here).
4646 def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4647 (ins KRC:$mask, SrcRC:$src),
4648 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
// Zero-masked register form.
4651 def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4652 (ins KRC:$mask, SrcRC:$src),
4653 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4656 let mayLoad = 1 in {
4657 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4658 (ins x86memop:$src),
4659 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4661 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
4664 def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4665 (ins KRC:$mask, x86memop:$src),
4666 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4670 def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4671 (ins KRC:$mask, x86memop:$src),
4672 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// Zero-extend instantiations (X86vzext) from 128/256-bit sources into ZMM.
4678 defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
4679 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4681 defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
4682 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4684 defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
4685 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4686 EVEX_CD8<16, CD8VH>;
4687 defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
4688 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4689 EVEX_CD8<16, CD8VQ>;
4690 defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
4691 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4692 EVEX_CD8<32, CD8VH>;
// Sign-extend instantiations (X86vsext), mirroring the zero-extend set.
4694 defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
4695 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4697 defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
4698 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4700 defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
4701 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4702 EVEX_CD8<16, CD8VH>;
4703 defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
4704 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4705 EVEX_CD8<16, CD8VQ>;
4706 defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
4707 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4708 EVEX_CD8<32, CD8VH>;
4710 //===----------------------------------------------------------------------===//
4711 // GATHER - SCATTER Operations
// Masked gather: $dst is tied to $src1 (merge destination), and the mask is
// tied to $mask_wb because the hardware clears mask bits as elements
// complete. @earlyclobber keeps $dst from aliasing the index register.
4713 multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4714 RegisterClass RC, X86MemOperand memop> {
4716 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4717 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4718 (ins RC:$src1, KRC:$mask, memop:$src2),
4719 !strconcat(OpcodeStr,
4720 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// FP gathers: the memory operand encodes the index width (vy* = ymm index,
// vz* = zmm index). qps/qd forms write a 256-bit result (VR256X).
4724 let ExeDomain = SSEPackedDouble in {
4725 defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4726 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4727 defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4728 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4731 let ExeDomain = SSEPackedSingle in {
4732 defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4733 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4734 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4735 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Integer gathers.
4738 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4739 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4740 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4741 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4743 defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4744 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4745 defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4746 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Masked scatter: stores RC:$src2 through memop:$dst under KRC:$mask.
// As with gather, the mask is tied to a writeback output ($mask_wb) because
// completed elements clear their mask bits.
4748 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4749 RegisterClass RC, X86MemOperand memop> {
4750 let mayStore = 1, Constraints = "$mask = $mask_wb" in
4751 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4752 (ins memop:$dst, KRC:$mask, RC:$src2),
4753 !strconcat(OpcodeStr,
4754 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// FP and integer scatter instantiations, mirroring the gather set above.
4758 let ExeDomain = SSEPackedDouble in {
4759 defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4760 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4761 defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4762 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4765 let ExeDomain = SSEPackedSingle in {
4766 defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4767 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4768 defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4769 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4772 defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4773 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4774 defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4775 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4777 defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4778 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4779 defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4780 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Gather/scatter prefetch hints (PFI feature). hasSideEffects = 1 keeps the
// otherwise pattern-less hint from being dead-code eliminated.
4783 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4784 RegisterClass KRC, X86MemOperand memop> {
4785 let Predicates = [HasPFI], hasSideEffects = 1 in
4786 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4787 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
// Prefetch instantiations. The ModRM reg field selects the hint: /1 = PF0
// gather, /2 = PF1 gather, /5 = PF0 scatter, /6 = PF1 scatter.
4791 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4792 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4794 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4795 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4797 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4798 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4800 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4801 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4803 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4804 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4806 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4807 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4809 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4810 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4812 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4813 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4815 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4816 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4818 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4819 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4821 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4822 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4824 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4825 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4827 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4828 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4830 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4831 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4833 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4834 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4836 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4837 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4838 //===----------------------------------------------------------------------===//
4839 // VSHUFPS - VSHUFPD Operations
// VSHUFPS/VSHUFPD: two-source shuffle controlled by an i8 immediate.
// rmi folds a load of the second source via mem_frag; rri is reg-reg.
4841 multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4842 ValueType vt, string OpcodeStr, PatFrag mem_frag,
4844 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4845 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4846 !strconcat(OpcodeStr,
4847 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4848 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4849 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4850 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
4851 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4852 (ins RC:$src1, RC:$src2, i8imm:$src3),
4853 !strconcat(OpcodeStr,
4854 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4855 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4856 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4857 EVEX_4V, Sched<[WriteShuffle]>;
// 512-bit shufps/shufpd instantiations.
4860 defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
4861 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
4862 defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
4863 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Reuse the FP shuffle instructions for same-width integer shuffles.
4865 def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4866 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4867 def : Pat<(v16i32 (X86Shufp VR512:$src1,
4868 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4869 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4871 def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4872 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4873 def : Pat<(v8i64 (X86Shufp VR512:$src1,
4874 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4875 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// VALIGND/VALIGNQ: concatenate-and-shift-right by element count ($src3).
// Note the operand swap in the patterns (X86VAlign takes src2, src1):
// the instruction's operand order is reversed relative to the DAG node.
4877 multiclass avx512_valign<X86VectorVTInfo _> {
4878 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4879 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4881 "$src3, $src2, $src1", "$src1, $src2, $src3",
4882 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4884 _.VT, _.RC, _.KRCWM>,
4885 AVX512AIi8Base, EVEX_4V;
4887 // Also match valign of packed floats.
4888 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4889 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
// Memory form; mnemonic is built from the VTInfo suffix ("valignd"/"valignq").
4892 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4893 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4894 !strconcat("valign"##_.Suffix,
4895 " \t{$src3, $src2, $src1, $dst|"
4896 "$dst, $src1, $src2, $src3}"),
4899 defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4900 defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4902 // Helper fragments to match sext vXi1 to vXiY.
// An arithmetic shift right by (elt-size - 1) broadcasts each element's sign
// bit, i.e. the sign-extension of the per-element predicate.
4903 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4904 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// Vector absolute value (VPABS*). rr/rrk/rrkz are register forms; rm* load a
// full vector, rmb* broadcast a scalar (BrdcstStr, e.g. "{1to16}"). The k/kz
// suffixes are the usual merge-/zero-masking variants.
// Fix: the memory-form defs previously hard-coded (outs VR512:$dst) while the
// register forms used the parameterized RC; use RC uniformly so the
// multiclass is self-consistent and not tied to 512-bit vectors. Behavior is
// unchanged for the existing instantiations (both pass RC=VR512).
4906 multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
4907 RegisterClass KRC, RegisterClass RC,
4908 X86MemOperand x86memop, X86MemOperand x86scalar_mop,
4910 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4911 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4913 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4914 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4916 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4917 !strconcat(OpcodeStr,
4918 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4920 let mayLoad = 1 in {
4921 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4922 (ins x86memop:$src),
4923 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4925 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4926 (ins KRC:$mask, x86memop:$src),
4927 !strconcat(OpcodeStr,
4928 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4930 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4931 (ins KRC:$mask, x86memop:$src),
4932 !strconcat(OpcodeStr,
4933 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4935 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4936 (ins x86scalar_mop:$src),
4937 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4938 ", $dst|$dst, ${src}", BrdcstStr, "}"),
4940 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4941 (ins KRC:$mask, x86scalar_mop:$src),
4942 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4943 ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
4944 []>, EVEX, EVEX_B, EVEX_K;
4945 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4946 (ins KRC:$mask, x86scalar_mop:$src),
4947 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4948 ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
4950 []>, EVEX, EVEX_B, EVEX_KZ;
// VPABSD/VPABSQ instantiations over ZMM.
4954 defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4955 i512mem, i32mem, "{1to16}">, EVEX_V512,
4956 EVEX_CD8<32, CD8VF>;
4957 defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4958 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4959 EVEX_CD8<64, CD8VF>;
// Match the sign-mask abs idiom (xor/add with the broadcast sign bit) onto
// VPABS. NOTE(review): the head line of this `def : Pat` (orig. 4961) is
// elided in this listing.
4962 (bc_v16i32 (v16i1sextv16i32)),
4963 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4964 (VPABSDZrr VR512:$src)>;
4966 (bc_v8i64 (v8i1sextv8i64)),
4967 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4968 (VPABSQZrr VR512:$src)>;
// Unmasked intrinsic forms (zero pass-through, all-ones mask).
4970 def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4971 (v16i32 immAllZerosV), (i16 -1))),
4972 (VPABSDZrr VR512:$src)>;
4973 def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4974 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
4975 (VPABSQZrr VR512:$src)>;
// Unary CDI-style op (shared by VPCONFLICT* and VPLZCNT* below): plain,
// zero-masked, and merge-masked forms, each with reg, full-vector-load, and
// broadcast-scalar sources. Merge-masked forms tie $src1 to $dst.
4977 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
4978 RegisterClass RC, RegisterClass KRC,
4979 X86MemOperand x86memop,
4980 X86MemOperand x86scalar_mop, string BrdcstStr> {
4981 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4983 !strconcat(OpcodeStr, " \t{$src, ${dst} |${dst}, $src}"),
4985 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4986 (ins x86memop:$src),
4987 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
// Broadcast-scalar source.
4989 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4990 (ins x86scalar_mop:$src),
4991 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4992 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
// Zero-masking variants.
4994 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4995 (ins KRC:$mask, RC:$src),
4996 !strconcat(OpcodeStr,
4997 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4999 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5000 (ins KRC:$mask, x86memop:$src),
5001 !strconcat(OpcodeStr,
5002 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
5004 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5005 (ins KRC:$mask, x86scalar_mop:$src),
5006 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
5007 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
5009 []>, EVEX, EVEX_KZ, EVEX_B;
// Merge-masking variants need the pass-through tied to the destination.
5011 let Constraints = "$src1 = $dst" in {
5012 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
5013 (ins RC:$src1, KRC:$mask, RC:$src2),
5014 !strconcat(OpcodeStr,
5015 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
5017 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5018 (ins RC:$src1, KRC:$mask, x86memop:$src2),
5019 !strconcat(OpcodeStr,
5020 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
5022 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5023 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
5024 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
5025 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
5026 []>, EVEX, EVEX_K, EVEX_B;
// Conflict-detection instructions (CDI feature).
5030 let Predicates = [HasCDI] in {
5031 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
5032 i512mem, i32mem, "{1to16}">,
5033 EVEX_V512, EVEX_CD8<32, CD8VF>;
5036 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
5037 i512mem, i64mem, "{1to8}">,
5038 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Lower the masked conflict intrinsics onto the merge-masked rrk forms;
// the GPR mask is moved into the corresponding mask-register class first.
5042 def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
5044 (VPCONFLICTDrrk VR512:$src1,
5045 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5047 def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
5049 (VPCONFLICTQrrk VR512:$src1,
5050 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Leading-zero-count instructions reuse the same multiclass.
5052 let Predicates = [HasCDI] in {
5053 defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
5054 i512mem, i32mem, "{1to16}">,
5055 EVEX_V512, EVEX_CD8<32, CD8VF>;
5058 defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
5059 i512mem, i64mem, "{1to8}">,
5060 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5064 def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
5066 (VPLZCNTDrrk VR512:$src1,
5067 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5069 def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
5071 (VPLZCNTQrrk VR512:$src1,
5072 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Generic ctlz nodes map directly onto VPLZCNT.
5074 def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
5075 (VPLZCNTDrm addr:$src)>;
5076 def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
5077 (VPLZCNTDrr VR512:$src)>;
5078 def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
5079 (VPLZCNTQrm addr:$src)>;
5080 def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
5081 (VPLZCNTQrr VR512:$src)>;
// Store i1 constants as a byte: both true encodings (-1 and 1) store 1.
5083 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5084 def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5085 def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
// Spill a VK1 mask register via KMOVW after widening to VK16.
5087 def : Pat<(store VK1:$src, addr:$dst),
5088 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
// Fragment matching a truncating store whose memory type is exactly i1.
5090 def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
5091 (truncstore node:$val, node:$ptr), [{
5092 return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
// An i1 truncstore from a GR8 is just a plain byte store.
5095 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
5096 (MOV8mr addr:$dst, GR8:$src)>;