1 // Group template arguments that can be derived from the vector type (EltNum x
2 // EltVT). These are things like the register class for the writemask, etc.
3 // The idea is to pass one of these as the template argument rather than the
4 // individual arguments.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
  // The GPR register class that can hold the write mask. Use GR8 for fewer
  // than 8 elements. Use shift-right and equal to work around the lack of
  !cast<RegisterClass>("GR" #
       !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;
  // Name of the vector ValueType, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;
  // The vector ValueType itself, looked up by name.
  ValueType VT = !cast<ValueType>(VTName);
  // Name of the element type, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;
  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
  // Size of RC in bits, e.g. 512 for VR512.
  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // Load pattern fragment for a full vector of this type.
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  // due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                   !if (!eq (TypeVariantName, "i"),
                        !if (!eq (Size, 128), "v2i64",
                             !if (!eq (Size, 256), "v4i64",
  // Load pattern fragment for a single element of this type.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  // The corresponding float type, e.g. v16f32 for v16i32
  // Note: For EltSize < 32, FloatVT is illegal and TableGen
  // fails to compile, so we choose FloatVT = VT
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";
// 512-bit integer vector-type records (RC = VR512).
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
// 128-bit variants (RC = VR128X).
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
// Bundles the 512/256/128-bit X86VectorVTInfo records for one element type,
// so a single template argument can describe all three vector lengths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
// Per-element-type VL bundles (512/256/128-bit infos together).
def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
// Common base class of AVX512_masking and AVX512_masking_3src.
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unconditional (non-masking) variant.
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                   [(set RC:$dst, RHS)]>;

  // Merge-masking variant ("{k}" in the assembly string); result elements
  // come from MaskingRHS, which is supplied by the derived multiclass.
  // Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                     [(set RC:$dst, MaskingRHS)]>,
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;

  // Zero-masking variant ("{k}{z}"): unselected elements are zeroed.
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      (vselect KRC:$mask, RHS,
                               (v16i32 immAllZerosV)))))]>,
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
  AVX512_masking_common<O, F, Outs,
                        // Masking ins prepend $src0 (preserved elements) and $mask.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        // Zero-masking ins prepend only $mask.
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
// Similar to AVX512_masking but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Non-masking ins: tied $src1 plus the rest.
                        !con((ins RC:$src1), NonTiedIns),
                        // Masking ins: $src1, then $mask, then the rest.
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                        // Zero-masking ins: same shape as the masking ins.
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
// To v8f64.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
// To v16f32.
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
// To v8i64.
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
// To v16i32.
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
// To v32i16.
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
// The v16f32 -> v32i16 bitcast pattern was accidentally listed twice;
// keep a single copy.
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
// To v64i8.
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

// Bitcasts between 128-bit vector types (VR128X), same no-op treatment.
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
// AVX-512: VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16f32 immAllZerosV))]>;

// Match all-zero vectors of the other 512-bit types to the same pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
// -- 32x4 fp form --
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512;
def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
      "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// -- 64x4 fp form --
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
      "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W;
def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
      "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// -- 32x4 integer form --
let hasSideEffects = 0 in {
def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512;
def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
      "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// -- 64x4 integer form --
let hasSideEffects = 0 in {
def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
      "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W;
def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
      "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector inserts into a 512-bit register. The f64/i64 element
// forms reuse the 32x4 instructions below; the bit pattern is identical.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
// Memory-source variants fold the 128-bit load.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                  (bc_v4i32 (loadv2i64 addr:$src2)),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
// 256-bit subvector register inserts. All four patterns must use the
// vinsert256_insert fragment to match the 256-bit subvector operands and
// the INSERT_get_vinsert256_imm transform (the i64/i32 forms previously
// said vinsert128_insert, which is inconsistent with their 256-bit
// operands and the 256-bit immediate transform).
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector inserts from memory; the 256-bit load is folded.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                  (bc_v8i32 (loadv4i64 addr:$src2)),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
// Memory form: folds a scalar f32 load into the insert.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                      (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                      imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
// -- 32x4 fp form --
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512;
def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
      (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
      "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// -- 64x4 fp form --
def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W;
def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
      (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
      "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// -- 32x4 integer form --
let hasSideEffects = 0 in {
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512;
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
      (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
      "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// -- 64x4 integer form --
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W;
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
      (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
      "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector extracts from a 512-bit register.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// State the (v16i32 ...) source type explicitly, consistent with the
// sibling patterns, instead of leaving TableGen to infer it from the
// untyped VR512 operand.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// 256-bit subvector extracts from a 512-bit register.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr VR512:$src1,
                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr VR512:$src1,
                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr VR512:$src1,
                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr VR512:$src1,
                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
// A 256-bit subvector extract from the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// Likewise a 128-bit extract from position 0 is just a sub_xmm copy.
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),

// Likewise a 256-bit insert to position 0 is just a sub_ymm copy.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
// Memory form: stores the extracted 32-bit element directly.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
//===---------------------------------------------------------------------===//
// FP scalar broadcast: register- and memory-source forms.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                          RegisterClass DestRC,
                          RegisterClass SrcRC, X86MemOperand x86memop> {
  // Register source form.
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
         !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
  // Memory source form.
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
let ExeDomain = SSEPackedSingle in {
  defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                       EVEX_V512, EVEX_CD8<32, CD8VT1>;
let ExeDomain = SSEPackedDouble in {
  defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Select the memory forms for broadcast-of-load.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// Map the broadcast intrinsics onto the same memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
// Integer broadcast from a general-purpose register, with an optional
// zero-masking form.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     []>, EVEX, EVEX_V512;
  // Zero-masking form ({z} in the assembly string).
  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
                      (ins KRC:$mask, SrcRC:$src),
                      !strconcat(OpcodeStr,
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                      []>, EVEX, EVEX_V512, EVEX_KZ;

defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,

// Zero-extend a mask into a vector by broadcasting 1 under the mask.
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
           (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Intrinsic forms; GPR masks are copied into the mask register classes.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                   (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                  (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
// Integer broadcast from an XMM register or memory, with zero-masking forms.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
  // Register source.
  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
  // Register source, zero-masked.
  def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
                    !strconcat(OpcodeStr,
                    " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                    (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
  // Memory source.
  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
  // Memory source, zero-masked.
  def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
                   !strconcat(OpcodeStr,
                   " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                   [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
                                            (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;

defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
                      loadi32, VR512, v16i32, v4i32, VK16WM>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
                      loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
                      EVEX_CD8<64, CD8VT1>;
// Broadcast of a 128/256-bit subvector from memory into a 512-bit register,
// with a zero-masking form.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
  def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
  // Zero-masked form.
  def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
                   !strconcat(OpcodeStr,
                   " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       i128mem, loadv2i64, VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// The 64x4 broadcast produces a v8i64 result (8 elements), so its
// write-mask class is VK8WM — not VK16WM, which was copy-pasted from the
// 32x4 variant above.
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       i256mem, loadv4i64, VK8WM>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Select the 512-bit broadcast intrinsics and generic X86VBroadcast nodes
// onto the register-register broadcast instructions.
687 def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
688 (VPBROADCASTDZrr VR128X:$src)>;
689 def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
690 (VPBROADCASTQZrr VR128X:$src)>;
// Generic FP broadcasts from the low element of an XMM register.
692 def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
693 (VBROADCASTSSZrr VR128X:$src)>;
694 def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
695 (VBROADCASTSDZrr VR128X:$src)>;
// FP broadcast intrinsics map to the same register forms.
697 def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
698 (VBROADCASTSSZrr VR128X:$src)>;
699 def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
700 (VBROADCASTSDZrr VR128X:$src)>;
702 // Provide fallback in case the load node that is used in the patterns above
703 // is used by additional users, which prevents the pattern selection.
// Scalar FP registers are first copied into VR128X so the rr form applies.
704 def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
705 (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
706 def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
707 (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
710 let Predicates = [HasAVX512] in {
// Masked broadcast of a 32-bit load into a 256-bit result: widen the VK8WM
// mask to VK16WM, run the 512-bit zero-masked broadcast, then take sub_ymm.
// NOTE(review): the EXTRACT_SUBREG wrapper line and the closing brace of this
// 'let' block fall in listing gaps here.
711 def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
713 (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
714 addr:$src)), sub_ymm)>;
716 //===----------------------------------------------------------------------===//
717 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a mask register into a vector register (VPBROADCASTM*): each mask
// bit is expanded into a full destination element.
// NOTE(review): the output pattern and closing brace of this multiclass are
// absent from this excerpt.
720 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
721 RegisterClass DstRC, RegisterClass KRC,
722 ValueType OpVT, ValueType SrcVT> {
723 def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
724 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// Mask-to-vector broadcasts require the CDI feature.
728 let Predicates = [HasCDI] in {
729 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
730 VK16, v16i32, v16i1>, EVEX_V512;
731 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
732 VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
735 //===----------------------------------------------------------------------===//
738 // -- immediate form --
// Immediate-controlled permute (VPERMQ/VPERMPD-style): 'ri' permutes a
// register, 'mi' permutes a memory operand, both by an 8-bit immediate.
// NOTE(review): the '[(set RC:$dst,' lines of both patterns and the closing
// brace are in listing gaps.
739 multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
740 SDNode OpNode, PatFrag mem_frag,
741 X86MemOperand x86memop, ValueType OpVT> {
742 def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
743 (ins RC:$src1, i8imm:$src2),
744 !strconcat(OpcodeStr,
745 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
747 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
749 def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
750 (ins x86memop:$src1, i8imm:$src2),
751 !strconcat(OpcodeStr,
752 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
754 (OpVT (OpNode (mem_frag addr:$src1),
755 (i8 imm:$src2))))]>, EVEX;
// Immediate permutes across 64-bit lanes of a ZMM register (integer and FP).
758 defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
759 i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
760 let ExeDomain = SSEPackedDouble in
761 defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
762 f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
764 // -- VPERM - register form --
// Variable permute (VPERMD-style): the permutation indices come from a second
// vector register ($src1) instead of an immediate.
// NOTE(review): the '[(set RC:$dst,' lines, trailing EVEX attribute of 'rm',
// and the closing brace are in listing gaps.
765 multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
766 PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
768 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
769 (ins RC:$src1, RC:$src2),
770 !strconcat(OpcodeStr,
771 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
773 (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
775 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
776 (ins RC:$src1, x86memop:$src2),
777 !strconcat(OpcodeStr,
778 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
780 (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
// Variable permutes for all four 512-bit element types.  These defm prefixes
// do not collide with the immediate forms above because the record suffixes
// differ (rr/rm here vs. ri/mi there).
784 defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
785 v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
786 defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
787 v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
788 let ExeDomain = SSEPackedSingle in
789 defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
790 v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
791 let ExeDomain = SSEPackedDouble in
792 defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
793 v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
795 // -- VPERM2I - 3 source operands form --
796 multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
797 PatFrag mem_frag, X86MemOperand x86memop,
798 SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
799 let Constraints = "$src1 = $dst" in {
800 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
801 (ins RC:$src1, RC:$src2, RC:$src3),
802 !strconcat(OpcodeStr,
803 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
805 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
808 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
809 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
810 !strconcat(OpcodeStr,
811 " \t{$src3, $src2, $dst {${mask}}|"
812 "$dst {${mask}}, $src2, $src3}"),
813 [(set RC:$dst, (OpVT (vselect KRC:$mask,
814 (OpNode RC:$src1, RC:$src2,
819 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
820 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
821 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
822 !strconcat(OpcodeStr,
823 " \t{$src3, $src2, $dst {${mask}} {z} |",
824 "$dst {${mask}} {z}, $src2, $src3}"),
825 [(set RC:$dst, (OpVT (vselect KRC:$mask,
826 (OpNode RC:$src1, RC:$src2,
829 (v16i32 immAllZerosV))))))]>,
832 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
833 (ins RC:$src1, RC:$src2, x86memop:$src3),
834 !strconcat(OpcodeStr,
835 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
837 (OpVT (OpNode RC:$src1, RC:$src2,
838 (mem_frag addr:$src3))))]>, EVEX_4V;
840 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
841 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
842 !strconcat(OpcodeStr,
843 " \t{$src3, $src2, $dst {${mask}}|"
844 "$dst {${mask}}, $src2, $src3}"),
846 (OpVT (vselect KRC:$mask,
847 (OpNode RC:$src1, RC:$src2,
848 (mem_frag addr:$src3)),
852 let AddedComplexity = 10 in // Prefer over the rrkz variant
853 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
854 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
855 !strconcat(OpcodeStr,
856 " \t{$src3, $src2, $dst {${mask}} {z}|"
857 "$dst {${mask}} {z}, $src2, $src3}"),
859 (OpVT (vselect KRC:$mask,
860 (OpNode RC:$src1, RC:$src2,
861 (mem_frag addr:$src3)),
863 (v16i32 immAllZerosV))))))]>,
// VPERMI2*: the tied operand holds the index vector (X86VPermiv3).
// Note the FP variants reuse i512mem for their memory operand here.
867 defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
868 i512mem, X86VPermiv3, v16i32, VK16WM>,
869 EVEX_V512, EVEX_CD8<32, CD8VF>;
870 defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
871 i512mem, X86VPermiv3, v8i64, VK8WM>,
872 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
873 defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
874 i512mem, X86VPermiv3, v16f32, VK16WM>,
875 EVEX_V512, EVEX_CD8<32, CD8VF>;
876 defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
877 i512mem, X86VPermiv3, v8f64, VK8WM>,
878 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2* wrapper: instantiates avx512_perm_3src with the "vpermt2" mnemonic
// and adds selection patterns for the mask_vpermt intrinsics (unmasked via
// the -1 all-ones mask, masked via COPY_TO_REGCLASS of a GPR mask).
// NOTE(review): the inheritance tail and part of the masked pattern fall in
// listing gaps.
880 multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
881 PatFrag mem_frag, X86MemOperand x86memop,
882 SDNode OpNode, ValueType OpVT, RegisterClass KRC,
883 ValueType MaskVT, RegisterClass MRC> :
884 avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
// All-ones mask (-1) selects the unmasked rr record.
886 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
887 VR512:$idx, VR512:$src1, VR512:$src2, -1)),
888 (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
// GPR mask operand is moved into the mask register class for rrk.
890 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
891 VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
892 (!cast<Instruction>(NAME#rrk) VR512:$src1,
893 (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
// VPERMT2*: the tied operand holds table data rather than indices
// (X86VPermv3).  GPR mask classes match the element counts (GR16/GR8).
896 defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
897 X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
898 EVEX_V512, EVEX_CD8<32, CD8VF>;
899 defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
900 X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
901 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
902 defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
903 X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
904 EVEX_V512, EVEX_CD8<32, CD8VF>;
905 defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
906 X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
907 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
909 //===----------------------------------------------------------------------===//
910 // AVX-512 - BLEND using mask
// Mask-controlled blend: result takes elements from $src2 where the mask bit
// is set, otherwise from $src1 (note the operand order in the rr pattern).
// The memory form is selected by assembly only — its pattern list is empty.
// NOTE(review): the closing brace of this multiclass is in a listing gap.
912 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
913 RegisterClass KRC, RegisterClass RC,
914 X86MemOperand x86memop, PatFrag mem_frag,
915 SDNode OpNode, ValueType vt> {
916 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
917 (ins KRC:$mask, RC:$src1, RC:$src2),
918 !strconcat(OpcodeStr,
919 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
920 [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
921 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
// No ISel pattern for the memory form; asm-only.
923 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
924 (ins KRC:$mask, RC:$src1, x86memop:$src2),
925 !strconcat(OpcodeStr,
926 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
927 []>, EVEX_4V, EVEX_K;
// FP blends: vselect with a write-mask register as the condition.
930 let ExeDomain = SSEPackedSingle in
931 defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
932 VK16WM, VR512, f512mem,
933 memopv16f32, vselect, v16f32>,
934 EVEX_CD8<32, CD8VF>, EVEX_V512;
935 let ExeDomain = SSEPackedDouble in
936 defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
937 VK8WM, VR512, f512mem,
938 memopv8f64, vselect, v8f64>,
939 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Intrinsic forms: the GPR mask is moved into the write-mask class first.
941 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
942 (v16f32 VR512:$src2), (i16 GR16:$mask))),
943 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
944 VR512:$src1, VR512:$src2)>;
946 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
947 (v8f64 VR512:$src2), (i8 GR8:$mask))),
948 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
949 VR512:$src1, VR512:$src2)>;
// Integer blends, parallel to VBLENDMPSZ/VBLENDMPDZ above.
// Fixed: use the integer 512-bit memory operand (i512mem) rather than
// f512mem, matching every other 512-bit integer instruction in this file
// (e.g. VPERMDZ/VPERMQZ).  The operand size is unchanged; only the declared
// operand type is corrected for consistency.
951 defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
952 VK16WM, VR512, i512mem,
953 memopv16i32, vselect, v16i32>,
954 EVEX_CD8<32, CD8VF>, EVEX_V512;
956 defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
957 VK8WM, VR512, i512mem,
958 memopv8i64, vselect, v8i64>,
959 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Intrinsic patterns for the integer blends.
// Fixed: copy the GPR mask into the write-mask register classes VK16WM/VK8WM
// — the classes the VPBLENDM*Zrr mask operands are declared with and the
// classes used by the parallel FP patterns above — instead of the plain
// VK16/VK8 classes, which do not satisfy the instructions' operand
// constraints.
961 def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
962 (v16i32 VR512:$src2), (i16 GR16:$mask))),
963 (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
964 VR512:$src1, VR512:$src2)>;
966 def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
967 (v8i64 VR512:$src2), (i8 GR8:$mask))),
968 (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
969 VR512:$src1, VR512:$src2)>;
971 let Predicates = [HasAVX512] in {
// 256-bit vselect lowered on the 512-bit blend: widen sources with
// SUBREG_TO_REG, widen the VK8WM mask to VK16WM, blend, extract sub_ymm.
// NOTE(review): the EXTRACT_SUBREG wrapper lines and the closing brace of
// this 'let' block are in listing gaps.
972 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
973 (v8f32 VR256X:$src2))),
975 (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
976 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
977 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
979 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
980 (v8i32 VR256X:$src2))),
982 (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
983 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
984 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
986 //===----------------------------------------------------------------------===//
987 // Compare Instructions
988 //===----------------------------------------------------------------------===//
990 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a single mask bit (VK1).  The _alt records
// accept an explicit immediate comparison code for the assembler only
// (isAsmParserOnly) and carry no patterns.
// NOTE(review): the closing braces of the 'let' block and the multiclass are
// in listing gaps.
991 multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
992 Operand CC, SDNode OpNode, ValueType VT,
993 PatFrag ld_frag, string asm, string asm_alt> {
994 def rr : AVX512Ii8<0xC2, MRMSrcReg,
995 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
996 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
997 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
998 def rm : AVX512Ii8<0xC2, MRMSrcMem,
999 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
1000 [(set VK1:$dst, (OpNode (VT RC:$src1),
1001 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Assembler-only aliases taking the raw 8-bit condition immediate.
1002 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1003 def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
1004 (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1005 asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
1006 def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
1007 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1008 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// VCMPSS/VCMPSD: scalar compares into a mask bit.
// NOTE(review): the trailing format/prefix attributes of each defm and the
// closing brace are in listing gaps.
1012 let Predicates = [HasAVX512] in {
1013 defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
1014 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1015 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
1017 defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
1018 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1019 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
// Packed integer equality/greater-than compare into a mask register, driven
// by an X86VectorVTInfo bundle '_' (register class, mask classes, memory
// operand, load fragment).  rrk/rmk AND the incoming mask with the compare
// result (merge-masking semantics on a mask destination).
// NOTE(review): a few interior lines (bitconvert wrappers, closing brace)
// are in listing gaps.
1023 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
1024 X86VectorVTInfo _> {
1025 def rr : AVX512BI<opc, MRMSrcReg,
1026 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
1027 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1028 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
1029 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1031 def rm : AVX512BI<opc, MRMSrcMem,
1032 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
1033 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1034 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1035 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
1036 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Masked register form: result mask is ANDed with $mask.
1037 def rrk : AVX512BI<opc, MRMSrcReg,
1038 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1039 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1040 "$dst {${mask}}, $src1, $src2}"),
1041 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1042 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
1043 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
// Masked memory form.
1045 def rmk : AVX512BI<opc, MRMSrcMem,
1046 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1047 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1048 "$dst {${mask}}, $src1, $src2}"),
1049 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1050 (OpNode (_.VT _.RC:$src1),
1052 (_.LdFrag addr:$src2))))))],
1053 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Broadcast-memory (EVEX.b) forms of the packed integer compare: the scalar
// memory operand is broadcast to all lanes before comparing.
// NOTE(review): the X86VBroadcast wrapper line of rmbk and the closing braces
// are in listing gaps.
1056 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
1057 X86VectorVTInfo _> {
1058 let mayLoad = 1 in {
1059 def rmb : AVX512BI<opc, MRMSrcMem,
1060 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1061 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
1062 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1063 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1064 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
1065 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
// Masked broadcast form: compare result ANDed with $mask.
1066 def rmbk : AVX512BI<opc, MRMSrcMem,
1067 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1068 _.ScalarMemOp:$src2),
1069 !strconcat(OpcodeStr,
1070 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1071 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1072 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1073 (OpNode (_.VT _.RC:$src1),
1075 (_.ScalarLdFrag addr:$src2)))))],
1076 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Vector-length expansion: 512-bit under 'prd', 256/128-bit additionally
// gated on HasVLX.
// NOTE(review): the EVEX_V* attribute tails and closing braces are in
// listing gaps.
1080 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
1081 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1082 let Predicates = [prd] in
1083 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
1086 let Predicates = [prd, HasVLX] in {
1087 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
1089 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
// Same vector-length expansion for the broadcast-memory compare forms.
// NOTE(review): the 'Predicate prd' parameter line, attribute tails and
// closing braces are in listing gaps.
1094 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
1095 SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
1097 let Predicates = [prd] in
1098 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
1101 let Predicates = [prd, HasVLX] in {
1102 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
1104 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
// VPCMPEQ*/VPCMPGT* instantiations.  Byte/word forms need BWI; dword/qword
// forms need only AVX512 and additionally get the broadcast-memory variants.
// NOTE(review): the attribute tails of the VPCMPEQB and VPCMPGTB defms are in
// listing gaps.
1109 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
1110 avx512vl_i8_info, HasBWI>,
1113 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
1114 avx512vl_i16_info, HasBWI>,
1115 EVEX_CD8<16, CD8VF>;
1117 defm VPCMPEQD : avx512_icmp_packed_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1118 avx512vl_i32_info, HasAVX512>,
1119 avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1120 avx512vl_i32_info, HasAVX512>,
1121 EVEX_CD8<32, CD8VF>;
1123 defm VPCMPEQQ : avx512_icmp_packed_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1124 avx512vl_i64_info, HasAVX512>,
1125 avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1126 avx512vl_i64_info, HasAVX512>,
1127 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1129 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
1130 avx512vl_i8_info, HasBWI>,
1133 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
1134 avx512vl_i16_info, HasBWI>,
1135 EVEX_CD8<16, CD8VF>;
1137 defm VPCMPGTD : avx512_icmp_packed_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1138 avx512vl_i32_info, HasAVX512>,
1139 avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1140 avx512vl_i32_info, HasAVX512>,
1141 EVEX_CD8<32, CD8VF>;
1143 defm VPCMPGTQ : avx512_icmp_packed_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1144 avx512vl_i64_info, HasAVX512>,
1145 avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1146 avx512vl_i64_info, HasAVX512>,
1147 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// 256-bit compares lowered on the 512-bit instructions: widen the sources,
// run the Z form, and narrow the resulting mask to VK8.
1149 def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1150 (COPY_TO_REGCLASS (VPCMPGTDZrr
1151 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1152 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
1154 def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1155 (COPY_TO_REGCLASS (VPCMPEQDZrr
1156 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1157 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
// Integer compare with an explicit condition-code operand (VPCMP{B,W,D,Q}
// and unsigned variants).  rri/rmi are the patterned forms; rrik/rmik AND
// the incoming mask with the result; the _alt records are assembler-only
// aliases taking a raw 8-bit immediate.
// NOTE(review): the imm:$cc tails of the patterned forms, the AVXCC:$cc /
// i8imm:$cc ins-tail lines of the masked/alt forms, and the closing braces
// are in listing gaps.
1159 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
1160 X86VectorVTInfo _> {
1161 def rri : AVX512AIi8<opc, MRMSrcReg,
1162 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1163 !strconcat("vpcmp${cc}", Suffix,
1164 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1165 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1167 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1169 def rmi : AVX512AIi8<opc, MRMSrcMem,
1170 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
1171 !strconcat("vpcmp${cc}", Suffix,
1172 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1173 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1174 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1176 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Masked register form.
1177 def rrik : AVX512AIi8<opc, MRMSrcReg,
1178 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1180 !strconcat("vpcmp${cc}", Suffix,
1181 "\t{$src2, $src1, $dst {${mask}}|",
1182 "$dst {${mask}}, $src1, $src2}"),
1183 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1184 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1186 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
// Masked memory form.
1188 def rmik : AVX512AIi8<opc, MRMSrcMem,
1189 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1191 !strconcat("vpcmp${cc}", Suffix,
1192 "\t{$src2, $src1, $dst {${mask}}|",
1193 "$dst {${mask}}, $src1, $src2}"),
1194 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1195 (OpNode (_.VT _.RC:$src1),
1196 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1198 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
1200 // Accept explicit immediate argument form instead of comparison code.
1201 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1202 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
1203 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
1204 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1205 "$dst, $src1, $src2, $cc}"),
1206 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1207 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
1208 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
1209 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1210 "$dst, $src1, $src2, $cc}"),
1211 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1212 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
1213 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1215 !strconcat("vpcmp", Suffix,
1216 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1217 "$dst {${mask}}, $src1, $src2, $cc}"),
1218 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1219 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
1220 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1222 !strconcat("vpcmp", Suffix,
1223 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1224 "$dst {${mask}}, $src1, $src2, $cc}"),
1225 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Broadcast-memory (EVEX.b) forms of the condition-code integer compare,
// plus assembler-only immediate aliases.
// NOTE(review): the AVXCC:$cc / i8imm:$cc ins-tail lines, imm:$cc pattern
// tails, and closing braces are in listing gaps.
1229 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
1230 X86VectorVTInfo _> {
1231 let mayLoad = 1 in {
1232 def rmib : AVX512AIi8<opc, MRMSrcMem,
1233 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1235 !strconcat("vpcmp${cc}", Suffix,
1236 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1237 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1238 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1239 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1241 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
// Masked broadcast form.
1242 def rmibk : AVX512AIi8<opc, MRMSrcMem,
1243 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1244 _.ScalarMemOp:$src2, AVXCC:$cc),
1245 !strconcat("vpcmp${cc}", Suffix,
1246 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1247 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1248 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1249 (OpNode (_.VT _.RC:$src1),
1250 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1252 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
1255 // Accept explicit immediate argument form instead of comparison code.
1256 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1257 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
1258 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1260 !strconcat("vpcmp", Suffix,
1261 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
1262 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1263 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1264 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
1265 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1266 _.ScalarMemOp:$src2, i8imm:$cc),
1267 !strconcat("vpcmp", Suffix,
1268 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1269 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1270 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Vector-length expansion for the condition-code compares.
// NOTE(review): the closing braces are in listing gaps.
1274 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
1275 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1276 let Predicates = [prd] in
1277 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
1279 let Predicates = [prd, HasVLX] in {
1280 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
1281 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
// Vector-length expansion for the broadcast-memory condition-code compares.
// NOTE(review): the EVEX_V* attribute tails and closing braces are in
// listing gaps.
1285 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
1286 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1287 let Predicates = [prd] in
1288 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
1291 let Predicates = [prd, HasVLX] in {
1292 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
1294 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
// VPCMP{B,W,D,Q} / VPCMPU{B,W,D,Q}: signed (X86cmpm) and unsigned (X86cmpmu)
// condition-code compares.  Byte/word need BWI; dword/qword also get the
// broadcast-memory forms.
// NOTE(review): the 'HasAVX512>,' continuation lines of the D/Q defms are in
// listing gaps.
1299 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
1300 HasBWI>, EVEX_CD8<8, CD8VF>;
1301 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
1302 HasBWI>, EVEX_CD8<8, CD8VF>;
1304 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
1305 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1306 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
1307 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1309 defm VPCMPD : avx512_icmp_cc_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1311 avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1312 HasAVX512>, EVEX_CD8<32, CD8VF>;
1313 defm VPCMPUD : avx512_icmp_cc_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1315 avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1316 HasAVX512>, EVEX_CD8<32, CD8VF>;
1318 defm VPCMPQ : avx512_icmp_cc_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1320 avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1321 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1322 defm VPCMPUQ : avx512_icmp_cc_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1324 avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1325 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1327 // avx512_cmp_packed - compare packed instructions
// Packed FP compare with condition code (VCMPPS/VCMPPD): register, {sae}
// suppress-all-exceptions, and memory forms, plus assembler-only immediate
// aliases.
// NOTE(review): the rrib pattern line, part of the rmi pattern, and the
// closing braces are in listing gaps.  The rmi asm string prints $cc only on
// the Intel side — looks asymmetric with rri; verify against the original.
1328 multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
1329 X86MemOperand x86memop, ValueType vt,
1330 string suffix, Domain d> {
1331 def rri : AVX512PIi8<0xC2, MRMSrcReg,
1332 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1333 !strconcat("vcmp${cc}", suffix,
1334 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1335 [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
// {sae} form: identical operands, exception suppression in the asm string.
1336 def rrib: AVX512PIi8<0xC2, MRMSrcReg,
1337 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1338 !strconcat("vcmp${cc}", suffix,
1339 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
1341 def rmi : AVX512PIi8<0xC2, MRMSrcMem,
1342 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
1343 !strconcat("vcmp${cc}", suffix,
1344 " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1346 (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
1348 // Accept explicit immediate argument form instead of comparison code.
1349 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1350 def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
1351 (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1352 !strconcat("vcmp", suffix,
1353 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1354 def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
1355 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1356 !strconcat("vcmp", suffix,
1357 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
// 512-bit packed FP compares.
1361 defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
1362 "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
1363 EVEX_CD8<32, CD8VF>;
1364 defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
1365 "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
1366 EVEX_CD8<64, CD8VF>;
// 256-bit compares lowered on the 512-bit instructions (widen, compare,
// narrow the mask to VK8).
// NOTE(review): the imm:$cc tail / 'VK8)>;' lines of each pattern are in
// listing gaps.
1368 def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1369 (COPY_TO_REGCLASS (VCMPPSZrri
1370 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1371 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1373 def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1374 (COPY_TO_REGCLASS (VPCMPDZrri
1375 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1376 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1378 def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1379 (COPY_TO_REGCLASS (VPCMPUDZrri
1380 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1381 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
// Intrinsic compare patterns: the rrib ({sae}) form is used for the
// exception-suppressing rounding-mode variants, rri otherwise; the mask
// result is copied out to a GPR class.
// NOTE(review): each pattern below is missing interior lines in the listing.
1384 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1385 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1387 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
1388 (I8Imm imm:$cc)), GR16)>;
1390 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1391 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1393 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
1394 (I8Imm imm:$cc)), GR8)>;
1396 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1397 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1399 (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1400 (I8Imm imm:$cc)), GR16)>;
1402 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1403 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1405 (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1406 (I8Imm imm:$cc)), GR8)>;
1408 // Mask register copy, including
1409 // - copy between mask registers
1410 // - load/store mask registers
1411 // - copy from GPR to mask register and vice versa
// KMOV family, mask <-> mask and mask <-> memory moves: kk (reg-reg),
// km (load), mk (store).  Only the load form carries an ISel pattern.
// NOTE(review): the closing braces are in listing gaps.
1413 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1414 string OpcodeStr, RegisterClass KRC,
1415 ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
1416 let hasSideEffects = 0 in {
1417 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1418 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1420 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
1421 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1422 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
1424 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
1425 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOV mask <-> GPR moves: kr (GPR to mask), rk (mask to GPR).  No patterns;
// selection happens via the Pats below.
// NOTE(review): the 'string OpcodeStr,' parameter line and closing braces
// are in listing gaps.
1429 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1431 RegisterClass KRC, RegisterClass GRC> {
1432 let hasSideEffects = 0 in {
1433 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
1434 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1435 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
1436 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOVB (DQI), KMOVW (AVX512F), KMOVD/KMOVQ (BWI) instantiations.
// NOTE(review): each defm's prefix/format attribute tail lines are in
// listing gaps.
1440 let Predicates = [HasDQI] in
1441 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1443 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1446 let Predicates = [HasAVX512] in
1447 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1449 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
1452 let Predicates = [HasBWI] in {
1453 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1454 i32mem>, VEX, PD, VEX_W;
1455 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1459 let Predicates = [HasBWI] in {
1460 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1461 i64mem>, VEX, PS, VEX_W;
1462 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1466 // GR from/to mask register
// bitconvert between mask values and integers of the same width, selected
// per available feature level.
1467 let Predicates = [HasDQI] in {
1468 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1469 (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1470 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1471 (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1473 let Predicates = [HasAVX512] in {
1474 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1475 (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1476 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1477 (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
1479 let Predicates = [HasBWI] in {
1480 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1481 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1483 let Predicates = [HasBWI] in {
1484 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1485 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
// Mask load/store patterns.  With only AVX512F, 8-bit mask stores/loads go
// through the 16-bit KMOVW.
1489 let Predicates = [HasDQI] in {
1490 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1491 (KMOVBmk addr:$dst, VK8:$src)>;
1493 let Predicates = [HasAVX512] in {
1494 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
1495 (KMOVWmk addr:$dst, VK16:$src)>;
1496 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1497 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
1498 def : Pat<(i1 (load addr:$src)),
1499 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
1500 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
1501 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
1503 let Predicates = [HasBWI] in {
1504 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1505 (KMOVDmk addr:$dst, VK32:$src)>;
1507 let Predicates = [HasBWI] in {
1508 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1509 (KMOVQmk addr:$dst, VK64:$src)>;
// i1 <-> integer conversions through KMOVW: trunc masks the low bit with
// AND32ri before the move; zext masks the low bit after moving back out.
// NOTE(review): several wrapper/closing lines of these patterns are in
// listing gaps.
1512 let Predicates = [HasAVX512] in {
1513 def : Pat<(i1 (trunc (i64 GR64:$src))),
1514 (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1517 def : Pat<(i1 (trunc (i32 GR32:$src))),
1518 (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
1520 def : Pat<(i1 (trunc (i8 GR8:$src))),
1522 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1524 def : Pat<(i1 (trunc (i16 GR16:$src))),
1526 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1529 def : Pat<(i32 (zext VK1:$src)),
1530 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
1531 def : Pat<(i8 (zext VK1:$src)),
1534 (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
1535 def : Pat<(i64 (zext VK1:$src)),
1536 (AND64ri8 (SUBREG_TO_REG (i64 0),
1537 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
1538 def : Pat<(i16 (zext VK1:$src)),
1540 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
// scalar_to_vector of a mask bit is just a register-class change.
1542 def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1543 (COPY_TO_REGCLASS VK1:$src, VK16)>;
1544 def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1545 (COPY_TO_REGCLASS VK1:$src, VK8)>;
1547 let Predicates = [HasBWI] in {
1548 def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1549 (COPY_TO_REGCLASS VK1:$src, VK32)>;
1550 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1551 (COPY_TO_REGCLASS VK1:$src, VK64)>;
1555 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1556 let Predicates = [HasAVX512] in {
1557 // GR from/to 8-bit mask without native support
1558 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1560 (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1562 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1564 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
// Extracting element 0 of a mask vector as an i1 is a plain
// register-class copy into VK1 (no instruction needed).
1567 def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
1568 (COPY_TO_REGCLASS VK16:$src, VK1)>;
1569 def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
1570 (COPY_TO_REGCLASS VK8:$src, VK1)>;
1572 let Predicates = [HasBWI] in {
1573 def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1574 (COPY_TO_REGCLASS VK32:$src, VK1)>;
1575 def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1576 (COPY_TO_REGCLASS VK64:$src, VK1)>;
1579 // Mask unary operation
// avx512_mask_unop: one reg-reg mask instruction (KRC = mask register
// class, OpNode = the DAG node it implements), gated on predicate prd.
// NOTE(review): the multiclass parameter list / closing braces are partly
// missing from this excerpt; visible text reproduced unchanged.
1581 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
1582 RegisterClass KRC, SDPatternOperator OpNode,
1584 let Predicates = [prd] in
1585 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1586 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1587 [(set KRC:$dst, (OpNode KRC:$src))]>;
// avx512_mask_unop_all: instantiate B/W/D/Q variants with the proper
// predicates and VEX prefixes (W needs AVX512F, D/Q need BWI).
1590 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1591 SDPatternOperator OpNode> {
1592 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1594 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1595 HasAVX512>, VEX, PS;
1596 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1597 HasBWI>, VEX, PD, VEX_W;
1598 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1599 HasBWI>, VEX, PS, VEX_W;
// KNOT{B,W,D,Q}: bitwise NOT of a mask register.
1602 defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
// Map the GR16-based knot intrinsic onto KNOTWrr via VK16 copies.
1604 multiclass avx512_mask_unop_int<string IntName, string InstName> {
1605 let Predicates = [HasAVX512] in
1606 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1608 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1609 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1611 defm : avx512_mask_unop_int<"knot", "KNOT">;
// xor with all-ones == NOT; select the native-width KNOT per predicate.
1613 let Predicates = [HasDQI] in
1614 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1615 let Predicates = [HasAVX512] in
1616 def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
1617 let Predicates = [HasBWI] in
1618 def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1619 let Predicates = [HasBWI] in
1620 def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1622 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
1623 let Predicates = [HasAVX512] in {
1624 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
1625 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
1627 def : Pat<(not VK8:$src),
1629 (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
1632 // Mask binary operation
1633 // - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop: one reg-reg-reg mask instruction implementing OpNode
// on register class KRC, gated on predicate prd.
// NOTE(review): the `Predicate prd> {` line and closing braces are not
// visible in this excerpt; visible text reproduced unchanged.
1634 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
1635 RegisterClass KRC, SDPatternOperator OpNode,
1637 let Predicates = [prd] in
1638 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1639 !strconcat(OpcodeStr,
1640 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1641 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// Instantiate B/W/D/Q variants (B needs DQI, W plain AVX-512, D/Q BWI).
1644 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1645 SDPatternOperator OpNode> {
1646 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1647 HasDQI>, VEX_4V, VEX_L, PD;
1648 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1649 HasAVX512>, VEX_4V, VEX_L, PS;
1650 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1651 HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1652 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1653 HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
// Pattern fragments for and-not and xnor, used as OpNodes below.
1656 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
1657 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// KAND/KOR/KXNOR/KXOR commute; KANDN does not.
1659 let isCommutable = 1 in {
1660 defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
1661 defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
1662 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1663 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
1665 let isCommutable = 0 in
1666 defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
// i1 logic ops: widen both operands to VK16, use the W-form instruction,
// then copy the result back to VK1.
1668 def : Pat<(xor VK1:$src1, VK1:$src2),
1669 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1670 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1672 def : Pat<(or VK1:$src1, VK1:$src2),
1673 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1674 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1676 def : Pat<(and VK1:$src1, VK1:$src2),
1677 (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1678 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Map the GR16-based mask-binop intrinsics onto the W-form instructions.
1680 multiclass avx512_mask_binop_int<string IntName, string InstName> {
1681 let Predicates = [HasAVX512] in
1682 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1683 (i16 GR16:$src1), (i16 GR16:$src2)),
1684 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1685 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1686 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1689 defm : avx512_mask_binop_int<"kand", "KAND">;
1690 defm : avx512_mask_binop_int<"kandn", "KANDN">;
1691 defm : avx512_mask_binop_int<"kor", "KOR">;
1692 defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1693 defm : avx512_mask_binop_int<"kxor", "KXOR">;
1695 // With AVX-512, 8-bit mask is promoted to 16-bit mask.
// avx512_binop_pat: select a W-form mask instruction for a v8i1 binop by
// widening both operands to VK16 and narrowing the result back to VK8.
1696 multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1697 let Predicates = [HasAVX512] in
1698 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1700 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1701 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1704 defm : avx512_binop_pat<and, KANDWrr>;
1705 defm : avx512_binop_pat<andn, KANDNWrr>;
1706 defm : avx512_binop_pat<or, KORWrr>;
1707 defm : avx512_binop_pat<xnor, KXNORWrr>;
1708 defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpack (KUNPCKBW): interleave two mask registers. The rr form has
// no pattern; selection happens via the concat_vectors Pat below.
1711 multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
1712 RegisterClass KRC> {
1713 let Predicates = [HasAVX512] in
1714 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1715 !strconcat(OpcodeStr,
1716 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
1719 multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
1720 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
1724 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
// Note the operand swap: src2 becomes the first KUNPCKBW operand.
1725 def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1726 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1727 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
// Map the GR16-based kunpck intrinsic onto KUNPCKBWrr.
1730 multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1731 let Predicates = [HasAVX512] in
1732 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1733 (i16 GR16:$src1), (i16 GR16:$src2)),
1734 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1735 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1736 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1738 defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// Mask test (KORTEST): no register result, only EFLAGS is defined.
1741 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1743 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1744 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
1745 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
1746 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1749 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1750 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1754 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
// Compare an i1 against 0 by KORTESTing it with itself (widened to VK16).
1756 def : Pat<(X86cmp VK1:$src1, (i1 0)),
1757 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1758 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
// Mask shift by immediate (KSHIFTL/KSHIFTR), ri form only.
1761 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1763 let Predicates = [HasAVX512] in
1764 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1765 !strconcat(OpcodeStr,
1766 " \t{$imm, $src, $dst|$dst, $src, $imm}"),
1767 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
// Only the W (VK16) variant is instantiated here; opc2 is unused in the
// visible portion of this excerpt.
1770 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1772 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1776 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1777 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
1779 // Mask setting all 0s or 1s
// KSET0/KSET1 are rematerializable pseudos that expand to an all-zeros /
// all-ones mask of the given type.
1780 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1781 let Predicates = [HasAVX512] in
1782 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1783 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1784 [(set KRC:$dst, (VT Val))]>;
1787 multiclass avx512_mask_setop_w<PatFrag Val> {
1788 defm B : avx512_mask_setop<VK8, v8i1, Val>;
1789 defm W : avx512_mask_setop<VK16, v16i1, Val>;
1792 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1793 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1795 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1796 let Predicates = [HasAVX512] in {
1797 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1798 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1799 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1800 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1801 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
// Subvector extract/insert between v8i1 and v16i1: index 0 is a plain
// register-class copy; the upper half (index 8) needs a KSHIFTRW by 8.
1803 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1804 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1806 def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1807 (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
1809 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1810 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
// v8i1 shifts are done in VK16 using the W-form kshift.
1812 def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
1813 (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1815 def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
1816 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1817 //===----------------------------------------------------------------------===//
1818 // AVX-512 - Aligned and unaligned load and store
// avx512_load: one load instruction family (rr move, rrkz zero-masked
// move, rm load, rrk/rmk merge-masked, rmkz zero-masked load).
// KRC is the write-mask class, vt the vector type, zvt the type used for
// the all-zeros immediate in zero-masking patterns.
// NOTE(review): several pattern continuation lines and closing braces are
// missing from this excerpt; visible text reproduced unchanged.
1821 multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
1822 RegisterClass KRC, RegisterClass RC,
1823 ValueType vt, ValueType zvt, X86MemOperand memop,
1824 Domain d, bit IsReMaterializable = 1> {
1825 let hasSideEffects = 0 in {
1826 def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1827 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
1829 def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
1830 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1831 "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
1833 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
1834 SchedRW = [WriteLoad] in
1835 def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
1836 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1837 [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
// Merge-masking: $src0 is tied to $dst so unselected lanes keep their
// previous value.
1840 let AddedComplexity = 20 in {
1841 let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
1842 let hasSideEffects = 0 in
1843 def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
1844 (ins RC:$src0, KRC:$mask, RC:$src1),
1845 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1846 "${dst} {${mask}}, $src1}"),
1847 [(set RC:$dst, (vt (vselect KRC:$mask,
1851 let mayLoad = 1, SchedRW = [WriteLoad] in
1852 def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1853 (ins RC:$src0, KRC:$mask, memop:$src1),
1854 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1855 "${dst} {${mask}}, $src1}"),
1858 (vt (bitconvert (ld_frag addr:$src1))),
1862 let mayLoad = 1, SchedRW = [WriteLoad] in
1863 def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1864 (ins KRC:$mask, memop:$src),
1865 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1866 "${dst} {${mask}} {z}, $src}"),
1869 (vt (bitconvert (ld_frag addr:$src))),
1870 (vt (bitconvert (zvt immAllZerosV))))))],
// avx512_load_vl: instantiate avx512_load at 512/256/128-bit widths.
// Register classes, value types, memory operands and load fragments are
// assembled by string concatenation from elty ("f"/"i"), elsz and the
// per-width element counts vsz512/vsz256/vsz128. The 128/256-bit integer
// forms use loadv2i64/loadv4i64 (load promotion during legalization).
// VLX variants are gated on [prd, HasVLX].
1875 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
1876 string elty, string elsz, string vsz512,
1877 string vsz256, string vsz128, Domain d,
1878 Predicate prd, bit IsReMaterializable = 1> {
1879 let Predicates = [prd] in
1880 defm Z : avx512_load<opc, OpcodeStr,
1881 !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
1882 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1883 !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
1884 !cast<X86MemOperand>(elty##"512mem"), d,
1885 IsReMaterializable>, EVEX_V512;
1887 let Predicates = [prd, HasVLX] in {
1888 defm Z256 : avx512_load<opc, OpcodeStr,
1889 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1890 "v"##vsz256##elty##elsz, "v4i64")),
1891 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1892 !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
1893 !cast<X86MemOperand>(elty##"256mem"), d,
1894 IsReMaterializable>, EVEX_V256;
1896 defm Z128 : avx512_load<opc, OpcodeStr,
1897 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1898 "v"##vsz128##elty##elsz, "v2i64")),
1899 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1900 !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
1901 !cast<X86MemOperand>(elty##"128mem"), d,
1902 IsReMaterializable>, EVEX_V128;
// avx512_store: store instruction family. The *_alt reg-reg forms exist
// only for the assembler/disassembler (isAsmParserOnly); mr is the real
// store, mrk the merge-masked store (no pattern, selected manually).
1907 multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
1908 ValueType OpVT, RegisterClass KRC, RegisterClass RC,
1909 X86MemOperand memop, Domain d> {
1910 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1911 def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
1912 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
1914 let Constraints = "$src1 = $dst" in
1915 def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1916 (ins RC:$src1, KRC:$mask, RC:$src2),
1917 !strconcat(OpcodeStr,
1918 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1920 def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1921 (ins KRC:$mask, RC:$src),
1922 !strconcat(OpcodeStr,
1923 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
1924 [], d>, EVEX, EVEX_KZ;
1926 let mayStore = 1 in {
1927 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
1928 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1929 [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
1930 def mrk : AVX512PI<opc, MRMDestMem, (outs),
1931 (ins memop:$dst, KRC:$mask, RC:$src),
1932 !strconcat(OpcodeStr,
1933 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
1934 [], d>, EVEX, EVEX_K;
// avx512_store_vl: instantiate avx512_store at 512/256/128-bit widths,
// mirroring avx512_load_vl. st_suff_* select the store PatFrag suffix per
// width; VLX variants gated on [prd, HasVLX].
1939 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
1940 string st_suff_512, string st_suff_256,
1941 string st_suff_128, string elty, string elsz,
1942 string vsz512, string vsz256, string vsz128,
1943 Domain d, Predicate prd> {
1944 let Predicates = [prd] in
1945 defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
1946 !cast<ValueType>("v"##vsz512##elty##elsz),
1947 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1948 !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
1950 let Predicates = [prd, HasVLX] in {
1951 defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
1952 !cast<ValueType>("v"##vsz256##elty##elsz),
1953 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1954 !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
1956 defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
1957 !cast<ValueType>("v"##vsz128##elty##elsz),
1958 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1959 !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
// FP move instantiations: aligned (VMOVAPS/VMOVAPD) and unaligned
// (VMOVUPS/VMOVUPD) loads+stores. VMOVUPD's load is marked
// IsReMaterializable=0 (final template argument).
1963 defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
1964 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1965 avx512_store_vl<0x29, "vmovaps", "alignedstore",
1966 "512", "256", "", "f", "32", "16", "8", "4",
1967 SSEPackedSingle, HasAVX512>,
1968 PS, EVEX_CD8<32, CD8VF>;
1970 defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
1971 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1972 avx512_store_vl<0x29, "vmovapd", "alignedstore",
1973 "512", "256", "", "f", "64", "8", "4", "2",
1974 SSEPackedDouble, HasAVX512>,
1975 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1977 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
1978 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1979 avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
1980 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1981 PS, EVEX_CD8<32, CD8VF>;
1983 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
1984 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
1985 avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
1986 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1987 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Select the masked loadu/storeu FP intrinsics onto the zero-masked load
// (Zrmkz) and masked store (Zmrk) forms; the GR8/GR16 mask argument is
// copied into the corresponding write-mask class first.
// NOTE(review): some continuation lines of the store patterns are missing
// from this excerpt; visible text reproduced unchanged.
1989 def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
1990 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
1991 (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1993 def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
1994 (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
1995 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1997 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
1999 (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
2001 def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
2003 (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Integer move instantiations: aligned VMOVDQA32/64 and unaligned
// VMOVDQU8/16/32/64 (8/16-bit element forms require HasBWI).
2006 defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
2007 "16", "8", "4", SSEPackedInt, HasAVX512>,
2008 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
2009 "512", "256", "", "i", "32", "16", "8", "4",
2010 SSEPackedInt, HasAVX512>,
2011 PD, EVEX_CD8<32, CD8VF>;
2013 defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
2014 "8", "4", "2", SSEPackedInt, HasAVX512>,
2015 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
2016 "512", "256", "", "i", "64", "8", "4", "2",
2017 SSEPackedInt, HasAVX512>,
2018 PD, VEX_W, EVEX_CD8<64, CD8VF>;
2020 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
2021 "64", "32", "16", SSEPackedInt, HasBWI>,
2022 avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
2023 "i", "8", "64", "32", "16", SSEPackedInt,
2024 HasBWI>, XD, EVEX_CD8<8, CD8VF>;
2026 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
2027 "32", "16", "8", SSEPackedInt, HasBWI>,
2028 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
2029 "i", "16", "32", "16", "8", SSEPackedInt,
2030 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
2032 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
2033 "16", "8", "4", SSEPackedInt, HasAVX512>,
2034 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
2035 "i", "32", "16", "8", "4", SSEPackedInt,
2036 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
2038 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
2039 "8", "4", "2", SSEPackedInt, HasAVX512>,
2040 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
2041 "i", "64", "8", "4", "2", SSEPackedInt,
2042 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked integer loadu/storeu intrinsics, selected like the FP versions.
// NOTE(review): some store-pattern continuation lines are missing from
// this excerpt; visible text reproduced unchanged.
2044 def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
2045 (v16i32 immAllZerosV), GR16:$mask)),
2046 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
2048 def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
2049 (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
2050 (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
2052 def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
2054 (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
2056 def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
2058 (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// vselect with an all-zeros operand maps to a zero-masked reg-reg move;
// when the zeros are in the true position, the mask is inverted via KNOTW.
2061 let AddedComplexity = 20 in {
2062 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
2063 (bc_v8i64 (v16i32 immAllZerosV)))),
2064 (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
// NOTE(review): the source pattern below binds VK8WM:$mask but the output
// references VK8:$mask — verify the register-class mismatch is intended.
2066 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
2067 (v8i64 VR512:$src))),
2068 (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
2071 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
2072 (v16i32 immAllZerosV))),
2073 (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
2075 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
2076 (v16i32 VR512:$src))),
2077 (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
2080 // Move Int Doubleword to Packed Double Int
// GPR -> XMM moves (EVEX vmovd/vmovq): register and memory source forms.
2082 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
2083 "vmovd\t{$src, $dst|$dst, $src}",
2085 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
2087 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
2088 "vmovd\t{$src, $dst|$dst, $src}",
2090 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
2091 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2092 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
2093 "vmovq\t{$src, $dst|$dst, $src}",
2095 (v2i64 (scalar_to_vector GR64:$src)))],
2096 IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
// CodeGen-only bitconvert forms between GR64 and FR64 scalars.
2097 let isCodeGenOnly = 1 in {
2098 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
2099 "vmovq\t{$src, $dst|$dst, $src}",
2100 [(set FR64:$dst, (bitconvert GR64:$src))],
2101 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2102 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
2103 "vmovq\t{$src, $dst|$dst, $src}",
2104 [(set GR64:$dst, (bitconvert FR64:$src))],
2105 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2107 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
2108 "vmovq\t{$src, $dst|$dst, $src}",
2109 [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
2110 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
2111 EVEX_CD8<64, CD8VT1>;
2113 // Move Int Doubleword to Single Scalar
// CodeGen-only GR32 <-> FR32X bitconvert moves via vmovd.
2115 let isCodeGenOnly = 1 in {
2116 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
2117 "vmovd\t{$src, $dst|$dst, $src}",
2118 [(set FR32X:$dst, (bitconvert GR32:$src))],
2119 IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
2121 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
2122 "vmovd\t{$src, $dst|$dst, $src}",
2123 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
2124 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2127 // Move doubleword from xmm register to r/m32
// Extract element 0 of a v4i32 to a GPR or store it to memory.
2129 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
2130 "vmovd\t{$src, $dst|$dst, $src}",
2131 [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
2132 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
2134 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2135 (ins i32mem:$dst, VR128X:$src),
2136 "vmovd\t{$src, $dst|$dst, $src}",
2137 [(store (i32 (vector_extract (v4i32 VR128X:$src),
2138 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
2139 EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2141 // Move quadword from xmm1 register to r/m64
// Extract element 0 of a v2i64 to GR64 / memory; both require 64-bit mode.
2143 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
2144 "vmovq\t{$src, $dst|$dst, $src}",
2145 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
2147 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
2148 Requires<[HasAVX512, In64BitMode]>;
2150 def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
2151 (ins i64mem:$dst, VR128X:$src),
2152 "vmovq\t{$src, $dst|$dst, $src}",
2153 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
2154 addr:$dst)], IIC_SSE_MOVDQ>,
2155 EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
2156 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2158 // Move Scalar Single to Double Int
// CodeGen-only FR32X -> GR32 / memory bitconvert moves.
2160 let isCodeGenOnly = 1 in {
2161 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
2163 "vmovd\t{$src, $dst|$dst, $src}",
2164 [(set GR32:$dst, (bitconvert FR32X:$src))],
2165 IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
2166 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2167 (ins i32mem:$dst, FR32X:$src),
2168 "vmovd\t{$src, $dst|$dst, $src}",
2169 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
2170 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2173 // Move Quadword Int to Packed Quadword Int
// Load an i64 from memory into the low element of a v2i64.
2175 def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
2177 "vmovq\t{$src, $dst|$dst, $src}",
2179 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
2180 EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2182 //===----------------------------------------------------------------------===//
2183 // AVX-512 MOVSS, MOVSD
2184 //===----------------------------------------------------------------------===//
// avx512_move_scalar: MOVSS/MOVSD family — rr merge move, rrk masked
// merge move (tied $src1), rm scalar load, mr scalar store, mrk masked
// store. The masked forms have no patterns; they are selected manually.
2186 multiclass avx512_move_scalar <string asm, RegisterClass RC,
2187 SDNode OpNode, ValueType vt,
2188 X86MemOperand x86memop, PatFrag mem_pat> {
2189 let hasSideEffects = 0 in {
2190 def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
2191 !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2192 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
2193 (scalar_to_vector RC:$src2))))],
2194 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
2195 let Constraints = "$src1 = $dst" in
2196 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
2197 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
2199 " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
2200 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
2201 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2202 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2203 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
2205 let mayStore = 1 in {
2206 def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2207 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2208 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
2210 def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
2211 !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
2212 [], IIC_SSE_MOV_S_MR>,
2213 EVEX, VEX_LIG, EVEX_K;
2215 } //hasSideEffects = 0
// Instantiate the scalar-move family for f32 (VMOVSSZ) and f64 (VMOVSDZ).
2218 let ExeDomain = SSEPackedSingle in
2219 defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
2220 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
2222 let ExeDomain = SSEPackedDouble in
2223 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
2224 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar select on a VK1 mask -> masked merge move (rrk form).
2226 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
2227 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
2228 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
2230 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
2231 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
2232 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
// Masked scalar store intrinsic -> mrk form with GR8 mask copied to VK1WM.
2234 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
2235 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
2236 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2238 // For the disassembler
2239 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
2240 def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2241 (ins VR128X:$src1, FR32X:$src2),
2242 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2244 XS, EVEX_4V, VEX_LIG;
2245 def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2246 (ins VR128X:$src1, FR64X:$src2),
2247 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2249 XD, EVEX_4V, VEX_LIG, VEX_W;
2252 let Predicates = [HasAVX512] in {
2253 let AddedComplexity = 15 in {
2254 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2255 // MOVS{S,D} to the lower bits.
2256 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2257 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2258 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2259 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2260 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2261 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2262 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2263 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2265 // Move low f32 and clear high bits.
// 256-bit vzmovl: operate on the low xmm subregister, then re-insert it
// with SUBREG_TO_REG (upper bits implicitly zeroed).
2266 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2267 (SUBREG_TO_REG (i32 0),
2268 (VMOVSSZrr (v4f32 (V_SET0)),
2269 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2270 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2271 (SUBREG_TO_REG (i32 0),
2272 (VMOVSSZrr (v4i32 (V_SET0)),
2273 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2276 let AddedComplexity = 20 in {
2277 // MOVSSrm zeros the high parts of the register; represent this
2278 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2279 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2280 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2281 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2282 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2283 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2284 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2286 // MOVSDrm zeros the high parts of the register; represent this
2287 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2288 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2289 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2290 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2291 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2292 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2293 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2294 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2295 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2296 def : Pat<(v2f64 (X86vzload addr:$src)),
2297 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2299 // Represent the same patterns above but in the form they appear for
// 256-bit types: a zero-extended scalar inserted at index 0 of an undef
// wider vector becomes a scalar load plus SUBREG_TO_REG.
2301 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2302 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
2303 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
2304 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2305 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2306 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2307 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2308 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2309 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2311 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2312 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2313 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2314 FR32X:$src)), sub_xmm)>;
2315 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2316 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2317 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2318 FR64X:$src)), sub_xmm)>;
2319 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2320 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
2321 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
2323 // Move low f64 and clear high bits.
2324 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2325 (SUBREG_TO_REG (i32 0),
2326 (VMOVSDZrr (v2f64 (V_SET0)),
2327 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2329 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2330 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2331 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2333 // Extract and store.
// Store the low scalar element of an XMM register directly with
// VMOVSSZmr/VMOVSDZmr instead of extracting first.
// NOTE(review): the "addr:$dst)," operand lines (orig. 2335/2338) were dropped
// from this chunk, leaving both store patterns unbalanced; restored here.
2334 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2335 addr:$dst),
2336 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2337 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2338 addr:$dst),
2339 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2341 // Shuffle with VMOVSS
// X86Movss merges the low element of $src2 into $src1; the second operand is
// copied into the scalar class FR32X so VMOVSSZrr's operand constraints match.
2342 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2343 (VMOVSSZrr (v4i32 VR128X:$src1),
2344 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2345 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2346 (VMOVSSZrr (v4f32 VR128X:$src1),
2347 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit X86Movss: operate on the low XMM halves with VMOVSSZrr, then
// reinsert the result into the YMM register via SUBREG_TO_REG.
// NOTE(review): the trailing "sub_xmm)>;" lines (orig. 2354/2359) were dropped
// from this chunk, leaving both patterns unterminated; restored here.
2350 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2351 (SUBREG_TO_REG (i32 0),
2352 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2353 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2354 sub_xmm)>;
2355 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2356 (SUBREG_TO_REG (i32 0),
2357 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2358 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2359 sub_xmm)>;
2361 // Shuffle with VMOVSD
// X86Movsd merges the low f64/i64 element of $src2 into $src1; $src2 is
// re-classed to FR64X to satisfy VMOVSDZrr's scalar operand.
2362 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2363 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2364 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2365 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2366 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2367 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2368 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2369 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit X86Movsd: operate on the low XMM halves with VMOVSDZrr, then
// reinsert the result into the YMM register via SUBREG_TO_REG.
// NOTE(review): the trailing "sub_xmm)>;" lines (orig. 2376/2381) were dropped
// from this chunk, leaving both patterns unterminated; restored here.
2372 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2373 (SUBREG_TO_REG (i32 0),
2374 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2375 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2376 sub_xmm)>;
2377 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2378 (SUBREG_TO_REG (i32 0),
2379 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2380 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
2381 sub_xmm)>;
// X86Movlpd/X86Movlps register-register forms also lower to VMOVSDZrr:
// replacing the low 64 bits of $src1 with the low 64 bits of $src2.
2383 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2384 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2385 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2386 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2387 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2388 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2389 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2390 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// VMOVZPQILo2PQIZ: move the low quadword and zero the upper lanes
// (selected for (v2i64 (X86vzmovl ...)) register and load forms).
// NOTE(review): the "(ins ...)" operand lines (orig. 2395/2403) were dropped
// from this chunk, leaving both instruction records malformed; restored here.
2393 let AddedComplexity = 15 in
2394 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
2395 (ins VR128X:$src),
2396 "vmovq\t{$src, $dst|$dst, $src}",
2397 [(set VR128X:$dst, (v2i64 (X86vzmovl
2398 (v2i64 VR128X:$src))))],
2399 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;

2401 let AddedComplexity = 20 in
2402 def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
2403 (ins i128mem:$src),
2404 "vmovq\t{$src, $dst|$dst, $src}",
2405 [(set VR128X:$dst, (v2i64 (X86vzmovl
2406 (loadv2i64 addr:$src))))],
2407 IIC_SSE_MOVDQ>, EVEX, VEX_W,
2408 EVEX_CD8<8, CD8VT8>;
// Patterns only valid with AVX-512 available: select EVEX-encoded movd/movq
// for zero-extending scalar-to-vector moves.
// NOTE(review): the closing braces for the AddedComplexity block (orig. 2430)
// and the Predicates block (orig. 2439) were dropped from this chunk;
// restored here so the let-scopes are balanced.
2410 let Predicates = [HasAVX512] in {
2411 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
2412 let AddedComplexity = 20 in {
2413 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
2414 (VMOVDI2PDIZrm addr:$src)>;
2415 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
2416 (VMOV64toPQIZrr GR64:$src)>;
2417 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
2418 (VMOVDI2PDIZrr GR32:$src)>;
2420 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
2421 (VMOVDI2PDIZrm addr:$src)>;
2422 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
2423 (VMOVDI2PDIZrm addr:$src)>;
2424 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
2425 (VMOVZPQILo2PQIZrm addr:$src)>;
2426 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
2427 (VMOVZPQILo2PQIZrr VR128X:$src)>;
2428 def : Pat<(v2i64 (X86vzload addr:$src)),
2429 (VMOVZPQILo2PQIZrm addr:$src)>;
2430 }
2432 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
2433 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2434 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
2435 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
2436 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2437 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
2438 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
2439 }
// X86Vinsert of a GPR into lane 0 of an all-zero or undef 512-bit vector:
// a plain EVEX movd/movq into an XMM reinserted into the ZMM register.
2441 def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
2442 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2444 def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
2445 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2447 def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
2448 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2450 def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
2451 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2453 //===----------------------------------------------------------------------===//
2454 // AVX-512 - Non-temporals
2455 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads (VMOVNTDQA) for 512-bit, plus VLX-gated
// 256/128-bit forms (pattern-less: selected via intrinsics only here).
// NOTE(review): the "(ins i256mem:$src)," / "(ins i128mem:$src)," lines
// (orig. 2465/2471) and the closing braces for the Predicates and SchedRW
// scopes (orig. 2475/2476) were dropped from this chunk; restored here.
2456 let SchedRW = [WriteLoad] in {
2457 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
2458 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
2459 [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
2460 SSEPackedInt>, EVEX, T8PD, EVEX_V512,
2461 EVEX_CD8<64, CD8VF>;
2463 let Predicates = [HasAVX512, HasVLX] in {
2464 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
2465 (ins i256mem:$src),
2466 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2467 SSEPackedInt>, EVEX, T8PD, EVEX_V256,
2468 EVEX_CD8<64, CD8VF>;
2470 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
2471 (ins i128mem:$src),
2472 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2473 SSEPackedInt>, EVEX, T8PD, EVEX_V128,
2474 EVEX_CD8<64, CD8VF>;
2475 }
2476 }
// Non-temporal store of one vector register (mr form only); the write is
// modeled with mayStore and high AddedComplexity so it beats plain stores.
// NOTE(review): the multiclass's closing brace (orig. 2486) was dropped from
// this chunk; restored here.
2478 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2479 ValueType OpVT, RegisterClass RC, X86MemOperand memop,
2480 Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
2481 let SchedRW = [WriteStore], mayStore = 1,
2482 AddedComplexity = 400 in
2483 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
2484 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2485 [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
2486 }
// Instantiate avx512_movnt at all three vector lengths (Z/Z256/Z128), with
// the 256/128-bit forms additionally gated on HasVLX.
// NOTE(review): the "EVEX_V512;/EVEX_V256;/EVEX_V128;" continuation lines
// (orig. 2496/2502/2507) and the two closing braces (orig. 2508/2509) were
// dropped from this chunk; restored here.
2488 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2489 string elty, string elsz, string vsz512,
2490 string vsz256, string vsz128, Domain d,
2491 Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
2492 let Predicates = [prd] in
2493 defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
2494 !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
2495 !cast<X86MemOperand>(elty##"512mem"), d, itin>,
2496 EVEX_V512;
2498 let Predicates = [prd, HasVLX] in {
2499 defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
2500 !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
2501 !cast<X86MemOperand>(elty##"256mem"), d, itin>,
2502 EVEX_V256;
2504 defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
2505 !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
2506 !cast<X86MemOperand>(elty##"128mem"), d, itin>,
2507 EVEX_V128;
2508 }
2509 }
// Non-temporal store instantiations: integer (VMOVNTDQ), double (VMOVNTPD),
// single (VMOVNTPS); element type/size strings pick the memory operand and VT.
2511 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
2512 "i", "64", "8", "4", "2", SSEPackedInt,
2513 HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
2515 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
2516 "f", "64", "8", "4", "2", SSEPackedDouble,
2517 HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2519 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
2520 "f", "32", "16", "8", "4", SSEPackedSingle,
2521 HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2523 //===----------------------------------------------------------------------===//
2524 // AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
// Integer binop multiclass: emits rr/rm plus masked (k), zero-masked (kz) and
// broadcast (b) variants, with every masked combination of each addressing
// mode. KRC is the write-mask class, BrdcstStr the "{1toN}" suffix.
// NOTE(review): line-number gaps (orig. 2537, 2546, 2556-2557, 2563, 2572,
// 2582, 2596, 2600, 2606, 2612-2615) indicate that result/itinerary lines,
// merge-operand lines (likely "RC:$src0)))],") and closing braces were
// dropped from this chunk — restore from upstream before building.
2526 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2527 ValueType OpVT, RegisterClass KRC,
2528 RegisterClass RC, PatFrag memop_frag,
2529 X86MemOperand x86memop, PatFrag scalar_mfrag,
2530 X86MemOperand x86scalar_mop, string BrdcstStr,
2531 OpndItins itins, bit IsCommutable = 0> {
2532 let isCommutable = IsCommutable in
2533 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2534 (ins RC:$src1, RC:$src2),
2535 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2536 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2538 let AddedComplexity = 30 in {
2539 let Constraints = "$src0 = $dst" in
2540 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2541 (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
2542 !strconcat(OpcodeStr,
2543 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2544 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2545 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2547 itins.rr>, EVEX_4V, EVEX_K;
2548 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2549 (ins KRC:$mask, RC:$src1, RC:$src2),
2550 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2551 "|$dst {${mask}} {z}, $src1, $src2}"),
2552 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2553 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2554 (OpVT immAllZerosV))))],
2555 itins.rr>, EVEX_4V, EVEX_KZ;
2558 let mayLoad = 1 in {
2559 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2560 (ins RC:$src1, x86memop:$src2),
2561 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2562 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
2564 let AddedComplexity = 30 in {
2565 let Constraints = "$src0 = $dst" in
2566 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2567 (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
2568 !strconcat(OpcodeStr,
2569 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2570 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2571 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2573 itins.rm>, EVEX_4V, EVEX_K;
2574 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2575 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2576 !strconcat(OpcodeStr,
2577 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2578 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2579 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2580 (OpVT immAllZerosV))))],
2581 itins.rm>, EVEX_4V, EVEX_KZ;
2583 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2584 (ins RC:$src1, x86scalar_mop:$src2),
2585 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2586 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2587 [(set RC:$dst, (OpNode RC:$src1,
2588 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2589 itins.rm>, EVEX_4V, EVEX_B;
2590 let AddedComplexity = 30 in {
2591 let Constraints = "$src0 = $dst" in
2592 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2593 (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2594 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2595 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2597 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2598 (OpNode (OpVT RC:$src1),
2599 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2601 itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2602 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2603 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2604 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2605 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2607 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2608 (OpNode (OpVT RC:$src1),
2609 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2610 (OpVT immAllZerosV))))],
2611 itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// Binop multiclass for ops whose source and destination types differ
// (e.g. vpmuldq: v16i32 sources, v8i64 result). All variants are
// pattern-less ([]) here; selection happens via separate Pat<> records.
// NOTE(review): line-number gaps (orig. 2622, 2626, 2637, 2642, 2662, 2668,
// 2670-2672) indicate that result lines ("[], itins.rr>, EVEX_4V;"-style),
// "BrdcstStr, \"}\")," continuations and closing braces were dropped from
// this chunk — restore from upstream before building.
2616 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
2617 ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
2618 PatFrag memop_frag, X86MemOperand x86memop,
2619 PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
2620 string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
2621 let isCommutable = IsCommutable in
2623 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2624 (ins RC:$src1, RC:$src2),
2625 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2627 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2628 (ins KRC:$mask, RC:$src1, RC:$src2),
2629 !strconcat(OpcodeStr,
2630 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2631 [], itins.rr>, EVEX_4V, EVEX_K;
2632 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2633 (ins KRC:$mask, RC:$src1, RC:$src2),
2634 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2635 "|$dst {${mask}} {z}, $src1, $src2}"),
2636 [], itins.rr>, EVEX_4V, EVEX_KZ;
2638 let mayLoad = 1 in {
2639 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2640 (ins RC:$src1, x86memop:$src2),
2641 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2643 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2644 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2645 !strconcat(OpcodeStr,
2646 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2647 [], itins.rm>, EVEX_4V, EVEX_K;
2648 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2649 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2650 !strconcat(OpcodeStr,
2651 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2652 [], itins.rm>, EVEX_4V, EVEX_KZ;
2653 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2654 (ins RC:$src1, x86scalar_mop:$src2),
2655 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2656 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2657 [], itins.rm>, EVEX_4V, EVEX_B;
2658 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2659 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2660 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2661 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2663 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2664 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2665 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2666 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2667 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2669 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit integer add/sub/mul instantiations. The D-suffixed forms broadcast
// with {1to16} over i32 elements, the Q-suffixed forms with {1to8} over i64.
2673 defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
2674 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2675 SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2677 defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
2678 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2679 SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2681 defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
2682 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2683 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2685 defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
2686 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2687 SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
2689 defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
2690 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2691 SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Widening multiplies: v16i32 sources produce a v8i64 result, hence the
// two-type avx512_binop_rm2 multiclass.
2693 defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
2694 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2695 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
2696 EVEX_CD8<64, CD8VF>, VEX_W;
2698 defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
2699 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2700 SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// Select the pattern-less rm2 instructions for the DAG node and for the
// unmasked (-1 mask, zero passthrough) intrinsic forms.
2702 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
2703 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2705 def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
2706 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2707 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2708 def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
2709 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2710 (VPMULDQZrr VR512:$src1, VR512:$src2)>;
// Signed/unsigned 512-bit min/max instantiations, plus Pat<> records mapping
// the unmasked (-1 mask, zero passthrough) mask intrinsics onto the rr forms.
2712 defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
2713 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2714 SSE_INTALU_ITINS_P, 1>,
2715 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2716 defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
2717 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2718 SSE_INTALU_ITINS_P, 0>,
2719 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2721 defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
2722 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2723 SSE_INTALU_ITINS_P, 1>,
2724 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2725 defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
2726 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2727 SSE_INTALU_ITINS_P, 0>,
2728 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2730 defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
2731 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2732 SSE_INTALU_ITINS_P, 1>,
2733 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2734 defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
2735 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2736 SSE_INTALU_ITINS_P, 0>,
2737 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2739 defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
2740 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2741 SSE_INTALU_ITINS_P, 1>,
2742 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2743 defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
2744 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2745 SSE_INTALU_ITINS_P, 0>,
2746 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2748 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
2749 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2750 (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
2751 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
2752 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2753 (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
2754 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
2755 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2756 (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
2757 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
2758 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2759 (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
2760 def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
2761 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2762 (VPMINSDZrr VR512:$src1, VR512:$src2)>;
2763 def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
2764 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2765 (VPMINUDZrr VR512:$src1, VR512:$src2)>;
2766 def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
2767 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2768 (VPMINSQZrr VR512:$src1, VR512:$src2)>;
2769 def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
2770 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2771 (VPMINUQZrr VR512:$src1, VR512:$src2)>;
2772 //===----------------------------------------------------------------------===//
2773 // AVX-512 - Unpack Instructions
2774 //===----------------------------------------------------------------------===//
// FP unpack multiclass: rr and rm forms; the memory operand is loaded via
// mem_frag and bitconverted to the operation's vector type.
// NOTE(review): the "Domain d> {" parameter line (orig. 2779), the
// "asm, [(set RC:$dst," result-head lines (orig. 2782/2787), the
// "d>, EVEX_4V;" tails (orig. 2784/2790) and the closing brace (orig. 2791)
// were dropped from this chunk; restored here.
2776 multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
2777 PatFrag mem_frag, RegisterClass RC,
2778 X86MemOperand x86memop, string asm,
2779 Domain d> {
2780 def rr : AVX512PI<opc, MRMSrcReg,
2781 (outs RC:$dst), (ins RC:$src1, RC:$src2),
2782 asm, [(set RC:$dst,
2783 (vt (OpNode RC:$src1, RC:$src2)))],
2784 d>, EVEX_4V;
2785 def rm : AVX512PI<opc, MRMSrcMem,
2786 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2787 asm, [(set RC:$dst,
2788 (vt (OpNode RC:$src1,
2789 (bitconvert (mem_frag addr:$src2)))))],
2790 d>, EVEX_4V;
2791 }
// FP unpack instantiations.
// NOTE(review): the single-precision variants pass memopv8f64 as mem_frag —
// the multiclass bitconverts the loaded value, but confirm against upstream
// that memopv8f64 (rather than memopv16f32) is intended here.
2793 defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
2794 VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2795 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2796 defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
2797 VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2798 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2799 defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
2800 VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2801 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2802 defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
2803 VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2804 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer unpack multiclass: rr and rm forms with a bitconverted memory
// operand, both encoded EVEX_4V.
// NOTE(review): the multiclass's closing brace (orig. 2820) was dropped from
// this chunk; restored here.
2806 multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
2807 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2808 X86MemOperand x86memop> {
2809 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2810 (ins RC:$src1, RC:$src2),
2811 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2812 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2813 IIC_SSE_UNPCK>, EVEX_4V;
2814 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2815 (ins RC:$src1, x86memop:$src2),
2816 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2817 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
2818 (bitconvert (memop_frag addr:$src2)))))],
2819 IIC_SSE_UNPCK>, EVEX_4V;
2820 }
// Integer unpack instantiations: low/high dword and qword interleaves.
2821 defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
2822 VR512, memopv16i32, i512mem>, EVEX_V512,
2823 EVEX_CD8<32, CD8VF>;
2824 defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
2825 VR512, memopv8i64, i512mem>, EVEX_V512,
2826 VEX_W, EVEX_CD8<64, CD8VF>;
2827 defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
2828 VR512, memopv16i32, i512mem>, EVEX_V512,
2829 EVEX_CD8<32, CD8VF>;
2830 defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
2831 VR512, memopv8i64, i512mem>, EVEX_V512,
2832 VEX_W, EVEX_CD8<64, CD8VF>;
2833 //===----------------------------------------------------------------------===//
// AVX-512 - PSHUFD
//===----------------------------------------------------------------------===//
// Immediate-controlled shuffle multiclass (vpshufd/vpermilps/vpermilpd):
// ri (register) and mi (memory) forms taking an i8 immediate selector.
// NOTE(review): the "[(set RC:$dst," result-head lines (orig. 2844/2851),
// the "EVEX;" tail (orig. 2846) and the closing brace (orig. 2854) were
// dropped from this chunk; restored here.
2837 multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
2838 SDNode OpNode, PatFrag mem_frag,
2839 X86MemOperand x86memop, ValueType OpVT> {
2840 def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
2841 (ins RC:$src1, i8imm:$src2),
2842 !strconcat(OpcodeStr,
2843 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2844 [(set RC:$dst,
2845 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
2846 EVEX;
2847 def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
2848 (ins x86memop:$src1, i8imm:$src2),
2849 !strconcat(OpcodeStr,
2850 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2851 [(set RC:$dst,
2852 (OpVT (OpNode (mem_frag addr:$src1),
2853 (i8 imm:$src2))))]>, EVEX;
2854 }
// Immediate-shuffle instantiations, plus Pat<> records reusing the FP
// permutes for same-width integer X86VPermilp nodes.
// NOTE(review): VPERMILPDZ is tagged EVEX_CD8<32, CD8VF> despite 64-bit
// elements — confirm against upstream whether <64, CD8VF> was intended.
2856 defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
2857 i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2859 let ExeDomain = SSEPackedSingle in
2860 defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
2861 memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
2862 EVEX_CD8<32, CD8VF>;
2863 let ExeDomain = SSEPackedDouble in
2864 defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
2865 memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
2866 VEX_W, EVEX_CD8<32, CD8VF>;
2868 def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2869 (VPERMILPSZri VR512:$src1, imm:$imm)>;
2870 def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2871 (VPERMILPDZri VR512:$src1, imm:$imm)>;
2873 //===----------------------------------------------------------------------===//
2874 // AVX-512 Logical Instructions
2875 //===----------------------------------------------------------------------===//
// and/or/xor/andn over dword (D, {1to16}) and qword (Q, {1to8}) elements;
// the andn forms are non-commutative (IsCommutable = 0).
2877 defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
2878 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2879 EVEX_V512, EVEX_CD8<32, CD8VF>;
2880 defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
2881 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2882 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2883 defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
2884 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2885 EVEX_V512, EVEX_CD8<32, CD8VF>;
2886 defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
2887 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2888 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2889 defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
2890 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2891 EVEX_V512, EVEX_CD8<32, CD8VF>;
2892 defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
2893 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2894 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2895 defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
2896 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2897 SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2898 defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
2899 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2900 SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2902 //===----------------------------------------------------------------------===//
2903 // AVX-512 FP arithmetic
2904 //===----------------------------------------------------------------------===//
// Scalar FP binop multiclass: EVEX-encoded ss/sd forms built on the shared
// sse12_fp_scalar multiclass with the AVX-512 scalar register classes.
// NOTE(review): the "SizeItins itins> {" parameter line (orig. 2907) and the
// closing brace (orig. 2914) were dropped from this chunk; restored here.
2906 multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2907 SizeItins itins> {
2908 defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
2909 f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2910 EVEX_CD8<32, CD8VT1>;
2911 defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
2912 f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2913 EVEX_CD8<64, CD8VT1>;
2914 }
// Scalar FP binop instantiations: add/mul/min/max commute; sub/div do not.
// NOTE(review): the closing braces of both isCommutable scopes (orig.
// 2921/2925) were dropped from this chunk; restored here.
2916 let isCommutable = 1 in {
2917 defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
2918 defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
2919 defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
2920 defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
2921 }
2922 let isCommutable = 0 in {
2923 defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
2924 defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
2925 }
// Packed FP binop multiclass: rr/rm/rmb with patterns, plus pattern-less
// masked (k/kz) variants of every addressing mode.
// NOTE(review): line-number gaps (orig. 2928, 2932, 2938-2939, 2944,
// 2949-2950, 2956, 2964, 2969, 2974, 2980, 2985, 2987-2989) indicate that the
// "RegisterClass KRC,"/"string BrdcstStr," parameter lines, the rr form's
// "EVEX_4V;" tail, the rmbkz "BrdcstStr, \"}\")," continuation and the three
// closing braces were dropped from this chunk — restore from upstream before
// building.
2927 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2929 RegisterClass RC, ValueType vt,
2930 X86MemOperand x86memop, PatFrag mem_frag,
2931 X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2933 Domain d, OpndItins itins, bit commutable> {
2934 let isCommutable = commutable in {
2935 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2936 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2937 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
2940 def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2941 !strconcat(OpcodeStr,
2942 " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2943 [], itins.rr, d>, EVEX_4V, EVEX_K;
2945 def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2946 !strconcat(OpcodeStr,
2947 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2948 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2951 let mayLoad = 1 in {
2952 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2953 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2954 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
2955 itins.rm, d>, EVEX_4V;
2957 def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2958 (ins RC:$src1, x86scalar_mop:$src2),
2959 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2960 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2961 [(set RC:$dst, (OpNode RC:$src1,
2962 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2963 itins.rm, d>, EVEX_4V, EVEX_B;
2965 def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2966 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2967 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2968 [], itins.rm, d>, EVEX_4V, EVEX_K;
2970 def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2971 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2972 "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2973 [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2975 def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2976 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2977 " \t{${src2}", BrdcstStr,
2978 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2979 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2981 def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2982 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2983 " \t{${src2}", BrdcstStr,
2984 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2986 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
2990 defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
2991 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2992 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2994 defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
2995 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2996 SSE_ALU_ITINS_P.d, 1>,
2997 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2999 defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
3000 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3001 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3002 defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
3003 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3004 SSE_ALU_ITINS_P.d, 1>,
3005 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3007 defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
3008 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3009 SSE_ALU_ITINS_P.s, 1>,
3010 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3011 defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
3012 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3013 SSE_ALU_ITINS_P.s, 1>,
3014 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3016 defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
3017 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3018 SSE_ALU_ITINS_P.d, 1>,
3019 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3020 defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
3021 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3022 SSE_ALU_ITINS_P.d, 1>,
3023 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// 512-bit packed FP subtract/divide. The trailing '0' marks these as
// non-commutable, unlike the mul/min/max instantiations above.
3025 defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
3026 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3027 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3028 defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
3029 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
3030 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3032 defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
3033 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3034 SSE_ALU_ITINS_P.d, 0>,
3035 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3036 defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
3037 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
3038 SSE_ALU_ITINS_P.d, 0>,
3039 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Map the fully-unmasked forms of the masked max/min 512-bit intrinsics
// (zero passthru vector, all-ones mask immediate, FROUND_CURRENT rounding)
// onto the plain unmasked register-register instructions.
3041 def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
3042 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3043 (i16 -1), FROUND_CURRENT)),
3044 (VMAXPSZrr VR512:$src1, VR512:$src2)>;
3046 def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
3047 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3048 (i8 -1), FROUND_CURRENT)),
3049 (VMAXPDZrr VR512:$src1, VR512:$src2)>;
3051 def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
3052 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3053 (i16 -1), FROUND_CURRENT)),
3054 (VMINPSZrr VR512:$src1, VR512:$src2)>;
3056 def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
3057 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3058 (i8 -1), FROUND_CURRENT)),
3059 (VMINPDZrr VR512:$src1, VR512:$src2)>;
3060 //===----------------------------------------------------------------------===//
3061 // AVX-512 VPTESTM instructions
3062 //===----------------------------------------------------------------------===//
// Vector test multiclass: rr and rm forms that AND/compare two vectors and
// write the per-element result into a mask register (KRC), not a vector.
// The memory form bitconverts the loaded value to match the pattern type.
3064 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3065 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
3066 SDNode OpNode, ValueType vt> {
3067 def rr : AVX512PI<opc, MRMSrcReg,
3068 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
3069 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3070 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
3071 SSEPackedInt>, EVEX_4V;
3072 def rm : AVX512PI<opc, MRMSrcMem,
3073 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
3074 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3075 [(set KRC:$dst, (OpNode (vt RC:$src1),
3076 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
// VPTESTM: mask bit set where (src1 AND src2) is nonzero per element.
// NOTE(review): these integer instructions pass f512mem as the memory
// operand rather than i512mem — presumably only affects asm operand class;
// TODO confirm against a current X86InstrAVX512.td.
3079 defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
3080 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
3081 EVEX_CD8<32, CD8VF>;
3082 defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
3083 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
3084 EVEX_CD8<64, CD8VF>;
// VPTESTNM (mask set where AND is zero) requires the CDI feature; encoded
// with the XS (F3) prefix instead of PD, same 0x27 opcode.
3086 let Predicates = [HasCDI] in {
3087 defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
3088 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
3089 EVEX_CD8<32, CD8VF>;
3090 defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
3091 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
3092 EVEX_CD8<64, CD8VF>;
// Lower the unmasked (all-ones mask) ptestm intrinsics to VPTESTM*Zrr; the
// k-register result is copied into a GPR class to match the i16/i8 return.
3095 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
3096 (v16i32 VR512:$src2), (i16 -1))),
3097 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
3099 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
3100 (v8i64 VR512:$src2), (i8 -1))),
3101 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
3102 //===----------------------------------------------------------------------===//
3103 // AVX-512 Shift instructions
3104 //===----------------------------------------------------------------------===//
// Shift-by-immediate multiclass. Produces four records: ri (reg, imm8),
// rik (masked reg form, no pattern — selection handled elsewhere),
// mi (mem, imm8) and mik (masked mem form, no pattern). The masked forms
// take the writemask as an extra input and use EVEX_K encoding.
3105 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
3106 string OpcodeStr, SDNode OpNode, RegisterClass RC,
3107 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
3108 RegisterClass KRC> {
3109 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3110 (ins RC:$src1, i8imm:$src2),
3111 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3112 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
3113 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3114 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3115 (ins KRC:$mask, RC:$src1, i8imm:$src2),
3116 !strconcat(OpcodeStr,
3117 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3118 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3119 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3120 (ins x86memop:$src1, i8imm:$src2),
3121 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3122 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
3123 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3124 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3125 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
3126 !strconcat(OpcodeStr,
3127 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3128 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift-by-vector-count multiclass: the shift amount lives in the low
// element of an XMM register (VR128X) or a 128-bit memory operand,
// regardless of the destination vector width. Produces rr/rrk/rm/rmk;
// the masked (EVEX_K) forms carry no isel pattern.
3131 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3132 RegisterClass RC, ValueType vt, ValueType SrcVT,
3133 PatFrag bc_frag, RegisterClass KRC> {
3134 // src2 is always 128-bit
3135 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3136 (ins RC:$src1, VR128X:$src2),
3137 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3138 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
3139 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3140 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3141 (ins KRC:$mask, RC:$src1, VR128X:$src2),
3142 !strconcat(OpcodeStr,
3143 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3144 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3145 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3146 (ins RC:$src1, i128mem:$src2),
3147 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3148 [(set RC:$dst, (vt (OpNode RC:$src1,
3149 (bc_frag (memopv2i64 addr:$src2)))))],
3150 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3151 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3152 (ins KRC:$mask, RC:$src1, i128mem:$src2),
3153 !strconcat(OpcodeStr,
3154 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3155 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift instantiations. Each mnemonic intentionally uses the same defm
// prefix twice (e.g. VPSRLDZ): the two multiclasses generate disjoint
// record suffixes (ri/rik/mi/mik vs. rr/rrk/rm/rmk), so the concatenated
// names do not collide. Immediate forms use full-vector CD8 scaling
// (CD8VF); the xmm-count forms use quarter-vector scaling (CD8VQ) since
// their memory operand is 128-bit.
3158 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
3159 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3160 EVEX_V512, EVEX_CD8<32, CD8VF>;
3161 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
3162 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3163 EVEX_CD8<32, CD8VQ>;
3165 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
3166 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3167 EVEX_CD8<64, CD8VF>, VEX_W;
3168 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
3169 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3170 EVEX_CD8<64, CD8VQ>, VEX_W;
3172 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
3173 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
3174 EVEX_CD8<32, CD8VF>;
3175 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
3176 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3177 EVEX_CD8<32, CD8VQ>;
3179 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
3180 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3181 EVEX_CD8<64, CD8VF>, VEX_W;
3182 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
3183 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3184 EVEX_CD8<64, CD8VQ>, VEX_W;
3186 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
3187 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3188 EVEX_V512, EVEX_CD8<32, CD8VF>;
3189 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
3190 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3191 EVEX_CD8<32, CD8VQ>;
3193 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
3194 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3195 EVEX_CD8<64, CD8VF>, VEX_W;
3196 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
3197 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3198 EVEX_CD8<64, CD8VQ>, VEX_W;
3200 //===-------------------------------------------------------------------===//
3201 // Variable Bit Shifts
3202 //===-------------------------------------------------------------------===//
// Per-element variable shift (VPSLLV/VPSRLV/VPSRAV family): each lane is
// shifted by the corresponding lane of src2. rr and rm forms only.
3203 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
3204 RegisterClass RC, ValueType vt,
3205 X86MemOperand x86memop, PatFrag mem_frag> {
3206 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3207 (ins RC:$src1, RC:$src2),
3208 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3210 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
3212 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3213 (ins RC:$src1, x86memop:$src2),
3214 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3216 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
// Variable-shift instantiations: matched directly on the generic shl/srl/sra
// nodes (the per-element semantics come from the vector types).
3220 defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
3221 i512mem, memopv16i32>, EVEX_V512,
3222 EVEX_CD8<32, CD8VF>;
3223 defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
3224 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3225 EVEX_CD8<64, CD8VF>;
3226 defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
3227 i512mem, memopv16i32>, EVEX_V512,
3228 EVEX_CD8<32, CD8VF>;
3229 defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
3230 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3231 EVEX_CD8<64, CD8VF>;
3232 defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
3233 i512mem, memopv16i32>, EVEX_V512,
3234 EVEX_CD8<32, CD8VF>;
3235 defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
3236 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3237 EVEX_CD8<64, CD8VF>;
3239 //===----------------------------------------------------------------------===//
3240 // AVX-512 - MOVDDUP
3241 //===----------------------------------------------------------------------===//
// MOVDDUP: duplicate the even-indexed double elements; rr and rm forms
// built on the X86Movddup shuffle node.
3243 multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
3244 X86MemOperand x86memop, PatFrag memop_frag> {
3245 def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3246 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3247 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
3248 def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3249 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3251 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
// 512-bit VMOVDDUP plus a pattern folding a scalar f64 load that is
// broadcast via scalar_to_vector into the memory form.
3254 defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
3255 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3256 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
3257 (VMOVDDUPZrm addr:$src)>;
3259 //===---------------------------------------------------------------------===//
3260 // Replicate Single FP - MOVSHDUP and MOVSLDUP
3261 //===---------------------------------------------------------------------===//
// MOVSHDUP/MOVSLDUP: replicate odd/even single-precision elements; the
// specific shuffle node (X86Movshdup or X86Movsldup) is a parameter.
3262 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
3263 ValueType vt, RegisterClass RC, PatFrag mem_frag,
3264 X86MemOperand x86memop> {
3265 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3266 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3267 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
3269 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3270 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3271 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
// Instantiations plus extra patterns selecting the same instructions for
// the v16i32 forms of the shuffle nodes (FP domain reused for integers).
3274 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3275 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3276 EVEX_CD8<32, CD8VF>;
3277 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3278 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3279 EVEX_CD8<32, CD8VF>;
3281 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3282 def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3283 (VMOVSHDUPZrm addr:$src)>;
3284 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3285 def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3286 (VMOVSLDUPZrm addr:$src)>;
3288 //===----------------------------------------------------------------------===//
3289 // Move Low to High and High to Low packed FP Instructions
3290 //===----------------------------------------------------------------------===//
// VMOVLHPS/VMOVHLPS (128-bit XMM forms under EVEX): move low/high 64 bits
// between the two sources; selected via the X86Movlhps/X86Movhlps nodes.
3291 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3292 (ins VR128X:$src1, VR128X:$src2),
3293 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3294 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3295 IIC_SSE_MOV_LH>, EVEX_4V;
3296 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3297 (ins VR128X:$src1, VR128X:$src2),
3298 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3299 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3300 IIC_SSE_MOV_LH>, EVEX_4V;
// Reuse the FP-domain MOVLHPS/MOVHLPS instructions for the integer vector
// forms of the same shuffle nodes when AVX-512 is available.
3302 let Predicates = [HasAVX512] in {
3304 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3305 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3306 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3307 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
3310 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3311 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3314 //===----------------------------------------------------------------------===//
3315 // FMA - Fused Multiply Operations
// Packed FMA3 multiclass (213-form operand order). The register form uses
// the AVX512_masking_3src wrapper to generate merge-/zero-masked variants;
// the m and mb (embedded-broadcast, EVEX_B) memory forms are unmasked.
// $src1 is tied to $dst via the surrounding Constraints.
3317 let Constraints = "$src1 = $dst" in {
3318 multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3319 RegisterClass RC, X86MemOperand x86memop,
3320 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3321 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3322 RegisterClass KRC> {
3323 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3324 (ins RC:$src2, RC:$src3),
3325 OpcodeStr, "$src3, $src2", "$src2, $src3",
3326 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3330 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3331 (ins RC:$src1, RC:$src2, x86memop:$src3),
3332 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3333 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3334 (mem_frag addr:$src3))))]>;
3335 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3336 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
3337 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
3338 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3339 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3340 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3342 } // Constraints = "$src1 = $dst"
// 213-form packed-single FMA instantiations (fmadd/fmsub/fmaddsub/
// fmsubadd/fnmadd/fnmsub), all v16f32 with a VK16WM writemask.
3344 let ExeDomain = SSEPackedSingle in {
3345 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3346 memopv16f32, f32mem, loadf32, "{1to16}",
3347 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
3348 EVEX_CD8<32, CD8VF>;
3349 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3350 memopv16f32, f32mem, loadf32, "{1to16}",
3351 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
3352 EVEX_CD8<32, CD8VF>;
3353 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3354 memopv16f32, f32mem, loadf32, "{1to16}",
3355 X86Fmaddsub, v16f32, VK16WM>,
3356 EVEX_V512, EVEX_CD8<32, CD8VF>;
3357 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3358 memopv16f32, f32mem, loadf32, "{1to16}",
3359 X86Fmsubadd, v16f32, VK16WM>,
3360 EVEX_V512, EVEX_CD8<32, CD8VF>;
3361 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3362 memopv16f32, f32mem, loadf32, "{1to16}",
3363 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
3364 EVEX_CD8<32, CD8VF>;
3365 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3366 memopv16f32, f32mem, loadf32, "{1to16}",
3367 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
3368 EVEX_CD8<32, CD8VF>;
// 213-form packed-double FMA instantiations: same opcodes as the PS group,
// distinguished by VEX_W and the v8f64/VK8WM/64-bit CD8 parameters.
3370 let ExeDomain = SSEPackedDouble in {
3371 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3372 memopv8f64, f64mem, loadf64, "{1to8}",
3373 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
3374 VEX_W, EVEX_CD8<64, CD8VF>;
3375 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3376 memopv8f64, f64mem, loadf64, "{1to8}",
3377 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3378 EVEX_CD8<64, CD8VF>;
3379 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3380 memopv8f64, f64mem, loadf64, "{1to8}",
3381 X86Fmaddsub, v8f64, VK8WM>,
3382 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3383 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3384 memopv8f64, f64mem, loadf64, "{1to8}",
3385 X86Fmsubadd, v8f64, VK8WM>,
3386 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3387 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3388 memopv8f64, f64mem, loadf64, "{1to8}",
3389 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
3390 EVEX_CD8<64, CD8VF>;
3391 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3392 memopv8f64, f64mem, loadf64, "{1to8}",
3393 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3394 EVEX_CD8<64, CD8VF>;
// 132-form FMA multiclass: memory-operand-only (m and broadcast mb forms).
// Note the operand order in the DAG pattern — the memory operand is the
// middle multiplicand ($src2), matching 132-form semantics.
3397 let Constraints = "$src1 = $dst" in {
3398 multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3399 RegisterClass RC, X86MemOperand x86memop,
3400 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3401 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3403 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3404 (ins RC:$src1, RC:$src3, x86memop:$src2),
3405 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
3406 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3407 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3408 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
3409 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
3410 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3411 [(set RC:$dst, (OpNode RC:$src1,
3412 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3414 } // Constraints = "$src1 = $dst"
// 132-form packed-single FMA instantiations (memory forms only, per the
// multiclass above).
3417 let ExeDomain = SSEPackedSingle in {
3418 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3419 memopv16f32, f32mem, loadf32, "{1to16}",
3420 X86Fmadd, v16f32>, EVEX_V512,
3421 EVEX_CD8<32, CD8VF>;
3422 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3423 memopv16f32, f32mem, loadf32, "{1to16}",
3424 X86Fmsub, v16f32>, EVEX_V512,
3425 EVEX_CD8<32, CD8VF>;
3426 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3427 memopv16f32, f32mem, loadf32, "{1to16}",
3428 X86Fmaddsub, v16f32>,
3429 EVEX_V512, EVEX_CD8<32, CD8VF>;
3430 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3431 memopv16f32, f32mem, loadf32, "{1to16}",
3432 X86Fmsubadd, v16f32>,
3433 EVEX_V512, EVEX_CD8<32, CD8VF>;
3434 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3435 memopv16f32, f32mem, loadf32, "{1to16}",
3436 X86Fnmadd, v16f32>, EVEX_V512,
3437 EVEX_CD8<32, CD8VF>;
3438 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3439 memopv16f32, f32mem, loadf32, "{1to16}",
3440 X86Fnmsub, v16f32>, EVEX_V512,
3441 EVEX_CD8<32, CD8VF>;
// 132-form packed-double FMA instantiations: same opcodes, VEX_W plus
// v8f64/64-bit CD8 parameters.
3443 let ExeDomain = SSEPackedDouble in {
3444 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3445 memopv8f64, f64mem, loadf64, "{1to8}",
3446 X86Fmadd, v8f64>, EVEX_V512,
3447 VEX_W, EVEX_CD8<64, CD8VF>;
3448 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3449 memopv8f64, f64mem, loadf64, "{1to8}",
3450 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3451 EVEX_CD8<64, CD8VF>;
3452 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3453 memopv8f64, f64mem, loadf64, "{1to8}",
3454 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3455 EVEX_CD8<64, CD8VF>;
3456 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3457 memopv8f64, f64mem, loadf64, "{1to8}",
3458 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3459 EVEX_CD8<64, CD8VF>;
3460 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3461 memopv8f64, f64mem, loadf64, "{1to8}",
3462 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3463 EVEX_CD8<64, CD8VF>;
3464 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3465 memopv8f64, f64mem, loadf64, "{1to8}",
3466 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3467 EVEX_CD8<64, CD8VF>;
// Scalar FMA3 multiclass (213-form): r and m forms, $src1 tied to $dst.
// The DAG pattern orders operands (src2, src1, src3) to match the 213
// encoding where $src1 is both an accumulator input and the destination.
// NOTE(review): the memory form hardcodes f128mem:$src3 instead of using
// the x86memop parameter the multiclass declares — presumably a leftover;
// the EVEX_CD8<.., CD8VT1> modifiers at the instantiations suggest a
// scalar-sized operand was intended. TODO confirm before changing.
3471 let Constraints = "$src1 = $dst" in {
3472 multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3473 RegisterClass RC, ValueType OpVT,
3474 X86MemOperand x86memop, Operand memop,
3476 let isCommutable = 1 in
3477 def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
3478 (ins RC:$src1, RC:$src2, RC:$src3),
3479 !strconcat(OpcodeStr,
3480 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3482 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
3484 def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3485 (ins RC:$src1, RC:$src2, f128mem:$src3),
3486 !strconcat(OpcodeStr,
3487 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3489 (OpVT (OpNode RC:$src2, RC:$src1,
3490 (mem_frag addr:$src3))))]>;
3493 } // Constraints = "$src1 = $dst"
// Scalar 213-form FMA instantiations on FR32X/FR64X; CD8VT1 scales the
// EVEX compressed displacement by one element.
3495 defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
3496 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3497 defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
3498 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3499 defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
3500 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3501 defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
3502 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3503 defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
3504 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3505 defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
3506 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3507 defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
3508 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3509 defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
3510 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3512 //===----------------------------------------------------------------------===//
3513 // AVX-512 Scalar convert from sign integer to float/double
3514 //===----------------------------------------------------------------------===//
// int -> FP scalar conversion multiclass: pattern-less (hasSideEffects = 0,
// empty DAG lists) rr and rm forms; selection happens via the explicit
// Pat<> records that follow the instantiations.
3516 multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3517 X86MemOperand x86memop, string asm> {
3518 let hasSideEffects = 0 in {
3519 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
3520 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3523 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3524 (ins DstRC:$src1, x86memop:$src),
3525 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3527 } // hasSideEffects = 0
// Signed int -> float/double conversions. The patterns feed an
// IMPLICIT_DEF as the tied pass-through $src1 operand since only the
// converted element matters.
3529 let Predicates = [HasAVX512] in {
3530 defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
3531 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3532 defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
3533 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3534 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
3535 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3536 defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
3537 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3539 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3540 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3541 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
3542 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3543 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3544 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3545 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
3546 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3548 def : Pat<(f32 (sint_to_fp GR32:$src)),
3549 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3550 def : Pat<(f32 (sint_to_fp GR64:$src)),
3551 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3552 def : Pat<(f64 (sint_to_fp GR32:$src)),
3553 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3554 def : Pat<(f64 (sint_to_fp GR64:$src)),
3555 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned int -> float/double conversions (AVX-512-only opcode 0x7B),
// mirroring the signed group above but matched on uint_to_fp.
3557 defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
3558 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3559 defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
3560 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3561 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
3562 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3563 defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
3564 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3566 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3567 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3568 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3569 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3570 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3571 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3572 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3573 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3575 def : Pat<(f32 (uint_to_fp GR32:$src)),
3576 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3577 def : Pat<(f32 (uint_to_fp GR64:$src)),
3578 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3579 def : Pat<(f64 (uint_to_fp GR32:$src)),
3580 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3581 def : Pat<(f64 (uint_to_fp GR64:$src)),
3582 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3585 //===----------------------------------------------------------------------===//
3586 // AVX-512 Scalar convert from float/double to integer
3587 //===----------------------------------------------------------------------===//
// FP -> int conversion via intrinsic: the rr form carries the intrinsic
// pattern, the rm form is pattern-less (intrinsic memory folding handled
// elsewhere). hasSideEffects = 0 for both.
3588 multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3589 Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
3591 let hasSideEffects = 0 in {
3592 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3593 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3594 [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
3595 Requires<[HasAVX512]>;
3597 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
3598 !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
3599 Requires<[HasAVX512]>;
3600 } // hasSideEffects = 0
// Round-to-current-mode scalar conversions: signed (0x2D, also in SSE)
// and unsigned (0x79, AVX-512-only); 32- and 64-bit GPR destinations.
3602 let Predicates = [HasAVX512] in {
3603 // Convert float/double to signed/unsigned int 32/64
3604 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
3605 ssmem, sse_load_f32, "cvtss2si">,
3606 XS, EVEX_CD8<32, CD8VT1>;
3607 defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
3608 ssmem, sse_load_f32, "cvtss2si">,
3609 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
3610 defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
3611 ssmem, sse_load_f32, "cvtss2usi">,
3612 XS, EVEX_CD8<32, CD8VT1>;
3613 defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3614 int_x86_avx512_cvtss2usi64, ssmem,
3615 sse_load_f32, "cvtss2usi">, XS, VEX_W,
3616 EVEX_CD8<32, CD8VT1>;
3617 defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
3618 sdmem, sse_load_f64, "cvtsd2si">,
3619 XD, EVEX_CD8<64, CD8VT1>;
3620 defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
3621 sdmem, sse_load_f64, "cvtsd2si">,
3622 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3623 defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
3624 sdmem, sse_load_f64, "cvtsd2usi">,
3625 XD, EVEX_CD8<64, CD8VT1>;
3626 defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3627 int_x86_avx512_cvtsd2usi64, sdmem,
3628 sse_load_f64, "cvtsd2usi">, XD, VEX_W,
3629 EVEX_CD8<64, CD8VT1>;
// Intrinsic-only (isCodeGenOnly) int -> FP forms reusing the SSE
// sse12_cvt_sint_3addr multiclass; never emitted by the asm parser.
3631 let isCodeGenOnly = 1 in {
3632 defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3633 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
3634 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3635 defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3636 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
3637 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3638 defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3639 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
3640 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3641 defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3642 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
3643 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3645 defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3646 int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
3647 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3648 defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3649 int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
3650 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3651 defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3652 int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
3653 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3654 defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3655 int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
3656 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3657 } // isCodeGenOnly = 1
3659 // Convert float/double to signed/unsigned int 32/64 with truncation
3660 let isCodeGenOnly = 1 in {
// Truncating conversions via intrinsics: signed uses opcode 0x2C,
// unsigned uses the AVX-512-only 0x78. Kept codegen-only so the
// assembler uses the non-Int records defined below.
3661 defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
3662 ssmem, sse_load_f32, "cvttss2si">,
3663 XS, EVEX_CD8<32, CD8VT1>;
3664 defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3665 int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
3666 "cvttss2si">, XS, VEX_W,
3667 EVEX_CD8<32, CD8VT1>;
3668 defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
3669 sdmem, sse_load_f64, "cvttsd2si">, XD,
3670 EVEX_CD8<64, CD8VT1>;
3671 defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3672 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
3673 "cvttsd2si">, XD, VEX_W,
3674 EVEX_CD8<64, CD8VT1>;
3675 defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3676 int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
3677 "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
3678 defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3679 int_x86_avx512_cvttss2usi64, ssmem,
3680 sse_load_f32, "cvttss2usi">, XS, VEX_W,
3681 EVEX_CD8<32, CD8VT1>;
3682 defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3683 int_x86_avx512_cvttsd2usi,
3684 sdmem, sse_load_f64, "cvttsd2usi">, XD,
3685 EVEX_CD8<64, CD8VT1>;
3686 defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3687 int_x86_avx512_cvttsd2usi64, sdmem,
3688 sse_load_f64, "cvttsd2usi">, XD, VEX_W,
3689 EVEX_CD8<64, CD8VT1>;
3690 } // isCodeGenOnly = 1
// FP -> int conversion matched on a generic SDNode (fp_to_sint/fp_to_uint)
// rather than an intrinsic; both rr and rm carry selection patterns.
3692 multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3693 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
3695 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3696 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3697 [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
3698 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3699 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3700 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
// Truncating scalar FP -> int instantiations operating on FR32X/FR64X,
// matched on fp_to_sint/fp_to_uint (these are the assembler-visible
// records, unlike the Int_* group above).
3703 defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
3704 loadf32, "cvttss2si">, XS,
3705 EVEX_CD8<32, CD8VT1>;
3706 defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
3707 loadf32, "cvttss2usi">, XS,
3708 EVEX_CD8<32, CD8VT1>;
3709 defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
3710 loadf32, "cvttss2si">, XS, VEX_W,
3711 EVEX_CD8<32, CD8VT1>;
3712 defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
3713 loadf32, "cvttss2usi">, XS, VEX_W,
3714 EVEX_CD8<32, CD8VT1>;
3715 defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
3716 loadf64, "cvttsd2si">, XD,
3717 EVEX_CD8<64, CD8VT1>;
3718 defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
3719 loadf64, "cvttsd2usi">, XD,
3720 EVEX_CD8<64, CD8VT1>;
3721 defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
3722 loadf64, "cvttsd2si">, XD, VEX_W,
3723 EVEX_CD8<64, CD8VT1>;
3724 defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
3725 loadf64, "cvttsd2usi">, XD, VEX_W,
3726 EVEX_CD8<64, CD8VT1>;
3728 //===----------------------------------------------------------------------===//
3729 // AVX-512 Convert form float to double and back
3730 //===----------------------------------------------------------------------===//
// Scalar f32<->f64 conversions.  The instructions themselves carry no
// patterns (hasSideEffects = 0, empty pattern lists); selection is done by
// the standalone Pat<> records below.
3731 let hasSideEffects = 0 in {
3732 def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
3733 (ins FR32X:$src1, FR32X:$src2),
3734 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3735 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
3737 def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
3738 (ins FR32X:$src1, f32mem:$src2),
3739 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3740 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
3741 EVEX_CD8<32, CD8VT1>;
3743 // Convert scalar double to scalar single
3744 def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
3745 (ins FR64X:$src1, FR64X:$src2),
3746 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3747 []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
3749 def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
3750 (ins FR64X:$src1, f64mem:$src2),
3751 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3752 []>, EVEX_4V, VEX_LIG, VEX_W,
3753 Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
// Selection patterns: fextend/fround map onto the rr forms (src register
// duplicated into both operands); extending loads fold into the rm form when
// optimizing for size, otherwise go through an explicit VMOVSSZrm load.
3756 def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
3757 Requires<[HasAVX512]>;
3758 def : Pat<(fextend (loadf32 addr:$src)),
3759 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
3761 def : Pat<(extloadf32 addr:$src),
3762 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
3763 Requires<[HasAVX512, OptForSize]>;
3765 def : Pat<(extloadf32 addr:$src),
3766 (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
3767 Requires<[HasAVX512, OptForSpeed]>;
3769 def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
3770 Requires<[HasAVX512]>;
// Packed FP conversion with an extra static-rounding variant: rr and rm carry
// selection patterns; rrb takes an AVX512RC rounding-control operand and is
// encoded with EVEX.B (EVEX_B, EVEX_RC) but has no pattern (selected only via
// intrinsic Pat<> records elsewhere).
3772 multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
3773 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3774 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3776 let hasSideEffects = 0 in {
3777 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3778 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3780 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3781 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3782 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3783 [], d>, EVEX, EVEX_B, EVEX_RC;
3785 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3786 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3788 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3789 } // hasSideEffects = 0
// Same as avx512_vcvt_fp_with_rc but without the rounding-control (rrb)
// variant; used for conversions that do not support embedded rounding.
3792 multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
3793 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3794 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3796 let hasSideEffects = 0 in {
3797 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3798 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3800 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3802 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3803 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3805 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3806 } // hasSideEffects = 0
// Packed f64 <-> f32 conversions.  vcvtpd2ps narrows (fround, supports
// embedded rounding); vcvtps2pd widens (fextend).  CD8VH on ps2pd reflects
// the half-width 256-bit memory source.
3809 defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
3810 memopv8f64, f512mem, v8f32, v8f64,
3811 SSEPackedSingle>, EVEX_V512, VEX_W, PD,
3812 EVEX_CD8<64, CD8VF>;
3814 defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
3815 memopv4f64, f256mem, v8f64, v8f32,
3816 SSEPackedDouble>, EVEX_V512, PS,
3817 EVEX_CD8<32, CD8VH>;
3818 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3819 (VCVTPS2PDZrm addr:$src)>;
// Unmasked (all-ones mask, zero passthrough) forms of the cvtpd2ps intrinsic:
// current-rounding maps to rr, an explicit rounding immediate maps to rrb.
3821 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3822 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
3823 (VCVTPD2PSZrr VR512:$src)>;
3825 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3826 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
3827 (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
3829 //===----------------------------------------------------------------------===//
3830 // AVX-512 Vector convert from sign integer to float/double
3831 //===----------------------------------------------------------------------===//
// int -> fp: dq2ps/dq2pd (signed), udq2pd/udq2ps (unsigned); the full-width
// single-precision forms support embedded rounding (_with_rc).
3833 defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
3834 memopv8i64, i512mem, v16f32, v16i32,
3835 SSEPackedSingle>, EVEX_V512, PS,
3836 EVEX_CD8<32, CD8VF>;
3838 defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
3839 memopv4i64, i256mem, v8f64, v8i32,
3840 SSEPackedDouble>, EVEX_V512, XS,
3841 EVEX_CD8<32, CD8VH>;
// fp -> int (truncating): signed (0x5B/0xE6) and unsigned (0x78) variants.
3843 defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
3844 memopv16f32, f512mem, v16i32, v16f32,
3845 SSEPackedSingle>, EVEX_V512, XS,
3846 EVEX_CD8<32, CD8VF>;
3848 defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
3849 memopv8f64, f512mem, v8i32, v8f64,
3850 SSEPackedDouble>, EVEX_V512, PD, VEX_W,
3851 EVEX_CD8<64, CD8VF>;
3853 defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
3854 memopv16f32, f512mem, v16i32, v16f32,
3855 SSEPackedSingle>, EVEX_V512, PS,
3856 EVEX_CD8<32, CD8VF>;
3858 // cvttps2udq (src, 0, mask-all-ones, sae-current)
3859 def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
3860 (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
3861 (VCVTTPS2UDQZrr VR512:$src)>;
3863 defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
3864 memopv8f64, f512mem, v8i32, v8f64,
3865 SSEPackedDouble>, EVEX_V512, PS, VEX_W,
3866 EVEX_CD8<64, CD8VF>;
3868 // cvttpd2udq (src, 0, mask-all-ones, sae-current)
3869 def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
3870 (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
3871 (VCVTTPD2UDQZrr VR512:$src)>;
3873 defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
3874 memopv4i64, f256mem, v8f64, v8i32,
3875 SSEPackedDouble>, EVEX_V512, XS,
3876 EVEX_CD8<32, CD8VH>;
3878 defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
3879 memopv16i32, f512mem, v16f32, v16i32,
3880 SSEPackedSingle>, EVEX_V512, XD,
3881 EVEX_CD8<32, CD8VF>;
// 128/256-bit unsigned conversions have no dedicated narrow instructions
// pre-VLX: widen the operand into a 512-bit register (SUBREG_TO_REG), run
// the Z-suffixed instruction, then extract the low subvector.
3883 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
3884 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3885 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3887 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
3888 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3889 (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3891 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
3892 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3893 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3895 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
3896 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3897 (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3899 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
3900 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
3901 (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_ymm)))), sub_ymm)>;
// Unmasked intrinsic forms (zero passthrough, all-ones mask); the ps
// variants take an explicit rounding immediate and select the rrb encoding.
3903 def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
3904 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3905 (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
3906 def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
3907 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3908 (VCVTDQ2PDZrr VR256X:$src)>;
3909 def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
3910 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3911 (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
3912 def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
3913 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3914 (VCVTUDQ2PDZrr VR256X:$src)>;
// fp -> int conversion with rounding (non-truncating): rr, rounding-control
// rrb (EVEX.B + RC) and rm forms.  All pattern lists are empty; selection is
// intrinsic-driven via separate Pat<> records.
3916 multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3917 RegisterClass DstRC, PatFrag mem_frag,
3918 X86MemOperand x86memop, Domain d> {
3919 let hasSideEffects = 0 in {
3920 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3921 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3923 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3924 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3925 [], d>, EVEX, EVEX_B, EVEX_RC;
3927 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3928 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3930 } // hasSideEffects = 0
// Rounding fp -> signed int (0x5B/0xE6) and fp -> unsigned int (0x79)
// conversions, plus intrinsic patterns that select the rounding-control (rrb)
// encodings for the unmasked case.
3933 defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
3934 memopv16f32, f512mem, SSEPackedSingle>, PD,
3935 EVEX_V512, EVEX_CD8<32, CD8VF>;
3936 defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3937 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3938 EVEX_V512, EVEX_CD8<64, CD8VF>;
3940 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3941 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3942 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3944 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3945 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3946 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3948 defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3949 memopv16f32, f512mem, SSEPackedSingle>,
3950 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
3951 defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3952 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
3953 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
3955 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3956 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3957 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3959 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3960 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3961 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
// Fold loads directly into the packed f64<->f32 conversion memory forms.
3963 let Predicates = [HasAVX512] in {
3964 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3965 (VCVTPD2PSZrm addr:$src)>;
3966 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3967 (VCVTPS2PDZrm addr:$src)>;
3970 //===----------------------------------------------------------------------===//
3971 // Half precision conversion instructions
3972 //===----------------------------------------------------------------------===//
// f16 -> f32 widening conversion (register and load forms); patterns are
// supplied by the intrinsic Pat<> records after the defms.
3973 multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3974 X86MemOperand x86memop> {
3975 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3976 "vcvtph2ps\t{$src, $dst|$dst, $src}",
3978 let hasSideEffects = 0, mayLoad = 1 in
3979 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3980 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
// f32 -> f16 narrowing conversion; $src2 is the rounding-control immediate.
// Note the destination-first (MRMDest*) encodings: rr writes a register,
// mr stores directly to memory.
3983 multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3984 X86MemOperand x86memop> {
3985 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3986 (ins srcRC:$src1, i32i8imm:$src2),
3987 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3989 let hasSideEffects = 0, mayStore = 1 in
3990 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3991 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
3992 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
// 512-bit half-precision conversions plus unmasked intrinsic selection
// patterns (all-ones mask, zero passthrough).
3995 defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
3996 EVEX_CD8<32, CD8VH>;
3997 defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
3998 EVEX_CD8<32, CD8VH>;
4000 def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
4001 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
4002 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
4004 def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
4005 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
4006 (VCVTPH2PSZrr VR256X:$src)>;
// Ordered/unordered scalar FP compares that write EFLAGS (ucomiss/ucomisd,
// comiss/comisd).  The VR128X variants are pattern-less or isCodeGenOnly
// intrinsic forms.
4008 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
4009 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
4010 "ucomiss">, PS, EVEX, VEX_LIG,
4011 EVEX_CD8<32, CD8VT1>;
4012 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
4013 "ucomisd">, PD, EVEX,
4014 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4015 let Pattern = []<dag> in {
4016 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
4017 "comiss">, PS, EVEX, VEX_LIG,
4018 EVEX_CD8<32, CD8VT1>;
4019 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
4020 "comisd">, PD, EVEX,
4021 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4023 let isCodeGenOnly = 1 in {
4024 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
4025 load, "ucomiss">, PS, EVEX, VEX_LIG,
4026 EVEX_CD8<32, CD8VT1>;
4027 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
4028 load, "ucomisd">, PD, EVEX,
4029 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4031 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
4032 load, "comiss">, PS, EVEX, VEX_LIG,
4033 EVEX_CD8<32, CD8VT1>;
4034 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
4035 load, "comisd">, PD, EVEX,
4036 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
4040 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations.
// No patterns here (hasSideEffects = 0, empty lists); selected via Pat<>
// records and COPY_TO_REGCLASS below.
4041 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4042 X86MemOperand x86memop> {
4043 let hasSideEffects = 0 in {
4044 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4045 (ins RC:$src1, RC:$src2),
4046 !strconcat(OpcodeStr,
4047 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4048 let mayLoad = 1 in {
4049 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4050 (ins RC:$src1, x86memop:$src2),
4051 !strconcat(OpcodeStr,
4052 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// rcp14/rsqrt14 scalar instantiations, plus intrinsic patterns: the v4f32/
// v2f64 intrinsic operands are moved between VR128X and FR32X/FR64X with
// COPY_TO_REGCLASS because the instructions are defined on scalar classes.
4057 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
4058 EVEX_CD8<32, CD8VT1>;
4059 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
4060 VEX_W, EVEX_CD8<64, CD8VT1>;
4061 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
4062 EVEX_CD8<32, CD8VT1>;
4063 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
4064 VEX_W, EVEX_CD8<64, CD8VT1>;
4066 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
4067 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4068 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4069 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4071 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
4072 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4073 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4074 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4076 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
4077 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4078 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4079 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4081 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
4082 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4083 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4084 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4086 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations, selected directly from an SDNode
// (X86frcp / X86frsqrt); register (r) and memory (m) forms.
4087 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
4088 RegisterClass RC, X86MemOperand x86memop,
4089 PatFrag mem_frag, ValueType OpVt> {
4090 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4091 !strconcat(OpcodeStr,
4092 " \t{$src, $dst|$dst, $src}"),
4093 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
4095 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4096 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4097 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
// 512-bit rcp14/rsqrt14 instantiations and their unmasked-intrinsic
// selection patterns (zero passthrough, all-ones mask -> plain r form).
4100 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
4101 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4102 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
4103 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4104 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
4105 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4106 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
4107 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4109 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
4110 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4111 (VRSQRT14PSZr VR512:$src)>;
4112 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
4113 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4114 (VRSQRT14PDZr VR512:$src)>;
4116 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
4117 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4118 (VRCP14PSZr VR512:$src)>;
4119 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
4120 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4121 (VRCP14PDZr VR512:$src)>;
4123 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations (ERI feature).  rrb is the
// suppress-all-exceptions ({sae}) variant, encoded with EVEX.B.
4124 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4125 X86MemOperand x86memop> {
4126 let hasSideEffects = 0, Predicates = [HasERI] in {
4127 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4128 (ins RC:$src1, RC:$src2),
4129 !strconcat(OpcodeStr,
4130 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4131 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4132 (ins RC:$src1, RC:$src2),
4133 !strconcat(OpcodeStr,
4134 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
4135 []>, EVEX_4V, EVEX_B;
4136 let mayLoad = 1 in {
4137 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4138 (ins RC:$src1, x86memop:$src2),
4139 !strconcat(OpcodeStr,
4140 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// rcp28/rsqrt28 scalar instantiations.  The intrinsic patterns (whose
// FROUND argument lines were dropped from this listing) select the {sae}
// rrb encodings, with COPY_TO_REGCLASS bridging VR128X <-> FR32X/FR64X.
4145 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
4146 EVEX_CD8<32, CD8VT1>;
4147 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
4148 VEX_W, EVEX_CD8<64, CD8VT1>;
4149 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
4150 EVEX_CD8<32, CD8VT1>;
4151 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
4152 VEX_W, EVEX_CD8<64, CD8VT1>;
4154 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
4155 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4157 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4158 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4160 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
4161 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4163 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4164 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4166 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
4167 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4169 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4170 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4172 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
4173 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4175 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4176 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4178 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations (ERI).  rb is the {sae} variant
// (EVEX.B); pattern lists (on dropped continuation lines) are intrinsic-only.
4179 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
4180 RegisterClass RC, X86MemOperand x86memop> {
4181 let hasSideEffects = 0, Predicates = [HasERI] in {
4182 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4183 !strconcat(OpcodeStr,
4184 " \t{$src, $dst|$dst, $src}"),
4186 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4187 !strconcat(OpcodeStr,
4188 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
4190 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4191 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// 512-bit rcp28/rsqrt28 instantiations; FROUND_NO_EXC in the intrinsic maps
// to the {sae} (rb) encodings.
4195 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
4196 EVEX_V512, EVEX_CD8<32, CD8VF>;
4197 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
4198 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4199 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
4200 EVEX_V512, EVEX_CD8<32, CD8VF>;
4201 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
4202 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4204 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
4205 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4206 (VRSQRT28PSZrb VR512:$src)>;
4207 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
4208 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4209 (VRSQRT28PDZrb VR512:$src)>;
4211 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
4212 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4213 (VRCP28PSZrb VR512:$src)>;
4214 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
4215 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4216 (VRCP28PDZrb VR512:$src)>;
// Packed square root: vsqrtps (16 x f32) and vsqrtpd (8 x f64), each with a
// register (rr) and a memory (rm) form, selected from OpNode (fsqrt).
4218 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
4219 OpndItins itins_s, OpndItins itins_d> {
4220 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4221 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4222 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
4226 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4227 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4229 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
4230 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
4232 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4233 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4234 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
4238 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4239 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4240 [(set VR512:$dst, (OpNode
// Fix: the double-precision memory pattern must load via the f64 memop
// fragment; memopv16f32 here was a copy-paste from the PS form above and
// made the pattern's load type inconsistent with the v8f64 bitconvert.
4241 (v8f64 (bitconvert (memopv8f64 addr:$src)))))],
4242 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Scalar square root (ss/sd).  Plain FR32X/FR64X forms carry no patterns;
// the isCodeGenOnly *_Int forms are selected from the AVX-512 sqrt
// intrinsics on VR128X, with ssmem/sdmem scalar-load memory operands.
4246 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
4247 Intrinsic F32Int, Intrinsic F64Int,
4248 OpndItins itins_s, OpndItins itins_d> {
4249 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
4250 (ins FR32X:$src1, FR32X:$src2),
4251 !strconcat(OpcodeStr,
4252 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4253 [], itins_s.rr>, XS, EVEX_4V;
4254 let isCodeGenOnly = 1 in
4255 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4256 (ins VR128X:$src1, VR128X:$src2),
4257 !strconcat(OpcodeStr,
4258 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4260 (F32Int VR128X:$src1, VR128X:$src2))],
4261 itins_s.rr>, XS, EVEX_4V;
4262 let mayLoad = 1 in {
4263 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
4264 (ins FR32X:$src1, f32mem:$src2),
4265 !strconcat(OpcodeStr,
4266 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4267 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4268 let isCodeGenOnly = 1 in
4269 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4270 (ins VR128X:$src1, ssmem:$src2),
4271 !strconcat(OpcodeStr,
4272 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4274 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4275 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4277 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4278 (ins FR64X:$src1, FR64X:$src2),
4279 !strconcat(OpcodeStr,
4280 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4282 let isCodeGenOnly = 1 in
4283 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4284 (ins VR128X:$src1, VR128X:$src2),
4285 !strconcat(OpcodeStr,
4286 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4288 (F64Int VR128X:$src1, VR128X:$src2))],
4289 itins_s.rr>, XD, EVEX_4V, VEX_W;
4290 let mayLoad = 1 in {
4291 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4292 (ins FR64X:$src1, f64mem:$src2),
4293 !strconcat(OpcodeStr,
4294 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4295 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4296 let isCodeGenOnly = 1 in
4297 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4298 (ins VR128X:$src1, sdmem:$src2),
4299 !strconcat(OpcodeStr,
4300 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4302 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4303 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
// Instantiate both the scalar (ss/sd, intrinsic-driven) and packed (ps/pd,
// fsqrt-driven) square-root families under the single VSQRT prefix.
4308 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4309 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4310 SSE_SQRTSS, SSE_SQRTSD>,
4311 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
4312 SSE_SQRTPS, SSE_SQRTPD>;
// Selection patterns for sqrt/rsqrt/rcp: scalar fsqrt maps onto the two-
// operand SSZr/SDZr forms with an IMPLICIT_DEF first operand; memory forms
// are only used under OptForSize; SSE intrinsics reuse the Z instructions.
4314 let Predicates = [HasAVX512] in {
4315 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4316 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4317 (VSQRTPSZrr VR512:$src1)>;
4318 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4319 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4320 (VSQRTPDZrr VR512:$src1)>;
4322 def : Pat<(f32 (fsqrt FR32X:$src)),
4323 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4324 def : Pat<(f32 (fsqrt (load addr:$src))),
4325 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4326 Requires<[OptForSize]>;
4327 def : Pat<(f64 (fsqrt FR64X:$src)),
4328 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4329 def : Pat<(f64 (fsqrt (load addr:$src))),
4330 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4331 Requires<[OptForSize]>;
4333 def : Pat<(f32 (X86frsqrt FR32X:$src)),
4334 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4335 def : Pat<(f32 (X86frsqrt (load addr:$src))),
4336 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4337 Requires<[OptForSize]>;
4339 def : Pat<(f32 (X86frcp FR32X:$src)),
4340 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4341 def : Pat<(f32 (X86frcp (load addr:$src))),
4342 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4343 Requires<[OptForSize]>;
4345 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4346 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4347 (COPY_TO_REGCLASS VR128X:$src, FR32)),
4349 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4350 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4352 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4353 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4354 (COPY_TO_REGCLASS VR128X:$src, FR64)),
4356 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4357 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
// Packed FP unary op with an immediate (rounding-style) operand, selected
// from the V4F32Int/V2F64Int intrinsics.  VForm parameterizes the CD8
// tuple so the same multiclass serves different vector widths.
4361 multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4362 X86MemOperand x86memop, RegisterClass RC,
4363 PatFrag mem_frag32, PatFrag mem_frag64,
4364 Intrinsic V4F32Int, Intrinsic V2F64Int,
4366 let ExeDomain = SSEPackedSingle in {
4367 // Intrinsic operation, reg.
4368 // Vector intrinsic operation, reg
4369 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4370 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4371 !strconcat(OpcodeStr,
4372 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4373 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4375 // Vector intrinsic operation, mem
4376 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4377 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4378 !strconcat(OpcodeStr,
4379 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4381 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4382 EVEX_CD8<32, VForm>;
4383 } // ExeDomain = SSEPackedSingle
4385 let ExeDomain = SSEPackedDouble in {
4386 // Vector intrinsic operation, reg
4387 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4388 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4389 !strconcat(OpcodeStr,
4390 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4391 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4393 // Vector intrinsic operation, mem
4394 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4395 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4396 !strconcat(OpcodeStr,
4397 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4399 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4400 EVEX_CD8<64, VForm>;
4401 } // ExeDomain = SSEPackedDouble
// Scalar two-source FP op with an immediate third operand (ss/sd).  The
// FR32X/FR64X forms carry no patterns; the isCodeGenOnly *_Int and memory
// forms are selected from the F32Int/F64Int intrinsics on VR128X.
4404 multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4408 let ExeDomain = GenericDomain in {
4410 let hasSideEffects = 0 in
4411 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4412 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4413 !strconcat(OpcodeStr,
4414 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4417 // Intrinsic operation, reg.
4418 let isCodeGenOnly = 1 in
4419 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4420 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4421 !strconcat(OpcodeStr,
4422 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4423 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4425 // Intrinsic operation, mem.
4426 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4427 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4428 !strconcat(OpcodeStr,
4429 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4430 [(set VR128X:$dst, (F32Int VR128X:$src1,
4431 sse_load_f32:$src2, imm:$src3))]>,
4432 EVEX_CD8<32, CD8VT1>;
4435 let hasSideEffects = 0 in
4436 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4437 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4438 !strconcat(OpcodeStr,
4439 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4442 // Intrinsic operation, reg.
4443 let isCodeGenOnly = 1 in
4444 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4445 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4446 !strconcat(OpcodeStr,
4447 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4448 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4451 // Intrinsic operation, mem.
4452 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4453 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4454 !strconcat(OpcodeStr,
4455 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4457 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4458 VEX_W, EVEX_CD8<64, CD8VT1>;
4459 } // ExeDomain = GenericDomain
// Packed round-to-scale (vrndscaleps/pd): one source plus a rounding-mode
// immediate; register (r) and memory (m) forms.
4462 multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4463 X86MemOperand x86memop, RegisterClass RC,
4464 PatFrag mem_frag, Domain d> {
4465 let ExeDomain = d in {
4466 // Intrinsic operation, reg.
4467 // Vector intrinsic operation, reg
4468 def r : AVX512AIi8<opc, MRMSrcReg,
4469 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4470 !strconcat(OpcodeStr,
4471 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4474 // Vector intrinsic operation, mem
4475 def m : AVX512AIi8<opc, MRMSrcMem,
4476 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4477 !strconcat(OpcodeStr,
4478 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// 512-bit vrndscaleps/pd, plus unmasked intrinsic patterns (passthrough ==
// source, all-ones mask) selecting the plain r forms.
4484 defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4485 memopv16f32, SSEPackedSingle>, EVEX_V512,
4486 EVEX_CD8<32, CD8VF>;
4488 def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
4489 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
4491 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4494 defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4495 memopv8f64, SSEPackedDouble>, EVEX_V512,
4496 VEX_W, EVEX_CD8<64, CD8VF>;
4498 def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
4499 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
4501 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
// Scalar round-to-scale (vrndscaless/sd): two sources plus an immediate.
// NOTE(review): the asm template mentions only $src2/$src1 even though the
// ins list also has i32i8imm:$src3 — looks like the immediate is missing
// from the printed operand list; confirm against the assembler tests.
4503 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
4504 Operand x86memop, RegisterClass RC, Domain d> {
4505 let ExeDomain = d in {
4506 def r : AVX512AIi8<opc, MRMSrcReg,
4507 (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
4508 !strconcat(OpcodeStr,
4509 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4512 def m : AVX512AIi8<opc, MRMSrcMem,
4513 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
4514 !strconcat(OpcodeStr,
4515 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// Scalar instantiations: SS operates on FR32X (CD8 scale 32), SD on FR64X
// (CD8 scale 64).
4520 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4521 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4523 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4524 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
// Lower generic scalar rounding nodes to VRNDSCALESS/SD with the matching
// immediate: 0x1 = floor, 0xC = nearbyint (current mode, no exceptions),
// 0x2 = ceil, 0x4 = rint (current mode), 0x3 = trunc. The first operand is
// IMPLICIT_DEF since only the low element matters.
4526 def : Pat<(ffloor FR32X:$src),
4527 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4528 def : Pat<(f64 (ffloor FR64X:$src)),
4529 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4530 def : Pat<(f32 (fnearbyint FR32X:$src)),
4531 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4532 def : Pat<(f64 (fnearbyint FR64X:$src)),
4533 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4534 def : Pat<(f32 (fceil FR32X:$src)),
4535 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4536 def : Pat<(f64 (fceil FR64X:$src)),
4537 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4538 def : Pat<(f32 (frint FR32X:$src)),
4539 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4540 def : Pat<(f64 (frint FR64X:$src)),
4541 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4542 def : Pat<(f32 (ftrunc FR32X:$src)),
4543 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4544 def : Pat<(f64 (ftrunc FR64X:$src)),
4545 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
// Same rounding-node lowering for the packed 512-bit forms (same immediate
// encoding as the scalar patterns above).
4547 def : Pat<(v16f32 (ffloor VR512:$src)),
4548 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
4549 def : Pat<(v16f32 (fnearbyint VR512:$src)),
4550 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
4551 def : Pat<(v16f32 (fceil VR512:$src)),
4552 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
4553 def : Pat<(v16f32 (frint VR512:$src)),
4554 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
4555 def : Pat<(v16f32 (ftrunc VR512:$src)),
4556 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
4558 def : Pat<(v8f64 (ffloor VR512:$src)),
4559 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
4560 def : Pat<(v8f64 (fnearbyint VR512:$src)),
4561 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
4562 def : Pat<(v8f64 (fceil VR512:$src)),
4563 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
4564 def : Pat<(v8f64 (frint VR512:$src)),
4565 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
4566 def : Pat<(v8f64 (ftrunc VR512:$src)),
4567 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
4569 //-------------------------------------------------
4570 // Integer truncate and extend operations
4571 //-------------------------------------------------
// Down-converting truncate / saturating-truncate moves (VPMOV* family).
// Generates reg-reg plain/merge-masked/zero-masked forms (rr/rrk/rrkz) and
// truncating stores, plain and masked (mr/mrk). KRC is the write-mask class.
// NOTE(review): the pattern lists and closing brace are missing in this view.
4573 multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
4574 RegisterClass dstRC, RegisterClass srcRC,
4575 RegisterClass KRC, X86MemOperand x86memop> {
4576 def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4578 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4581 def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4582 (ins KRC:$mask, srcRC:$src),
4583 !strconcat(OpcodeStr,
4584 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
4587 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4588 (ins KRC:$mask, srcRC:$src),
4589 !strconcat(OpcodeStr,
4590 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4593 def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
4594 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4597 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
4598 (ins x86memop:$dst, KRC:$mask, srcRC:$src),
4599 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
// Instantiations of the truncate family. Naming: Q/D source element (qword/
// dword), B/W/D destination element; plain = truncate, S = signed saturate,
// US = unsigned saturate. Q-source forms use VK8WM, D-source forms VK16WM.
// The CD8 tuple encodes the memory-form displacement scaling.
4603 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4604 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4605 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4606 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4607 defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4608 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4609 defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4610 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4611 defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4612 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4613 defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4614 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4615 defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4616 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4617 defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4618 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4619 defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4620 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4621 defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4622 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4623 defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4624 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4625 defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4626 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4627 defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4628 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4629 defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4630 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4631 defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4632 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// Select the plain truncate node to the unmasked rr forms, and the masked
// truncate node (X86vtruncm) to the zero-masking rrkz forms.
4634 def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4635 def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4636 def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4637 def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4638 def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
4640 def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4641 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
4642 def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4643 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
4644 def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4645 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
4646 def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4647 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
// Sign/zero extension (VPMOVSX*/VPMOVZX*). OpNode is X86vsext or X86vzext;
// OpVT/InVT are the destination/source vector types. Produces plain, merge-
// masked and zero-masked reg forms plus the three load-form counterparts.
// NOTE(review): several pattern lists and the closing braces are missing
// from this excerpt.
4650 multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4651 RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
4652 PatFrag mem_frag, X86MemOperand x86memop,
4653 ValueType OpVT, ValueType InVT> {
4655 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4657 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4658 [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
4660 def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4661 (ins KRC:$mask, SrcRC:$src),
4662 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4665 def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4666 (ins KRC:$mask, SrcRC:$src),
4667 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4670 let mayLoad = 1 in {
4671 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4672 (ins x86memop:$src),
4673 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4675 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
4678 def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4679 (ins KRC:$mask, x86memop:$src),
4680 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4684 def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4685 (ins KRC:$mask, x86memop:$src),
4686 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// 512-bit extend instantiations: zero-extend (0x31-0x35, X86vzext) and
// sign-extend (0x21-0x25, X86vsext) from byte/word/dword sources.
4692 defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
4693 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4695 defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
4696 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4698 defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
4699 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4700 EVEX_CD8<16, CD8VH>;
4701 defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
4702 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4703 EVEX_CD8<16, CD8VQ>;
4704 defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
4705 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4706 EVEX_CD8<32, CD8VH>;
4708 defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
4709 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4711 defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
4712 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4714 defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
4715 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4716 EVEX_CD8<16, CD8VH>;
4717 defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
4718 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4719 EVEX_CD8<16, CD8VQ>;
4720 defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
4721 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4722 EVEX_CD8<32, CD8VH>;
4724 //===----------------------------------------------------------------------===//
4725 // GATHER - SCATTER Operations
// Masked gather. The destination merges into $src1 ($src1 = $dst), the mask
// is both input and written back ($mask = $mask_wb), and $dst is
// earlyclobber because it must not alias the index register.
// NOTE(review): the `let mayLoad = 1,` line preceding Constraints and the
// closing braces appear truncated in this excerpt.
4727 multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4728 RegisterClass RC, X86MemOperand memop> {
4730 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4731 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4732 (ins RC:$src1, KRC:$mask, memop:$src2),
4733 !strconcat(OpcodeStr,
4734 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// Gather instantiations. D/Q in the mnemonic names the index element width
// (dword/qword); the memory operand (vy*/vz*) carries the index-vector size.
// Qword-index dword-data forms (QPS/QD) write a 256-bit result.
4738 let ExeDomain = SSEPackedDouble in {
4739 defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4740 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4741 defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4742 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4745 let ExeDomain = SSEPackedSingle in {
4746 defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4747 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4748 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4749 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4752 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4753 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4754 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4755 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4757 defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4758 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4759 defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4760 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Masked scatter: a store form (MRMDestMem) whose only register output is
// the written-back mask ($mask = $mask_wb).
// NOTE(review): the pattern list and closing brace are missing in this view.
4762 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4763 RegisterClass RC, X86MemOperand memop> {
4764 let mayStore = 1, Constraints = "$mask = $mask_wb" in
4765 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4766 (ins memop:$dst, KRC:$mask, RC:$src2),
4767 !strconcat(OpcodeStr,
4768 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// Scatter instantiations, mirroring the gather set above (same index/data
// width naming and memory-operand choices).
4772 let ExeDomain = SSEPackedDouble in {
4773 defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4774 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4775 defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4776 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4779 let ExeDomain = SSEPackedSingle in {
4780 defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4781 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4782 defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4783 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4786 defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4787 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4788 defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4789 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4791 defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4792 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4793 defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4794 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Gather/scatter prefetch hints (PFI feature). No outputs; hasSideEffects
// keeps the instruction from being dropped as dead.
4797 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4798 RegisterClass KRC, X86MemOperand memop> {
4799 let Predicates = [HasPFI], hasSideEffects = 1 in
4800 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4801 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
// Prefetch instantiations: hint level 0/1 selected by the ModRM reg field
// (MRM1m/MRM2m for gather, MRM5m/MRM6m for scatter); 0xC6 = dword-index
// opcode, 0xC7 = qword-index opcode.
4805 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4806 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4808 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4809 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4811 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4812 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4814 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4815 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4817 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4818 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4820 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4821 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4823 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4824 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4826 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4827 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4829 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4830 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4832 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4833 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4835 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4836 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4838 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4839 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4841 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4842 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4844 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4845 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4847 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4848 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4850 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4851 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4852 //===----------------------------------------------------------------------===//
4853 // VSHUFPS - VSHUFPD Operations
// VSHUFPS/VSHUFPD: shuffle with 8-bit immediate control, reg-mem (rmi) and
// reg-reg (rri) forms selected via the X86Shufp node.
4855 multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4856 ValueType vt, string OpcodeStr, PatFrag mem_frag,
4858 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4859 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4860 !strconcat(OpcodeStr,
4861 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4862 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4863 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4864 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
4865 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4866 (ins RC:$src1, RC:$src2, i8imm:$src3),
4867 !strconcat(OpcodeStr,
4868 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4869 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4870 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4871 EVEX_4V, Sched<[WriteShuffle]>;
// 512-bit VSHUFP instantiations, plus patterns reusing the FP shuffles for
// the same-width integer vector types (v16i32 / v8i64).
4874 defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
4875 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
4876 defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
4877 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4879 def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4880 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4881 def : Pat<(v16i32 (X86Shufp VR512:$src1,
4882 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4883 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4885 def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4886 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4887 def : Pat<(v8i64 (X86Shufp VR512:$src1,
4888 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4889 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// VALIGND/VALIGNQ via the X86VectorVTInfo helper `_` (see file header):
// masked rri form through AVX512_masking, a float-typed Pat reusing the same
// instruction (note the swapped $src2/$src1 order, matching the rri pattern),
// and an unmasked memory form.
// NOTE(review): the rmi pattern list and closing brace are missing here.
4891 multiclass avx512_valign<X86VectorVTInfo _> {
4892 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4893 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4895 "$src3, $src2, $src1", "$src1, $src2, $src3",
4896 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4898 _.VT, _.RC, _.KRCWM>,
4899 AVX512AIi8Base, EVEX_4V;
4901 // Also match valign of packed floats.
4902 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4903 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
4906 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4907 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4908 !strconcat("valign"##_.Suffix,
4909 " \t{$src3, $src2, $src1, $dst|"
4910 "$dst, $src1, $src2, $src3}"),
// VALIGN instantiations, and PatLeaf helpers matching an arithmetic shift by
// the sign-bit position (31/63) — i.e. a vXi1 sign-mask splatted to vXiY.
4913 defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4914 defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4916 // Helper fragments to match sext vXi1 to vXiY.
4917 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4918 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// VPABS (absolute value): plain/merge-masked/zero-masked register forms,
// the same trio for full-width loads, and a broadcast-load trio (rmb*)
// whose asm carries the {1toN} broadcast suffix.
// NOTE(review): the memory forms hard-code VR512 as $dst instead of RC, and
// several pattern lists/closing braces are missing — confirm in full file.
4920 multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
4921 RegisterClass KRC, RegisterClass RC,
4922 X86MemOperand x86memop, X86MemOperand x86scalar_mop,
4924 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4925 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4927 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4928 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4930 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4931 !strconcat(OpcodeStr,
4932 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4934 let mayLoad = 1 in {
4935 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4936 (ins x86memop:$src),
4937 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4939 def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4940 (ins KRC:$mask, x86memop:$src),
4941 !strconcat(OpcodeStr,
4942 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4944 def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4945 (ins KRC:$mask, x86memop:$src),
4946 !strconcat(OpcodeStr,
4947 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4949 def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4950 (ins x86scalar_mop:$src),
4951 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4952 ", $dst|$dst, ${src}", BrdcstStr, "}"),
4954 def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4955 (ins KRC:$mask, x86scalar_mop:$src),
4956 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4957 ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
4958 []>, EVEX, EVEX_B, EVEX_K;
4959 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4960 (ins KRC:$mask, x86scalar_mop:$src),
4961 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4962 ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
4964 []>, EVEX, EVEX_B, EVEX_KZ;
// VPABS instantiations, patterns matching the open-coded abs idiom
// (xor/add against the sign-mask sext — the leading `def : Pat<(xor ...`
// lines are truncated in this excerpt), and the unmasked mask-intrinsic
// forms (all-ones mask).
4968 defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4969 i512mem, i32mem, "{1to16}">, EVEX_V512,
4970 EVEX_CD8<32, CD8VF>;
4971 defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4972 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4973 EVEX_CD8<64, CD8VF>;
4976 (bc_v16i32 (v16i1sextv16i32)),
4977 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4978 (VPABSDZrr VR512:$src)>;
4980 (bc_v8i64 (v8i1sextv8i64)),
4981 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4982 (VPABSQZrr VR512:$src)>;
4984 def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4985 (v16i32 immAllZerosV), (i16 -1))),
4986 (VPABSDZrr VR512:$src)>;
4987 def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4988 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
4989 (VPABSQZrr VR512:$src)>;
// Shared multiclass for the CDI unary ops (VPCONFLICT, VPLZCNT): plain,
// zero-masked and (with $src1 = $dst) merge-masked forms, each with reg,
// full-load and broadcast-load variants.
// NOTE(review): pattern lists and closing braces are missing in this view.
4991 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
4992 RegisterClass RC, RegisterClass KRC,
4993 X86MemOperand x86memop,
4994 X86MemOperand x86scalar_mop, string BrdcstStr> {
4995 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4997 !strconcat(OpcodeStr, " \t{$src, ${dst} |${dst}, $src}"),
4999 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5000 (ins x86memop:$src),
5001 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
5003 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5004 (ins x86scalar_mop:$src),
5005 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
5006 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
5008 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
5009 (ins KRC:$mask, RC:$src),
5010 !strconcat(OpcodeStr,
5011 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
5013 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5014 (ins KRC:$mask, x86memop:$src),
5015 !strconcat(OpcodeStr,
5016 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
5018 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5019 (ins KRC:$mask, x86scalar_mop:$src),
5020 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
5021 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
5023 []>, EVEX, EVEX_KZ, EVEX_B;
5025 let Constraints = "$src1 = $dst" in {
5026 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
5027 (ins RC:$src1, KRC:$mask, RC:$src2),
5028 !strconcat(OpcodeStr,
5029 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
5031 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5032 (ins RC:$src1, KRC:$mask, x86memop:$src2),
5033 !strconcat(OpcodeStr,
5034 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
5036 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5037 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
5038 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
5039 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
5040 []>, EVEX, EVEX_K, EVEX_B;
// CDI instantiations (VPCONFLICT at 0xC4, VPLZCNT at 0x44), intrinsic
// patterns mapping the masked intrinsics to the merge-masked rrk forms
// (mask moved from GPR into the mask register class), and ctlz lowering
// to VPLZCNT for reg and load operands.
5044 let Predicates = [HasCDI] in {
5045 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
5046 i512mem, i32mem, "{1to16}">,
5047 EVEX_V512, EVEX_CD8<32, CD8VF>;
5050 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
5051 i512mem, i64mem, "{1to8}">,
5052 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5056 def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
5058 (VPCONFLICTDrrk VR512:$src1,
5059 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5061 def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
5063 (VPCONFLICTQrrk VR512:$src1,
5064 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
5066 let Predicates = [HasCDI] in {
5067 defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
5068 i512mem, i32mem, "{1to16}">,
5069 EVEX_V512, EVEX_CD8<32, CD8VF>;
5072 defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
5073 i512mem, i64mem, "{1to8}">,
5074 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5078 def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
5080 (VPLZCNTDrrk VR512:$src1,
5081 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5083 def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
5085 (VPLZCNTQrrk VR512:$src1,
5086 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
5088 def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
5089 (VPLZCNTDrm addr:$src)>;
5090 def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
5091 (VPLZCNTDrr VR512:$src)>;
5092 def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
5093 (VPLZCNTQrm addr:$src)>;
5094 def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
5095 (VPLZCNTQrr VR512:$src)>;
// i1 store lowering: constant i1 stores become a one-byte immediate store
// (both -1 and 1 are "true" and store byte 1); a VK1 register store goes
// through KMOVW after widening the mask class; truncating i1 stores of a
// GR8 store the whole byte.
// NOTE(review): the truncstorei1 PatFrag's closing `}]>;` line is not
// visible in this excerpt.
5097 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5098 def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5099 def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
5101 def : Pat<(store VK1:$src, addr:$dst),
5102 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
5104 def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
5105 (truncstore node:$val, node:$ptr), [{
5106 return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
5109 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
5110 (MOV8mr addr:$dst, GR8:$src)>;