1 //===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
17 //===----------------------------------------------------------------------===//
18 // SSE 1 & 2 Instructions Classes
19 //===----------------------------------------------------------------------===//
21 /// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
// Instantiates the reg/reg (rr) and reg/mem (rm) forms of a scalar SSE1/SSE2
// binary operation. rr is marked commutable so the two-address pass may swap
// operands.
// NOTE(review): lines appear to be elided from this chunk (the Is2Addr
// parameter, the !if(Is2Addr, ...) asm selector opening, and closing braces
// are not visible) -- verify against the upstream X86InstrSSE.td.
22 multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
23 RegisterClass RC, X86MemOperand x86memop,
25 let isCommutable = 1 in {
// rr: $dst = OpNode($src1, $src2), all in registers.
26 def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
28 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
29 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
30 [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
// rm: same op with the right-hand operand folded from memory.
32 def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
34 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
36 [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
39 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
// Like sse12_fp_scalar, but the patterns match an intrinsic whose name is
// assembled at TableGen time as "int_x86_sse<SSEVer>_<OpcodeStr><FPSizeStr>"
// via !cast<Intrinsic>. The rm form matches through the mem_cpat
// ComplexPattern instead of a plain load.
40 multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
41 string asm, string SSEVer, string FPSizeStr,
42 Operand memopr, ComplexPattern mem_cpat,
44 def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
46 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
47 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
48 [(set RC:$dst, (!cast<Intrinsic>(
49 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
50 RC:$src1, RC:$src2))]>;
51 def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
53 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
54 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
55 [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
56 SSEVer, "_", OpcodeStr, FPSizeStr))
57 RC:$src1, mem_cpat:$src2))]>;
60 /// sse12_fp_packed - SSE 1 & 2 packed instructions class
// Packed (vector) counterpart of sse12_fp_scalar: rr and rm forms of a
// packed binary op on the given vector type vt, tagged with an execution
// Domain d for the ExecutionDepsFix pass.
61 multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
62 RegisterClass RC, ValueType vt,
63 X86MemOperand x86memop, PatFrag mem_frag,
64 Domain d, bit Is2Addr = 1> {
65 let isCommutable = 1 in
66 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
68 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
69 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
70 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_DEFAULT, d>;
// rm: memory operand loaded through the mem_frag PatFrag (e.g. aligned load).
72 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
74 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
75 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
76 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
80 /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
// Packed logical ops (and/or/xor/andn): the selection patterns are supplied
// by the caller as pat_rr / pat_rm lists rather than built from an SDNode.
// rr_hasSideEffects feeds neverHasSideEffects on the rr form.
81 multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
82 string OpcodeStr, X86MemOperand x86memop,
83 list<dag> pat_rr, list<dag> pat_rm,
85 bit rr_hasSideEffects = 0> {
86 let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
87 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
89 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
90 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
91 pat_rr, IIC_DEFAULT, d>;
92 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
94 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
95 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
96 pat_rm, IIC_DEFAULT, d>;
99 /// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
// Intrinsic form of the packed ops. Note the intrinsic name is built as
// "int_x86_<SSEVer>_<OpcodeStr><FPSizeStr>" -- unlike sse12_fp_scalar_int,
// the "sse" prefix is part of SSEVer here, not hard-coded.
100 multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
101 string asm, string SSEVer, string FPSizeStr,
102 X86MemOperand x86memop, PatFrag mem_frag,
103 Domain d, bit Is2Addr = 1> {
104 def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
106 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
107 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
108 [(set RC:$dst, (!cast<Intrinsic>(
109 !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
110 RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
111 def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
113 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
114 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
115 [(set RC:$dst, (!cast<Intrinsic>(
116 !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
117 RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
120 //===----------------------------------------------------------------------===//
121 // Non-instruction patterns
122 //===----------------------------------------------------------------------===//
124 // A vector extract of the first f32/f64 position is a subregister copy
125 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
126 (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
127 def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
128 (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
130 // A 128-bit subvector extract from the first 256-bit vector position
131 // is a subregister copy that needs no instruction.
132 def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
133 (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
134 def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
135 (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
137 def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
138 (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
139 def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
140 (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
142 def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
143 (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
144 def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
145 (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
147 // A 128-bit subvector insert to the first 256-bit vector position
148 // is a subregister copy that needs no instruction.
// The upper half of the destination is left undefined (IMPLICIT_DEF).
149 def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
150 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
151 def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
152 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
153 def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
154 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
155 def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
156 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
157 def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
158 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
159 def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
160 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
162 // Implicitly promote a 32-bit scalar to a vector.
163 def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
164 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
165 def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
166 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
167 // Implicitly promote a 64-bit scalar to a vector.
168 def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
169 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
170 def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
171 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
173 // Bitcasts between 128-bit vector types. Return the original type since
174 // no instruction is needed for the conversion
// All 5x4 = 20 source/dest combinations among the five 128-bit vector
// types are enumerated; each bitconvert is a no-op at the machine level.
// NOTE(review): the closing brace of this `let Predicates` block is not
// visible in this chunk.
175 let Predicates = [HasSSE2] in {
176 def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
177 def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
178 def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
179 def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
180 def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
181 def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
182 def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
183 def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
184 def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
185 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
186 def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
187 def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
188 def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
189 def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
190 def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
191 def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
192 def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
193 def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
194 def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
195 def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
196 def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
197 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
198 def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
199 def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
200 def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
201 def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
202 def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
203 def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
204 def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
205 def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
208 // Bitcasts between 256-bit vector types. Return the original type since
209 // no instruction is needed for the conversion
// Same scheme as the 128-bit table above, over the five 256-bit types.
210 let Predicates = [HasAVX] in {
211 def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
212 def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
213 def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
214 def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
215 def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
216 def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
217 def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
218 def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
219 def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
220 def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
221 def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
222 def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
223 def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
224 def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
225 def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
226 def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
227 def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
228 def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
229 def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
230 def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
231 def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
232 def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
233 def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
234 def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
235 def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
236 def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
237 def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
238 def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
239 def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
240 def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
243 // Alias instructions that map fld0 to pxor for sse.
244 // This is expanded by ExpandPostRAPseudos.
// Pseudo-instructions materializing scalar FP +0.0 without a load; they are
// rematerializable and as cheap as a move.
245 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
247 def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
248 [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
249 def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
250 [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
253 //===----------------------------------------------------------------------===//
254 // AVX & SSE - Zero/One Vectors
255 //===----------------------------------------------------------------------===//
257 // Alias instruction that maps zero vector to pxor / xorp* for sse.
258 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
259 // swizzled by ExecutionDepsFix to pxor.
260 // We set canFoldAsLoad because this can be converted to a constant-pool
261 // load of an all-zeros value if folding it would be beneficial.
262 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
263 isPseudo = 1, neverHasSideEffects = 1 in {
264 def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
// Map every 128-bit all-zeros immediate onto the single V_SET0 pseudo.
267 def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
268 def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
269 def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
270 def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
271 def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
272 def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
275 // The same as done above but for AVX. The 256-bit ISA does not support PI,
276 // and doesn't need it because on sandy bridge the register is set to zero
277 // at the rename stage without using any execution unit, so SET0PSY
278 // and SET0PDY can be used for vector int instructions without penalty
279 // FIXME: Change encoding to pseudo! This is blocked right now by the x86
280 // JIT implementation, it does not expand the instructions below like
281 // X86MCInstLower does.
282 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
283 isCodeGenOnly = 1 in {
284 let Predicates = [HasAVX] in {
285 def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
286 [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
287 def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
288 [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
// AVX2 zeroing via vpxor (0xef); its selection patterns follow below.
290 let Predicates = [HasAVX2], neverHasSideEffects = 1 in
291 def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
// AddedComplexity = 5 prefers AVX2_SET0 over the SUBREG_TO_REG forms below.
295 let Predicates = [HasAVX2], AddedComplexity = 5 in {
296 def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
297 def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
298 def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
299 def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
302 // AVX has no support for 256-bit integer instructions, but since the 128-bit
303 // VPXOR instruction writes zero to its upper part, it's safe to build zeros.
304 def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
305 def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
306 (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
308 def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
309 def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
310 (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
312 def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
313 def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
314 (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
316 def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
317 def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
318 (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
320 // We set canFoldAsLoad because this can be converted to a constant-pool
321 // load of an all-ones value if folding it would be beneficial.
322 // FIXME: Change encoding to pseudo! This is blocked right now by the x86
323 // JIT implementation, it does not expand the instructions below like
324 // X86MCInstLower does.
// All-ones vectors materialized via pcmpeqd (0x76) on a register with itself.
325 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
326 isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
327 let Predicates = [HasAVX] in
328 def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
329 [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
330 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
331 [(set VR128:$dst, (v4i32 immAllOnesV))]>;
332 let Predicates = [HasAVX2] in
333 def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
334 [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
338 //===----------------------------------------------------------------------===//
339 // SSE 1 & 2 - Move FP Scalar Instructions
341 // Move Instructions. Register-to-register movss/movsd is not used for FR32/64
342 // register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
343 // is used instead. Register-to-register movss/movsd is not modeled as an
344 // INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
345 // in terms of a copy, and, as just mentioned, we don't use movss/movsd for
// copies.
346 //===----------------------------------------------------------------------===//
// rr form: merge a scalar (promoted via scalar_to_vector) into the low
// element of a VR128 through the OpNode (X86Movss/X86Movsd).
348 class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
349 SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
350 [(set VR128:$dst, (vt (OpNode VR128:$src1,
351 (scalar_to_vector RC:$src2))))]>;
353 // Loading from memory automatically zeroing upper bits.
354 class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
355 PatFrag mem_pat, string OpcodeStr> :
356 SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
357 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
358 [(set RC:$dst, (mem_pat addr:$src))]>;
// VEX-encoded (AVX) three-operand forms.
361 def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
362 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
364 def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
365 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
368 // For the disassembler
// _REV: store-direction (MRMDestReg) encodings; no selection patterns.
369 let isCodeGenOnly = 1 in {
370 def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
371 (ins VR128:$src1, FR32:$src2),
372 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
374 def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
375 (ins VR128:$src1, FR64:$src2),
376 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
380 let canFoldAsLoad = 1, isReMaterializable = 1 in {
381 def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
383 let AddedComplexity = 20 in
384 def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
// Scalar stores.
388 def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
389 "movss\t{$src, $dst|$dst, $src}",
390 [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
391 def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
392 "movsd\t{$src, $dst|$dst, $src}",
393 [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
// Legacy SSE two-operand forms: destination is tied to $src1.
396 let Constraints = "$src1 = $dst" in {
397 def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
398 "movss\t{$src2, $dst|$dst, $src2}">, XS;
399 def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
400 "movsd\t{$src2, $dst|$dst, $src2}">, XD;
402 // For the disassembler
403 let isCodeGenOnly = 1 in {
404 def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
405 (ins VR128:$src1, FR32:$src2),
406 "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
407 def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
408 (ins VR128:$src1, FR64:$src2),
409 "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
413 let canFoldAsLoad = 1, isReMaterializable = 1 in {
414 def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
416 let AddedComplexity = 20 in
417 def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
420 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
421 "movss\t{$src, $dst|$dst, $src}",
422 [(store FR32:$src, addr:$dst)]>;
423 def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
424 "movsd\t{$src, $dst|$dst, $src}",
425 [(store FR64:$src, addr:$dst)]>;
// Selection patterns choosing the VEX-encoded moves when AVX is available.
428 let Predicates = [HasAVX] in {
429 let AddedComplexity = 15 in {
430 // Move scalar to XMM zero-extended, zeroing a VR128 then do a
431 // MOVS{S,D} to the lower bits.
432 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
433 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
434 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
435 (VMOVSSrr (v4f32 (V_SET0)),
436 (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
437 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
438 (VMOVSSrr (v4i32 (V_SET0)),
439 (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
440 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
441 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
443 // Move low f32 and clear high bits.
444 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
445 (SUBREG_TO_REG (i32 0),
446 (VMOVSSrr (v4f32 (V_SET0)),
447 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
448 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
449 (SUBREG_TO_REG (i32 0),
450 (VMOVSSrr (v4i32 (V_SET0)),
451 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
454 let AddedComplexity = 20 in {
455 // MOVSSrm zeros the high parts of the register; represent this
456 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
457 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
458 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
459 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
460 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
461 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
462 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
464 // MOVSDrm zeros the high parts of the register; represent this
465 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
466 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
467 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
468 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
469 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
470 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
471 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
472 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
473 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
474 def : Pat<(v2f64 (X86vzload addr:$src)),
475 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
477 // Represent the same patterns above but in the form they appear for
// 256-bit destinations (vzmovl of an insert_subvector into undef).
479 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
480 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
481 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
482 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
483 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
484 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
485 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
486 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
487 (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
489 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
490 (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
491 (SUBREG_TO_REG (i32 0),
492 (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
494 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
495 (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
496 (SUBREG_TO_REG (i64 0),
497 (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
499 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
500 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
501 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
503 // Move low f64 and clear high bits.
504 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
505 (SUBREG_TO_REG (i32 0),
506 (VMOVSDrr (v2f64 (V_SET0)),
507 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
509 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
510 (SUBREG_TO_REG (i32 0),
511 (VMOVSDrr (v2i64 (V_SET0)),
512 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
514 // Extract and store.
515 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
518 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
519 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
522 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
524 // Shuffle with VMOVSS
525 def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
526 (VMOVSSrr (v4i32 VR128:$src1),
527 (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
528 def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
529 (VMOVSSrr (v4f32 VR128:$src1),
530 (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
// 256-bit movss shuffles operate on the low xmm halves, then reinsert.
533 def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
534 (SUBREG_TO_REG (i32 0),
535 (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
536 (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
537 def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
538 (SUBREG_TO_REG (i32 0),
539 (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
540 (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
542 // Shuffle with VMOVSD
543 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
544 (VMOVSDrr (v2i64 VR128:$src1),
545 (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
546 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
547 (VMOVSDrr (v2f64 VR128:$src1),
548 (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
549 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
550 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
552 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
553 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
557 def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
558 (SUBREG_TO_REG (i32 0),
559 (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
560 (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
561 def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
562 (SUBREG_TO_REG (i32 0),
563 (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
564 (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
567 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
568 // is during lowering, where it's not possible to recognize the fold because
569 // it has two uses through a bitcast. One use disappears at isel time and the
570 // fold opportunity reappears.
571 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
572 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
574 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
575 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
577 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
578 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
580 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
581 (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
// Legacy (non-VEX) counterparts of the AVX patterns above, SSE1 subset.
585 let Predicates = [HasSSE1] in {
586 let AddedComplexity = 15 in {
587 // Move scalar to XMM zero-extended, zeroing a VR128 then do a
588 // MOVSS to the lower bits.
589 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
590 (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
591 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
592 (MOVSSrr (v4f32 (V_SET0)),
593 (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
594 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
595 (MOVSSrr (v4i32 (V_SET0)),
596 (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
599 let AddedComplexity = 20 in {
600 // MOVSSrm zeros the high parts of the register; represent this
601 // with SUBREG_TO_REG.
602 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
603 (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
604 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
605 (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
606 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
607 (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
610 // Extract and store.
611 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
614 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
616 // Shuffle with MOVSS
617 def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
618 (MOVSSrr (v4i32 VR128:$src1),
619 (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
620 def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
621 (MOVSSrr (v4f32 VR128:$src1),
622 (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
// Legacy (non-VEX) counterparts of the AVX patterns above, SSE2 subset.
625 let Predicates = [HasSSE2] in {
626 let AddedComplexity = 15 in {
627 // Move scalar to XMM zero-extended, zeroing a VR128 then do a
628 // MOVSD to the lower bits.
629 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
630 (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
633 let AddedComplexity = 20 in {
634 // MOVSDrm zeros the high parts of the register; represent this
635 // with SUBREG_TO_REG.
636 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
637 (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
638 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
639 (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
640 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
641 (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
642 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
643 (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
644 def : Pat<(v2f64 (X86vzload addr:$src)),
645 (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
648 // Extract and store.
649 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
652 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
654 // Shuffle with MOVSD
655 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
656 (MOVSDrr (v2i64 VR128:$src1),
657 (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
658 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
659 (MOVSDrr (v2f64 VR128:$src1),
660 (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
661 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
662 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
663 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
664 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
666 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
667 // is during lowering, where it's not possible to recognize the fold because
668 // it has two uses through a bitcast. One use disappears at isel time and the
669 // fold opportunity reappears.
670 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
671 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
672 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
673 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
674 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
675 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
676 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
677 (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
680 //===----------------------------------------------------------------------===//
681 // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
682 //===----------------------------------------------------------------------===//
// sse12_mov_packed - common skeleton for the packed FP move instructions
// (MOVAPS/MOVUPS/MOVAPD/MOVUPD and their AVX forms). Instantiators supply the
// opcode, register class (VR128 or VR256), memory operand, load fragment,
// mnemonic and execution domain. IsReMaterializable controls whether the
// load form may be rematerialized (unaligned loads from possibly-changing
// memory pass 0).
684 multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
685 X86MemOperand x86memop, PatFrag ld_frag,
686 string asm, Domain d,
687 bit IsReMaterializable = 1> {
// Register-to-register form: pure move, no pattern, no side effects.
688 let neverHasSideEffects = 1 in
689 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
690 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], IIC_DEFAULT, d>;
// Load form: foldable as a load and (normally) rematerializable.
691 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
692 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
693 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
694 [(set RC:$dst, (ld_frag addr:$src))], IIC_DEFAULT, d>;
// AVX 128-bit packed moves (VEX-encoded, three fully independent operands not
// needed here since these are plain moves).
697 defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
698 "movaps", SSEPackedSingle>, TB, VEX;
699 defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
700 "movapd", SSEPackedDouble>, TB, OpSize, VEX;
701 defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
702 "movups", SSEPackedSingle>, TB, VEX;
// Unaligned-double load is marked non-rematerializable (trailing 0).
703 defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
704 "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
// AVX 256-bit (YMM) packed moves.
706 defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
707 "movaps", SSEPackedSingle>, TB, VEX;
708 defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
709 "movapd", SSEPackedDouble>, TB, OpSize, VEX;
710 defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
711 "movups", SSEPackedSingle>, TB, VEX;
712 defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
713 "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
// Legacy SSE1/SSE2 encodings of the same four moves.
714 defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
715 "movaps", SSEPackedSingle>, TB;
716 defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
717 "movapd", SSEPackedDouble>, TB, OpSize;
718 defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
719 "movups", SSEPackedSingle>, TB;
720 defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
721 "movupd", SSEPackedDouble, 0>, TB, OpSize;
// Store forms of the AVX packed moves. Aligned variants (opcode 0x29) match
// alignedstore / alignedstore256; unaligned variants (0x11) match plain store.
723 def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
724 "movaps\t{$src, $dst|$dst, $src}",
725 [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
726 def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
727 "movapd\t{$src, $dst|$dst, $src}",
728 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
729 def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
730 "movups\t{$src, $dst|$dst, $src}",
731 [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
732 def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
733 "movupd\t{$src, $dst|$dst, $src}",
734 [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
// 256-bit (YMM) store forms.
735 def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
736 "movaps\t{$src, $dst|$dst, $src}",
737 [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
738 def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
739 "movapd\t{$src, $dst|$dst, $src}",
740 [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
741 def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
742 "movups\t{$src, $dst|$dst, $src}",
743 [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
744 def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
745 "movupd\t{$src, $dst|$dst, $src}",
746 [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
// Reversed-operand register-to-register encodings (MRMDestReg) of the AVX
// packed moves. They carry no selection patterns ([]); isCodeGenOnly keeps
// them out of the assembly matcher so they do not conflict with the primary
// rr forms.
749 let isCodeGenOnly = 1 in {
750 def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
752 "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
753 def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
755 "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
756 def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
758 "movups\t{$src, $dst|$dst, $src}", []>, VEX;
759 def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
761 "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
762 def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
764 "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
765 def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
767 "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
768 def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
770 "movups\t{$src, $dst|$dst, $src}", []>, VEX;
771 def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
773 "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
// AVX patterns: a 128-bit value zero-extended into a 256-bit register
// (X86vzmovl of insert_subvector into undef) is just a VMOVAPSrr of the XMM
// source placed in the low sub_xmm lane; writing an XMM register zeroes the
// upper YMM bits under AVX, which is what makes VMOVAPSrr sufficient here.
776 let Predicates = [HasAVX] in {
777 def : Pat<(v8i32 (X86vzmovl
778 (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
779 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
780 def : Pat<(v4i64 (X86vzmovl
781 (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
782 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
783 def : Pat<(v8f32 (X86vzmovl
784 (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
785 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
786 def : Pat<(v4f64 (X86vzmovl
787 (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
788 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
// The 256-bit unaligned-store intrinsics map onto the VMOVUP*Ymr stores.
792 def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
793 (VMOVUPSYmr addr:$dst, VR256:$src)>;
794 def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
795 (VMOVUPDYmr addr:$dst, VR256:$src)>;
// Legacy SSE store forms: aligned (0x29) match alignedstore, unaligned (0x11)
// match plain store.
797 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
798 "movaps\t{$src, $dst|$dst, $src}",
799 [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
800 def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
801 "movapd\t{$src, $dst|$dst, $src}",
802 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
803 def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
804 "movups\t{$src, $dst|$dst, $src}",
805 [(store (v4f32 VR128:$src), addr:$dst)]>;
806 def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
807 "movupd\t{$src, $dst|$dst, $src}",
808 [(store (v2f64 VR128:$src), addr:$dst)]>;
// Reversed-operand (MRMDestReg) encodings of the legacy SSE packed moves,
// pattern-less and isCodeGenOnly — parallel to the V*rr_REV defs above.
811 let isCodeGenOnly = 1 in {
812 def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
813 "movaps\t{$src, $dst|$dst, $src}", []>;
814 def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
815 "movapd\t{$src, $dst|$dst, $src}", []>;
816 def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
817 "movups\t{$src, $dst|$dst, $src}", []>;
818 def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
819 "movupd\t{$src, $dst|$dst, $src}", []>;
// 128-bit unaligned-store intrinsics: the AVX patterns take priority when
// AVX is available; otherwise fall back to the legacy MOVUP* stores.
822 let Predicates = [HasAVX] in {
823 def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
824 (VMOVUPSmr addr:$dst, VR128:$src)>;
825 def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
826 (VMOVUPDmr addr:$dst, VR128:$src)>;
829 let Predicates = [HasSSE1] in
830 def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
831 (MOVUPSmr addr:$dst, VR128:$src)>;
832 let Predicates = [HasSSE2] in
833 def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
834 (MOVUPDmr addr:$dst, VR128:$src)>;
836 // Use vmovaps/vmovups for AVX integer load/store.
837 let Predicates = [HasAVX] in {
838 // 128-bit load/store
// Integer vector loads/stores of every 128-bit element type are selected to
// the FP-domain VMOVAPS/VMOVUPS forms (the domain-fixing pass can rewrite
// them later; see the comment on the SSE equivalents below).
839 def : Pat<(alignedloadv2i64 addr:$src),
840 (VMOVAPSrm addr:$src)>;
841 def : Pat<(loadv2i64 addr:$src),
842 (VMOVUPSrm addr:$src)>;
844 def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
845 (VMOVAPSmr addr:$dst, VR128:$src)>;
846 def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
847 (VMOVAPSmr addr:$dst, VR128:$src)>;
848 def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
849 (VMOVAPSmr addr:$dst, VR128:$src)>;
850 def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
851 (VMOVAPSmr addr:$dst, VR128:$src)>;
852 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
853 (VMOVUPSmr addr:$dst, VR128:$src)>;
854 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
855 (VMOVUPSmr addr:$dst, VR128:$src)>;
856 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
857 (VMOVUPSmr addr:$dst, VR128:$src)>;
858 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
859 (VMOVUPSmr addr:$dst, VR128:$src)>;
861 // 256-bit load/store
// Same mapping for the YMM integer element types.
862 def : Pat<(alignedloadv4i64 addr:$src),
863 (VMOVAPSYrm addr:$src)>;
864 def : Pat<(loadv4i64 addr:$src),
865 (VMOVUPSYrm addr:$src)>;
866 def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
867 (VMOVAPSYmr addr:$dst, VR256:$src)>;
868 def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
869 (VMOVAPSYmr addr:$dst, VR256:$src)>;
870 def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
871 (VMOVAPSYmr addr:$dst, VR256:$src)>;
872 def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
873 (VMOVAPSYmr addr:$dst, VR256:$src)>;
874 def : Pat<(store (v4i64 VR256:$src), addr:$dst),
875 (VMOVUPSYmr addr:$dst, VR256:$src)>;
876 def : Pat<(store (v8i32 VR256:$src), addr:$dst),
877 (VMOVUPSYmr addr:$dst, VR256:$src)>;
878 def : Pat<(store (v16i16 VR256:$src), addr:$dst),
879 (VMOVUPSYmr addr:$dst, VR256:$src)>;
880 def : Pat<(store (v32i8 VR256:$src), addr:$dst),
881 (VMOVUPSYmr addr:$dst, VR256:$src)>;
884 // Use movaps / movups for SSE integer load / store (one byte shorter).
885 // The instructions selected below are then converted to MOVDQA/MOVDQU
886 // during the SSE domain pass.
887 let Predicates = [HasSSE1] in {
// Integer vector load/store patterns for every 128-bit element type, all
// routed through MOVAPS (aligned) / MOVUPS (unaligned).
888 def : Pat<(alignedloadv2i64 addr:$src),
889 (MOVAPSrm addr:$src)>;
890 def : Pat<(loadv2i64 addr:$src),
891 (MOVUPSrm addr:$src)>;
893 def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
894 (MOVAPSmr addr:$dst, VR128:$src)>;
895 def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
896 (MOVAPSmr addr:$dst, VR128:$src)>;
897 def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
898 (MOVAPSmr addr:$dst, VR128:$src)>;
899 def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
900 (MOVAPSmr addr:$dst, VR128:$src)>;
901 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
902 (MOVUPSmr addr:$dst, VR128:$src)>;
903 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
904 (MOVUPSmr addr:$dst, VR128:$src)>;
905 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
906 (MOVUPSmr addr:$dst, VR128:$src)>;
907 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
908 (MOVUPSmr addr:$dst, VR128:$src)>;
911 // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
912 // bits are disregarded. FIXME: Set encoding to pseudo!
// Pattern-less, side-effect-free scalar-register copies encoded as the
// packed movaps/movapd (0x28) instructions, in both AVX and legacy forms.
913 let neverHasSideEffects = 1 in {
914 def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
915 "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
916 def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
917 "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
918 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
919 "movaps\t{$src, $dst|$dst, $src}", []>;
920 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
921 "movapd\t{$src, $dst|$dst, $src}", []>;
924 // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
925 // bits are disregarded. FIXME: Set encoding to pseudo!
// Scalar loads through the packed movaps/movapd encodings; matched via the
// alignedloadfsf32/alignedloadfsf64 fragments. Foldable and rematerializable.
926 let canFoldAsLoad = 1, isReMaterializable = 1 in {
927 let isCodeGenOnly = 1 in {
928 def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
929 "movaps\t{$src, $dst|$dst, $src}",
930 [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
931 def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
932 "movapd\t{$src, $dst|$dst, $src}",
933 [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
935 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
936 "movaps\t{$src, $dst|$dst, $src}",
937 [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
938 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
939 "movapd\t{$src, $dst|$dst, $src}",
940 [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
943 //===----------------------------------------------------------------------===//
944 // SSE 1 & 2 - Move Low packed FP Instructions
945 //===----------------------------------------------------------------------===//
// sse12_mov_hilo_packed - skeleton for MOVLPS/MOVLPD (and, instantiated with
// opcode 0x16 below, MOVHPS/MOVHPD). psnode/pdnode are the shuffle SDNodes
// for the single and double variants; base_opc gets "s"/"d" appended to form
// the mnemonics. Both forms merge a 64-bit memory operand into $src1.
947 multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
948 SDNode psnode, SDNode pdnode, string base_opc,
// Packed-single form: the loaded f64 is bitcast to v4f32 before the shuffle.
950 def PSrm : PI<opc, MRMSrcMem,
951 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
952 !strconcat(base_opc, "s", asm_opr),
955 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
956 IIC_DEFAULT, SSEPackedSingle>, TB;
// Packed-double form.
958 def PDrm : PI<opc, MRMSrcMem,
959 (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
960 !strconcat(base_opc, "d", asm_opr),
961 [(set RC:$dst, (v2f64 (pdnode RC:$src1,
962 (scalar_to_vector (loadf64 addr:$src2)))))],
963 IIC_DEFAULT, SSEPackedDouble>, TB, OpSize;
// MOVLPS/MOVLPD instantiations. AddedComplexity = 20 prefers these load-merge
// patterns over competing ones during selection.
966 let AddedComplexity = 20 in {
967 defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
968 "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
// Legacy two-address form ($src1 is tied to $dst).
970 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
971 defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
972 "\t{$src2, $dst|$dst, $src2}">;
// Store forms (opcode 0x13): write the low 64 bits of the XMM register to
// memory, matched as an f64 store of vector element 0.
975 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
976 "movlps\t{$src, $dst|$dst, $src}",
977 [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
978 (iPTR 0))), addr:$dst)]>, VEX;
979 def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
980 "movlpd\t{$src, $dst|$dst, $src}",
981 [(store (f64 (vector_extract (v2f64 VR128:$src),
982 (iPTR 0))), addr:$dst)]>, VEX;
983 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
984 "movlps\t{$src, $dst|$dst, $src}",
985 [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
986 (iPTR 0))), addr:$dst)]>;
987 def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
988 "movlpd\t{$src, $dst|$dst, $src}",
989 [(store (f64 (vector_extract (v2f64 VR128:$src),
990 (iPTR 0))), addr:$dst)]>;
992 let Predicates = [HasAVX] in {
993 // Shuffle with VMOVLPS
// X86Movlps/X86Movlpd with a loaded second operand folds into the rm forms.
994 def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
995 (VMOVLPSrm VR128:$src1, addr:$src2)>;
996 def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
997 (VMOVLPSrm VR128:$src1, addr:$src2)>;
999 // Shuffle with VMOVLPD
1000 def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1001 (VMOVLPDrm VR128:$src1, addr:$src2)>;
1002 def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1003 (VMOVLPDrm VR128:$src1, addr:$src2)>;
// Store of a movlp-shuffled value back to the same address becomes a plain
// low-half store through the mr forms.
1006 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
1008 (VMOVLPSmr addr:$src1, VR128:$src2)>;
1009 def : Pat<(store (v4i32 (X86Movlps
1010 (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
1011 (VMOVLPSmr addr:$src1, VR128:$src2)>;
1012 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1014 (VMOVLPDmr addr:$src1, VR128:$src2)>;
1015 def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1017 (VMOVLPDmr addr:$src1, VR128:$src2)>;
// Legacy SSE1 equivalents of the VMOVLPS patterns above.
1020 let Predicates = [HasSSE1] in {
1021 // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
1022 def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
1023 (iPTR 0))), addr:$src1),
1024 (MOVLPSmr addr:$src1, VR128:$src2)>;
1026 // Shuffle with MOVLPS
1027 def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
1028 (MOVLPSrm VR128:$src1, addr:$src2)>;
1029 def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
1030 (MOVLPSrm VR128:$src1, addr:$src2)>;
// Also catch the case where the 64-bit memory value arrives as a
// scalar_to_vector of an i64 load bitcast to v4f32.
1031 def : Pat<(X86Movlps VR128:$src1,
1032 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1033 (MOVLPSrm VR128:$src1, addr:$src2)>;
// Store-back of a movlps-shuffled value through MOVLPSmr.
1036 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
1038 (MOVLPSmr addr:$src1, VR128:$src2)>;
1039 def : Pat<(store (v4i32 (X86Movlps
1040 (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
1042 (MOVLPSmr addr:$src1, VR128:$src2)>;
// Legacy SSE2 equivalents of the VMOVLPD patterns above.
1045 let Predicates = [HasSSE2] in {
1046 // Shuffle with MOVLPD
1047 def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1048 (MOVLPDrm VR128:$src1, addr:$src2)>;
1049 def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1050 (MOVLPDrm VR128:$src1, addr:$src2)>;
// Store-back of a movlpd-shuffled value through MOVLPDmr.
1053 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1055 (MOVLPDmr addr:$src1, VR128:$src2)>;
1056 def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1058 (MOVLPDmr addr:$src1, VR128:$src2)>;
1061 //===----------------------------------------------------------------------===//
1062 // SSE 1 & 2 - Move Hi packed FP Instructions
1063 //===----------------------------------------------------------------------===//
// MOVHPS/MOVHPD (opcode 0x16) reuse the hilo multiclass; the SDNodes are the
// lhps/lhpd variants that place the loaded 64 bits in the high half.
1065 let AddedComplexity = 20 in {
1066 defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
1067 "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
1069 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1070 defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
1071 "\t{$src2, $dst|$dst, $src2}">;
1074 // v2f64 extract element 1 is always custom lowered to unpack high to low
1075 // and extract element 0 so the non-store version isn't too horrible.
// Store forms (opcode 0x17): the high 64 bits are exposed as element 0 of an
// unpck-high shuffle and stored as an f64.
1076 def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1077 "movhps\t{$src, $dst|$dst, $src}",
1078 [(store (f64 (vector_extract
1079 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
1080 (bc_v2f64 (v4f32 VR128:$src))),
1081 (iPTR 0))), addr:$dst)]>, VEX;
1082 def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1083 "movhpd\t{$src, $dst|$dst, $src}",
1084 [(store (f64 (vector_extract
1085 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
1086 (iPTR 0))), addr:$dst)]>, VEX;
1087 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1088 "movhps\t{$src, $dst|$dst, $src}",
1089 [(store (f64 (vector_extract
1090 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
1091 (bc_v2f64 (v4f32 VR128:$src))),
1092 (iPTR 0))), addr:$dst)]>;
1093 def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1094 "movhpd\t{$src, $dst|$dst, $src}",
1095 [(store (f64 (vector_extract
1096 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
1097 (iPTR 0))), addr:$dst)]>;
1099 let Predicates = [HasAVX] in {
// X86Movlhps with a 64-bit loaded (or zero-extended loaded) second operand
// folds to the VMOVHPS load form.
1101 def : Pat<(X86Movlhps VR128:$src1,
1102 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1103 (VMOVHPSrm VR128:$src1, addr:$src2)>;
1104 def : Pat<(X86Movlhps VR128:$src1,
1105 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
1106 (VMOVHPSrm VR128:$src1, addr:$src2)>;
1108 // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
1109 // is during lowering, where it's not possible to recognize the load fold
1110 // cause it has two uses through a bitcast. One use disappears at isel time
1111 // and the fold opportunity reappears.
1112 def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1113 (scalar_to_vector (loadf64 addr:$src2)))),
1114 (VMOVHPDrm VR128:$src1, addr:$src2)>;
// Legacy SSE1 equivalents of the VMOVHPS patterns.
1117 let Predicates = [HasSSE1] in {
1119 def : Pat<(X86Movlhps VR128:$src1,
1120 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1121 (MOVHPSrm VR128:$src1, addr:$src2)>;
1122 def : Pat<(X86Movlhps VR128:$src1,
1123 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
1124 (MOVHPSrm VR128:$src1, addr:$src2)>;
// Legacy SSE2 equivalent of the VMOVHPD pattern.
1127 let Predicates = [HasSSE2] in {
1128 // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
1129 // is during lowering, where it's not possible to recognize the load fold
1130 // cause it has two uses through a bitcast. One use disappears at isel time
1131 // and the fold opportunity reappears.
1132 def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1133 (scalar_to_vector (loadf64 addr:$src2)))),
1134 (MOVHPDrm VR128:$src1, addr:$src2)>;
1137 //===----------------------------------------------------------------------===//
1138 // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
1139 //===----------------------------------------------------------------------===//
// MOVLHPS (0x16) and MOVHLPS (0x12) register-to-register forms, matched
// directly from the X86Movlhps / X86Movhlps shuffle nodes. AVX forms take
// three operands; legacy forms tie $src1 to $dst.
1141 let AddedComplexity = 20 in {
1142 def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
1143 (ins VR128:$src1, VR128:$src2),
1144 "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1146 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
1148 def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
1149 (ins VR128:$src1, VR128:$src2),
1150 "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1152 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
1155 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1156 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
1157 (ins VR128:$src1, VR128:$src2),
1158 "movlhps\t{$src2, $dst|$dst, $src2}",
1160 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>;
1161 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
1162 (ins VR128:$src1, VR128:$src2),
1163 "movhlps\t{$src2, $dst|$dst, $src2}",
1165 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>;
// Integer-typed X86Movlhps/X86Movhlps shuffles map onto the FP instructions
// defined above; AVX and legacy SSE1 variants are kept in parallel.
1168 let Predicates = [HasAVX] in {
1170 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1171 (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
1172 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1173 (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1176 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1177 (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
1180 let Predicates = [HasSSE1] in {
1182 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1183 (MOVLHPSrr VR128:$src1, VR128:$src2)>;
1184 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1185 (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1188 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1189 (MOVHLPSrr VR128:$src1, VR128:$src2)>;
1192 //===----------------------------------------------------------------------===//
1193 // SSE 1 & 2 - Conversion Instructions
1194 //===----------------------------------------------------------------------===//
// sse12_cvt_s - scalar conversion: rr form converts a register, rm form
// converts a value loaded through ld_frag. OpNode is the ISD conversion node
// (e.g. fp_to_sint, sint_to_fp).
1196 multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1197 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
1199 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1200 [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
1201 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1202 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
// sse12_cvt_p - packed conversion; identical shape but carries an execution
// Domain and uses the packed instruction base class PI.
1205 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1206 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
1207 string asm, Domain d> {
1208 def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1209 [(set DstRC:$dst, (OpNode SrcRC:$src))],
1211 def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1212 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
// sse12_vcvt_avx - AVX three-operand scalar conversion skeleton. Pattern-less
// (selection is done via the explicit Pat<> defs further down) and marked
// side-effect free.
1216 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1217 X86MemOperand x86memop, string asm> {
1218 let neverHasSideEffects = 1 in {
1219 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
1220 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
1222 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1223 (ins DstRC:$src1, x86memop:$src),
1224 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
1225 } // neverHasSideEffects = 1
// AVX truncating FP-to-int conversions (cvttss2si / cvttsd2si), 32- and
// 64-bit destinations, matched through fp_to_sint.
1228 defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1229 "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
1231 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1232 "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
1234 defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1235 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
1237 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1238 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
1239 VEX, VEX_W, VEX_LIG;
1241 // The assembler can recognize rr 64-bit instructions by seeing a rxx
1242 // register, but the same isn't true when only using memory operands,
1243 // provide other assembly "l" and "q" forms to address this explicitly
1244 // where appropriate to do so.
1245 defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
1247 defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
1248 VEX_4V, VEX_W, VEX_LIG;
1249 defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
1251 defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
1253 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
1254 VEX_4V, VEX_W, VEX_LIG;
// sint_to_fp selection for AVX: the three-operand vcvtsi2* forms need a
// first source operand, supplied as IMPLICIT_DEF since only $src matters.
1256 let Predicates = [HasAVX], AddedComplexity = 1 in {
1257 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
1258 (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1259 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
1260 (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
1261 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
1262 (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
1263 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
1264 (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
1266 def : Pat<(f32 (sint_to_fp GR32:$src)),
1267 (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
1268 def : Pat<(f32 (sint_to_fp GR64:$src)),
1269 (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
1270 def : Pat<(f64 (sint_to_fp GR32:$src)),
1271 (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
1272 def : Pat<(f64 (sint_to_fp GR64:$src)),
1273 (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Legacy SSE scalar conversions: truncating FP-to-int (cvtt*) and
// int-to-FP (cvtsi2*), 32- and 64-bit widths (REX_W for the latter).
1276 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1277 "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
1278 defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1279 "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
1280 defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1281 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
1282 defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1283 "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
1284 defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
1285 "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
1286 defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
1287 "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
1288 defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
1289 "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
1290 defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
1291 "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
1293 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
1294 // and/or XMM operand(s).
// sse12_cvt_sint - conversion matched directly from a target intrinsic (Int)
// rather than an ISD node; rr and load-folding rm forms.
1296 multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1297 Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
1299 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
1300 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1301 [(set DstRC:$dst, (Int SrcRC:$src))]>;
1302 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
1303 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1304 [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
// sse12_cvt_sint_3addr - three-operand intrinsic conversion; the Is2Addr flag
// selects the two-operand (tied) or three-operand (AVX) assembly string.
1307 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
1308 RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
1309 PatFrag ld_frag, string asm, bit Is2Addr = 1> {
1310 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
1312 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1313 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1314 [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
1315 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1316 (ins DstRC:$src1, x86memop:$src2),
1318 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1319 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1320 [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
// cvtsd2si intrinsic forms (rounding, non-truncating), AVX then legacy.
1323 defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
1324 f128mem, load, "cvtsd2si">, XD, VEX, VEX_LIG;
1325 defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
1326 int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
1327 XD, VEX, VEX_W, VEX_LIG;
1329 defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
1330 f128mem, load, "cvtsd2si{l}">, XD;
1331 defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
1332 f128mem, load, "cvtsd2si{q}">, XD, REX_W;
// cvtsi2ss/cvtsi2sd intrinsic forms. The AVX variants pass Is2Addr = 0 for
// three-operand assembly; the legacy variants below tie $src1 to $dst.
1335 defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1336 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
1337 defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1338 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
1340 defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1341 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
1342 defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1343 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
1346 let Constraints = "$src1 = $dst" in {
1347 defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1348 int_x86_sse_cvtsi2ss, i32mem, loadi32,
1350 defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1351 int_x86_sse_cvtsi642ss, i64mem, loadi64,
1352 "cvtsi2ss{q}">, XS, REX_W;
1353 defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1354 int_x86_sse2_cvtsi2sd, i32mem, loadi32,
1356 defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1357 int_x86_sse2_cvtsi642sd, i64mem, loadi64,
1358 "cvtsi2sd">, XD, REX_W;
1363 // Aliases for intrinsics
// Truncating cvttss2si/cvttsd2si matched from their intrinsics (operating on
// the full XMM register rather than a scalar FR32/FR64), AVX then legacy.
1364 defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1365 f32mem, load, "cvttss2si">, XS, VEX;
1366 defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1367 int_x86_sse_cvttss2si64, f32mem, load,
1368 "cvttss2si">, XS, VEX, VEX_W;
1369 defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1370 f128mem, load, "cvttsd2si">, XD, VEX;
1371 defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1372 int_x86_sse2_cvttsd2si64, f128mem, load,
1373 "cvttsd2si">, XD, VEX, VEX_W;
1374 defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1375 f32mem, load, "cvttss2si">, XS;
1376 defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1377 int_x86_sse_cvttss2si64, f32mem, load,
1378 "cvttss2si{q}">, XS, REX_W;
1379 defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1380 f128mem, load, "cvttsd2si">, XD;
1381 defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1382 int_x86_sse2_cvttsd2si64, f128mem, load,
1383 "cvttsd2si{q}">, XD, REX_W;
// Assembler/disassembler-only conversion forms: Pattern = []<dag> suppresses
// the multiclasses' selection patterns (hence the undef OpNode and dummy
// load fragments); selection happens via the intrinsic Pat<> defs below.
1385 let Pattern = []<dag> in {
1386 defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
1387 "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
1389 defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
1390 "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
1392 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
1393 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1394 SSEPackedSingle>, TB, VEX;
1395 defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
1396 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1397 SSEPackedSingle>, TB, VEX;
1400 let Pattern = []<dag> in {
1401 defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
1402 "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
1403 defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
1404 "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
1405 defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
1406 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1407 SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
// cvtss2si intrinsic selection: the register form extracts the scalar sub_ss
// lane from the XMM operand; the load form folds the memory operand. AVX and
// legacy SSE1 variants in parallel.
1410 let Predicates = [HasAVX] in {
1411 def : Pat<(int_x86_sse_cvtss2si VR128:$src),
1412 (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1413 def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
1414 (VCVTSS2SIrm addr:$src)>;
1415 def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
1416 (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1417 def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
1418 (VCVTSS2SI64rm addr:$src)>;
1421 let Predicates = [HasSSE1] in {
1422 def : Pat<(int_x86_sse_cvtss2si VR128:$src),
1423 (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1424 def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
1425 (CVTSS2SIrm addr:$src)>;
1426 def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
1427 (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1428 def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
1429 (CVTSS2SI64rm addr:$src)>;
1434 // Convert scalar double to scalar single
1435 def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
1436 (ins FR64:$src1, FR64:$src2),
1437 "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
1440 def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
1441 (ins FR64:$src1, f64mem:$src2),
1442 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1443 []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
1445 def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
1448 def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
1449 "cvtsd2ss\t{$src, $dst|$dst, $src}",
1450 [(set FR32:$dst, (fround FR64:$src))]>;
1451 def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
1452 "cvtsd2ss\t{$src, $dst|$dst, $src}",
1453 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
1454 Requires<[HasSSE2, OptForSize]>;
1456 defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
1457 int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
1459 let Constraints = "$src1 = $dst" in
1460 defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
1461 int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
1463 // Convert scalar single to scalar double
1464 // SSE2 instructions with XS prefix
1465 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
1466 (ins FR32:$src1, FR32:$src2),
1467 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1468 []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
1470 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
1471 (ins FR32:$src1, f32mem:$src2),
1472 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1473 []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
1475 let Predicates = [HasAVX] in {
1476 def : Pat<(f64 (fextend FR32:$src)),
1477 (VCVTSS2SDrr FR32:$src, FR32:$src)>;
1478 def : Pat<(fextend (loadf32 addr:$src)),
1479 (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1480 def : Pat<(extloadf32 addr:$src),
1481 (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1484 def : Pat<(extloadf32 addr:$src),
1485 (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
1486 Requires<[HasAVX, OptForSpeed]>;
1488 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
1489 "cvtss2sd\t{$src, $dst|$dst, $src}",
1490 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
1491 Requires<[HasSSE2]>;
1492 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
1493 "cvtss2sd\t{$src, $dst|$dst, $src}",
1494 [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
1495 Requires<[HasSSE2, OptForSize]>;
1497 // extload f32 -> f64. This matches load+fextend because we have a hack in
1498 // the isel (PreprocessForFPConvert) that can introduce loads after dag
// combining.
1500 // Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
1502 def : Pat<(fextend (loadf32 addr:$src)),
1503 (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
1504 def : Pat<(extloadf32 addr:$src),
1505 (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
1507 def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
1508 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1509 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1510 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1511 VR128:$src2))]>, XS, VEX_4V,
1513 def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
1514 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
1515 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1516 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1517 (load addr:$src2)))]>, XS, VEX_4V,
1519 let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
1520 def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
1521 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1522 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1523 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1524 VR128:$src2))]>, XS,
1525 Requires<[HasSSE2]>;
1526 def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
1527 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
1528 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1529 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1530 (load addr:$src2)))]>, XS,
1531 Requires<[HasSSE2]>;
1534 // Convert doubleword to packed single/double fp
1535 // SSE2 instructions without OpSize prefix
1536 def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1537 "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1538 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
1539 TB, VEX, Requires<[HasAVX]>;
1540 def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
1541 "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1542 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
1543 (bitconvert (memopv2i64 addr:$src))))]>,
1544 TB, VEX, Requires<[HasAVX]>;
1545 def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1546 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1547 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
1548 TB, Requires<[HasSSE2]>;
1549 def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
1550 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1551 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
1552 (bitconvert (memopv2i64 addr:$src))))]>,
1553 TB, Requires<[HasSSE2]>;
1555 // FIXME: why the non-intrinsic version is described as SSE3?
1556 // SSE2 instructions with XS prefix
1557 def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1558 "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1559 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
1560 XS, VEX, Requires<[HasAVX]>;
1561 def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1562 "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1563 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
1564 (bitconvert (memopv2i64 addr:$src))))]>,
1565 XS, VEX, Requires<[HasAVX]>;
1566 def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1567 "cvtdq2pd\t{$src, $dst|$dst, $src}",
1568 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
1569 XS, Requires<[HasSSE2]>;
1570 def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1571 "cvtdq2pd\t{$src, $dst|$dst, $src}",
1572 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
1573 (bitconvert (memopv2i64 addr:$src))))]>,
1574 XS, Requires<[HasSSE2]>;
1577 // Convert packed single/double fp to doubleword
1578 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1579 "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
1580 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1581 "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
1582 def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1583 "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
1584 def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1585 "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
1586 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1587 "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
1588 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1589 "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
1591 def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1592 "cvtps2dq\t{$src, $dst|$dst, $src}",
1593 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
1595 def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
1597 "cvtps2dq\t{$src, $dst|$dst, $src}",
1598 [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1599 (memop addr:$src)))]>, VEX;
1600 def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1601 "cvtps2dq\t{$src, $dst|$dst, $src}",
1602 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
1603 def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1604 "cvtps2dq\t{$src, $dst|$dst, $src}",
1605 [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1606 (memop addr:$src)))]>;
1608 // SSE2 packed instructions with XD prefix
1609 def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1610 "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1611 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
1612 XD, VEX, Requires<[HasAVX]>;
1613 def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1614 "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1615 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1616 (memop addr:$src)))]>,
1617 XD, VEX, Requires<[HasAVX]>;
1618 def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1619 "cvtpd2dq\t{$src, $dst|$dst, $src}",
1620 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
1621 XD, Requires<[HasSSE2]>;
1622 def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1623 "cvtpd2dq\t{$src, $dst|$dst, $src}",
1624 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1625 (memop addr:$src)))]>,
1626 XD, Requires<[HasSSE2]>;
1629 // Convert with truncation packed single/double fp to doubleword
1630 // SSE2 packed instructions with XS prefix
1631 def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1632 "cvttps2dq\t{$src, $dst|$dst, $src}",
1634 (int_x86_sse2_cvttps2dq VR128:$src))]>, VEX;
1635 def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1636 "cvttps2dq\t{$src, $dst|$dst, $src}",
1637 [(set VR128:$dst, (int_x86_sse2_cvttps2dq
1638 (memop addr:$src)))]>, VEX;
1639 def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1640 "cvttps2dq\t{$src, $dst|$dst, $src}",
1642 (int_x86_avx_cvtt_ps2dq_256 VR256:$src))]>, VEX;
1643 def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1644 "cvttps2dq\t{$src, $dst|$dst, $src}",
1645 [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
1646 (memopv8f32 addr:$src)))]>, VEX;
1648 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1649 "cvttps2dq\t{$src, $dst|$dst, $src}",
1651 (int_x86_sse2_cvttps2dq VR128:$src))]>;
1652 def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1653 "cvttps2dq\t{$src, $dst|$dst, $src}",
1655 (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
1657 let Predicates = [HasAVX] in {
1658 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
1659 (Int_VCVTDQ2PSrr VR128:$src)>;
1660 def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
1661 (Int_VCVTDQ2PSrm addr:$src)>;
1663 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
1664 (VCVTTPS2DQrr VR128:$src)>;
1665 def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
1666 (VCVTTPS2DQrm addr:$src)>;
1668 def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
1669 (VCVTDQ2PSYrr VR256:$src)>;
1670 def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
1671 (VCVTDQ2PSYrm addr:$src)>;
1673 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
1674 (VCVTTPS2DQYrr VR256:$src)>;
1675 def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
1676 (VCVTTPS2DQYrm addr:$src)>;
1679 let Predicates = [HasSSE2] in {
1680 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
1681 (Int_CVTDQ2PSrr VR128:$src)>;
1682 def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
1683 (Int_CVTDQ2PSrm addr:$src)>;
1685 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
1686 (CVTTPS2DQrr VR128:$src)>;
1687 def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
1688 (CVTTPS2DQrm addr:$src)>;
1691 def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1692 "cvttpd2dq\t{$src, $dst|$dst, $src}",
1694 (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
// The AVX memory form is isCodeGenOnly: the plain "cvttpd2dq mem" spelling is
// ambiguous for the assembler (128- vs 256-bit source), which is resolved by
// the X/Y-suffixed alias defs that follow.
1695 let isCodeGenOnly = 1 in
1696 def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1697 "cvttpd2dq\t{$src, $dst|$dst, $src}",
1698 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1699 (memop addr:$src)))]>, VEX;
// SSE2 (non-VEX) register and memory forms, selected via the intrinsic.
1700 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1701 "cvttpd2dq\t{$src, $dst|$dst, $src}",
1702 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
1703 def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
1704 "cvttpd2dq\t{$src, $dst|$dst, $src}",
1705 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1706 (memop addr:$src)))]>;
1708 // The assembler can recognize rr 256-bit instructions by seeing a ymm
1709 // register, but the same isn't true when using memory operands instead.
1710 // Provide other assembly rr and rm forms to address this explicitly.
1711 def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1712 "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
1715 def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1716 "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
1717 def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1718 "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
1721 def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1722 "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
1723 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
1724 "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
1726 // Convert packed single to packed double
1727 let Predicates = [HasAVX] in {
1728 // SSE2 instructions without OpSize prefix
1729 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1730 "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
1731 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1732 "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
1733 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
1734 "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
1735 def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
1736 "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
1738 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1739 "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
1740 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1741 "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
1743 def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1744 "vcvtps2pd\t{$src, $dst|$dst, $src}",
1745 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
1746 TB, VEX, Requires<[HasAVX]>;
1747 def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1748 "vcvtps2pd\t{$src, $dst|$dst, $src}",
1749 [(set VR128:$dst, (int_x86_sse2_cvtps2pd
1750 (load addr:$src)))]>,
1751 TB, VEX, Requires<[HasAVX]>;
1752 def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1753 "cvtps2pd\t{$src, $dst|$dst, $src}",
1754 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
1755 TB, Requires<[HasSSE2]>;
1756 def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1757 "cvtps2pd\t{$src, $dst|$dst, $src}",
1758 [(set VR128:$dst, (int_x86_sse2_cvtps2pd
1759 (load addr:$src)))]>,
1760 TB, Requires<[HasSSE2]>;
1762 // Convert packed double to packed single
1763 // The assembler can recognize rr 256-bit instructions by seeing a ymm
1764 // register, but the same isn't true when using memory operands instead.
1765 // Provide other assembly rr and rm forms to address this explicitly.
1766 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1767 "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
1768 def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1769 "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
1772 def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1773 "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
1774 def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1775 "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
1778 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1779 "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
1780 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
1781 "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
1782 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1783 "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
1784 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1785 "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
1788 def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1789 "cvtpd2ps\t{$src, $dst|$dst, $src}",
1790 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
1791 def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
1793 "cvtpd2ps\t{$src, $dst|$dst, $src}",
1794 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
1795 (memop addr:$src)))]>;
1796 def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1797 "cvtpd2ps\t{$src, $dst|$dst, $src}",
1798 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
1799 def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1800 "cvtpd2ps\t{$src, $dst|$dst, $src}",
1801 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
1802 (memop addr:$src)))]>;
1804 // AVX 256-bit register conversion intrinsics
1805 // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
1806 // whenever possible to avoid declaring two versions of each one.
1807 def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
1808 (VCVTDQ2PSYrr VR256:$src)>;
1809 def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
1810 (VCVTDQ2PSYrm addr:$src)>;
1812 def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
1813 (VCVTPD2PSYrr VR256:$src)>;
1814 def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
1815 (VCVTPD2PSYrm addr:$src)>;
1817 def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
1818 (VCVTPS2DQYrr VR256:$src)>;
1819 def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
1820 (VCVTPS2DQYrm addr:$src)>;
1822 def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
1823 (VCVTPS2PDYrr VR128:$src)>;
1824 def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
1825 (VCVTPS2PDYrm addr:$src)>;
1827 def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
1828 (VCVTTPD2DQYrr VR256:$src)>;
1829 def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
1830 (VCVTTPD2DQYrm addr:$src)>;
1832 // Match fround and fextend for 128/256-bit conversions
1833 def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
1834 (VCVTPD2PSYrr VR256:$src)>;
1835 def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
1836 (VCVTPD2PSYrm addr:$src)>;
1838 def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
1839 (VCVTPS2PDYrr VR128:$src)>;
1840 def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
1841 (VCVTPS2PDYrm addr:$src)>;
1843 //===----------------------------------------------------------------------===//
1844 // SSE 1 & 2 - Compare Instructions
1845 //===----------------------------------------------------------------------===//
1847 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
1848 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
1849 SDNode OpNode, ValueType VT, PatFrag ld_frag,
1850 string asm, string asm_alt> {
1851 def rr : SIi8<0xC2, MRMSrcReg,
1852 (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
1853 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
1854 def rm : SIi8<0xC2, MRMSrcMem,
1855 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
1856 [(set RC:$dst, (OpNode (VT RC:$src1),
1857 (ld_frag addr:$src2), imm:$cc))]>;
1859 // Accept explicit immediate argument form instead of comparison code.
1860 let neverHasSideEffects = 1 in {
1861 def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
1862 (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
1864 def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
1865 (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
1869 defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
1870 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1871 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
1872 XS, VEX_4V, VEX_LIG;
1873 defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
1874 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1875 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
1876 XD, VEX_4V, VEX_LIG;
1878 let Constraints = "$src1 = $dst" in {
1879 defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
1880 "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
1881 "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
1883 defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
1884 "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
1885 "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
1889 multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
1890 Intrinsic Int, string asm> {
1891 def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
1892 (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
1893 [(set VR128:$dst, (Int VR128:$src1,
1894 VR128:$src, imm:$cc))]>;
1895 def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
1896 (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
1897 [(set VR128:$dst, (Int VR128:$src1,
1898 (load addr:$src), imm:$cc))]>;
1901 // Aliases to match intrinsics which expect XMM operand(s).
1902 defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
1903 "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
1905 defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
1906 "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
1908 let Constraints = "$src1 = $dst" in {
1909 defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
1910 "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
1911 defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
1912 "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
1916 // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
1917 multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
1918 ValueType vt, X86MemOperand x86memop,
1919 PatFrag ld_frag, string OpcodeStr, Domain d> {
1920 def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
1921 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1922 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
1924 def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
1925 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1926 [(set EFLAGS, (OpNode (vt RC:$src1),
1927 (ld_frag addr:$src2)))],
1931 let Defs = [EFLAGS] in {
1932 defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
1933 "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
1934 defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
1935 "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
1937 let Pattern = []<dag> in {
1938 defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
1939 "comiss", SSEPackedSingle>, TB, VEX,
1941 defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
1942 "comisd", SSEPackedDouble>, TB, OpSize, VEX,
1946 defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
1947 load, "ucomiss", SSEPackedSingle>, TB, VEX;
1948 defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
1949 load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
1951 defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
1952 load, "comiss", SSEPackedSingle>, TB, VEX;
1953 defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
1954 load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
1955 defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
1956 "ucomiss", SSEPackedSingle>, TB;
1957 defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
1958 "ucomisd", SSEPackedDouble>, TB, OpSize;
1960 let Pattern = []<dag> in {
1961 defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
1962 "comiss", SSEPackedSingle>, TB;
1963 defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
1964 "comisd", SSEPackedDouble>, TB, OpSize;
1967 defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
1968 load, "ucomiss", SSEPackedSingle>, TB;
1969 defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
1970 load, "ucomisd", SSEPackedDouble>, TB, OpSize;
1972 defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
1973 "comiss", SSEPackedSingle>, TB;
1974 defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
1975 "comisd", SSEPackedDouble>, TB, OpSize;
1976 } // Defs = [EFLAGS]
1978 // sse12_cmp_packed - sse 1 & 2 compared packed instructions
1979 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
1980 Intrinsic Int, string asm, string asm_alt,
1982 let isAsmParserOnly = 1 in {
1983 def rri : PIi8<0xC2, MRMSrcReg,
1984 (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
1985 [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
1987 def rmi : PIi8<0xC2, MRMSrcMem,
1988 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
1989 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
1993 // Accept explicit immediate argument form instead of comparison code.
1994 def rri_alt : PIi8<0xC2, MRMSrcReg,
1995 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1996 asm_alt, [], IIC_DEFAULT, d>;
1997 def rmi_alt : PIi8<0xC2, MRMSrcMem,
1998 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1999 asm_alt, [], IIC_DEFAULT, d>;
2002 defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
2003 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2004 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2005 SSEPackedSingle>, TB, VEX_4V;
2006 defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
2007 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2008 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2009 SSEPackedDouble>, TB, OpSize, VEX_4V;
2010 defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
2011 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2012 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2013 SSEPackedSingle>, TB, VEX_4V;
2014 defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
2015 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2016 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2017 SSEPackedDouble>, TB, OpSize, VEX_4V;
2018 let Constraints = "$src1 = $dst" in {
2019 defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
2020 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
2021 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
2022 SSEPackedSingle>, TB;
2023 defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
2024 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
2025 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
2026 SSEPackedDouble>, TB, OpSize;
2029 let Predicates = [HasAVX] in {
2030 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
2031 (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
2032 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
2033 (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
2034 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
2035 (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
2036 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
2037 (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
2039 def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
2040 (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
2041 def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
2042 (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
2043 def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
2044 (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
2045 def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
2046 (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
2049 let Predicates = [HasSSE1] in {
2050 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
2051 (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
2052 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
2053 (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
2056 let Predicates = [HasSSE2] in {
2057 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
2058 (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
2059 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
2060 (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
2063 //===----------------------------------------------------------------------===//
2064 // SSE 1 & 2 - Shuffle Instructions
2065 //===----------------------------------------------------------------------===//
2067 /// sse12_shuffle - sse 1 & 2 shuffle instructions
2068 multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
2069 ValueType vt, string asm, PatFrag mem_frag,
2070 Domain d, bit IsConvertibleToThreeAddress = 0> {
2071 def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
2072 (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
2073 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
2074 (i8 imm:$src3))))], IIC_DEFAULT, d>;
2075 let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
2076 def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
2077 (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
2078 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
2079 (i8 imm:$src3))))], IIC_DEFAULT, d>;
2082 defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
2083 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2084 memopv4f32, SSEPackedSingle>, TB, VEX_4V;
2085 defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
2086 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2087 memopv8f32, SSEPackedSingle>, TB, VEX_4V;
2088 defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
// Intel-syntax operand order is the reverse of AT&T (dst, src1, src2, imm).
// Fix: the first source in the Intel half must be $src1, not a repeated
// $src2 (compare the correct VSHUFPS string above).
2089 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2090 memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
2091 defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
// Intel-syntax operand order is the reverse of AT&T (dst, src1, src2, imm).
// Fix: the first source in the Intel half must be $src1, not a repeated
// $src2 (compare the correct VSHUFPSY string above).
2092 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2093 memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
2095 let Constraints = "$src1 = $dst" in {
2096 defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
2097 "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2098 memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
2100 defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
2101 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2102 memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
// Select integer-typed X86Shufp DAG nodes onto the FP shuffle instructions.
// Under AVX this covers both 128-bit (v4i32/v2i64) and 256-bit (v8i32/v4i64)
// reg-reg and reg-mem forms; the HasSSE1/HasSSE2 blocks below provide the
// same mappings for the legacy two-address SHUFPS/SHUFPD.
2106 let Predicates = [HasAVX] in {
2107 def : Pat<(v4i32 (X86Shufp VR128:$src1,
2108 (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
2109 (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2110 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2111 (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
2113 def : Pat<(v2i64 (X86Shufp VR128:$src1,
2114 (memopv2i64 addr:$src2), (i8 imm:$imm))),
2115 (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2116 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2117 (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
// 256-bit AVX forms ('Y' instruction variants).
2120 def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2121 (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2122 def : Pat<(v8i32 (X86Shufp VR256:$src1,
2123 (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
2124 (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
2126 def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2127 (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2128 def : Pat<(v4i64 (X86Shufp VR256:$src1,
2129 (memopv4i64 addr:$src2), (i8 imm:$imm))),
2130 (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
// Legacy SSE1: v4i32 shuffles via SHUFPS (memory operand loaded as v2i64 and
// bitcast, since SSE1 has no native integer vector loads).
2133 let Predicates = [HasSSE1] in {
2134 def : Pat<(v4i32 (X86Shufp VR128:$src1,
2135 (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
2136 (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2137 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2138 (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
// Legacy SSE2: v2i64 shuffles via SHUFPD.
2141 let Predicates = [HasSSE2] in {
2142 // Generic SHUFPD patterns
2143 def : Pat<(v2i64 (X86Shufp VR128:$src1,
2144 (memopv2i64 addr:$src2), (i8 imm:$imm))),
2145 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2146 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2147 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
2150 //===----------------------------------------------------------------------===//
2151 // SSE 1 & 2 - Unpack Instructions
2152 //===----------------------------------------------------------------------===//
2154 /// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
2155 multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
2156 PatFrag mem_frag, RegisterClass RC,
2157 X86MemOperand x86memop, string asm,
2159 def rr : PI<opc, MRMSrcReg,
2160 (outs RC:$dst), (ins RC:$src1, RC:$src2),
2162 (vt (OpNode RC:$src1, RC:$src2)))],
2164 def rm : PI<opc, MRMSrcMem,
2165 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2167 (vt (OpNode RC:$src1,
2168 (mem_frag addr:$src2))))],
// AVX three-operand unpack instructions: opcode 0x15 = UNPCKHP*, 0x14 =
// UNPCKLP*.  128-bit (VR128/f128mem) forms first, then the 256-bit 'Y'
// variants (VR256/f256mem).  PD variants take OpSize (66h prefix).
2172 defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
2173 VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2174 SSEPackedSingle>, TB, VEX_4V;
2175 defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
2176 VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2177 SSEPackedDouble>, TB, OpSize, VEX_4V;
2178 defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
2179 VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2180 SSEPackedSingle>, TB, VEX_4V;
2181 defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
2182 VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2183 SSEPackedDouble>, TB, OpSize, VEX_4V;
2185 defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
2186 VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2187 SSEPackedSingle>, TB, VEX_4V;
2188 defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
2189 VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2190 SSEPackedDouble>, TB, OpSize, VEX_4V;
2191 defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
2192 VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2193 SSEPackedSingle>, TB, VEX_4V;
2194 defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
2195 VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2196 SSEPackedDouble>, TB, OpSize, VEX_4V;
// Legacy two-address SSE unpacks: $src1 tied to $dst, so the asm string shows
// only {$src2, $dst}.
2198 let Constraints = "$src1 = $dst" in {
2199 defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
2200 VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
2201 SSEPackedSingle>, TB;
2202 defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
2203 VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
2204 SSEPackedDouble>, TB, OpSize;
2205 defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
2206 VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
2207 SSEPackedSingle>, TB;
2208 defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
2209 VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
2210 SSEPackedDouble>, TB, OpSize;
2211 } // Constraints = "$src1 = $dst"
// Lower a same-register double duplication to UNPCKLPD (src interleaved with
// itself); see the FIXME below for why X86Movddup is matched here.
2213 let Predicates = [HasAVX], AddedComplexity = 1 in {
2214   // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
2215   // problem is during lowering, where it's not possible to recognize the load
2216   // fold cause it has two uses through a bitcast. One use disappears at isel
2217   // time and the fold opportunity reappears.
2218   def : Pat<(v2f64 (X86Movddup VR128:$src)),
2219             (VUNPCKLPDrr VR128:$src, VR128:$src)>;
2222 let Predicates = [HasSSE2] in {
2223   // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
2224   // problem is during lowering, where it's not possible to recognize the load
2225   // fold cause it has two uses through a bitcast. One use disappears at isel
2226   // time and the fold opportunity reappears.
2227   def : Pat<(v2f64 (X86Movddup VR128:$src)),
2228             (UNPCKLPDrr VR128:$src, VR128:$src)>;
2231 //===----------------------------------------------------------------------===//
2232 // SSE 1 & 2 - Extract Floating-Point Sign mask
2233 //===----------------------------------------------------------------------===//
2235 /// sse12_extr_sign_mask - extract packed FP sign bits (MOVMSKPS/MOVMSKPD).
/// rr32 writes the sign mask into a GR32 via the supplied intrinsic; rr64 is
/// a pattern-less GR64 variant distinguished by the REX.W prefix.
/// (The original comment said "unpack and interleave" — a copy-paste slip.)
2236 multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
2238 def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
2239 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
2240 [(set GR32:$dst, (Int RC:$src))], IIC_DEFAULT, d>;
2241 def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
2242 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
2243 IIC_DEFAULT, d>, REX_W;
// AVX MOVMSK: 128-bit and 256-bit sign-mask extraction, plus X86fgetsign
// patterns that insert a scalar into an undef vector and run MOVMSK on it.
2246 let Predicates = [HasAVX] in {
2247 defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
2248 "movmskps", SSEPackedSingle>, TB, VEX;
2249 defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
2250 "movmskpd", SSEPackedDouble>, TB,
2252 defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
2253 "movmskps", SSEPackedSingle>, TB, VEX;
2254 defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
2255 "movmskpd", SSEPackedDouble>, TB,
// Scalar sign extraction: widen FR32/FR64 into an undef XMM, then MOVMSK.
2258 def : Pat<(i32 (X86fgetsign FR32:$src)),
2259 (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2261 def : Pat<(i64 (X86fgetsign FR32:$src)),
2262 (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2264 def : Pat<(i32 (X86fgetsign FR64:$src)),
2265 (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2267 def : Pat<(i64 (X86fgetsign FR64:$src)),
2268 (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
// Pattern-less assembler-only variants with a 64-bit GPR destination.
2272 def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
2273 "movmskps\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT,
2274 SSEPackedSingle>, TB, VEX;
2275 def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
2276 "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT,
2277 SSEPackedDouble>, TB,
2279 def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
2280 "movmskps\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT,
2281 SSEPackedSingle>, TB, VEX;
2282 def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
2283 "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT,
2284 SSEPackedDouble>, TB,
// Legacy SSE MOVMSK and the corresponding X86fgetsign patterns (scalar is
// widened into an undef XMM register via INSERT_SUBREG before MOVMSK).
2288 defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
2289 SSEPackedSingle>, TB;
2290 defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
2291 SSEPackedDouble>, TB, OpSize;
2293 def : Pat<(i32 (X86fgetsign FR32:$src)),
2294 (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2295 sub_ss))>, Requires<[HasSSE1]>;
2296 def : Pat<(i64 (X86fgetsign FR32:$src)),
2297 (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2298 sub_ss))>, Requires<[HasSSE1]>;
2299 def : Pat<(i32 (X86fgetsign FR64:$src)),
2300 (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2301 sub_sd))>, Requires<[HasSSE2]>;
2302 def : Pat<(i64 (X86fgetsign FR64:$src)),
2303 (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2304 sub_sd))>, Requires<[HasSSE2]>;
2306 //===---------------------------------------------------------------------===//
2307 // SSE2 - Packed Integer Logical Instructions
2308 //===---------------------------------------------------------------------===//
2310 let ExeDomain = SSEPackedInt in { // SSE integer instructions
2312 /// PDI_binop_rm - Simple SSE2 binary operator.
/// Emits rr and rm forms: $dst = OpNode($src1, $src2), with the rm form
/// bitconverting a memop_frag load of $src2 to OpVT first.  IsCommutable
/// forwards to the rr form's isCommutable flag.
2313 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2314                         ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2315                         X86MemOperand x86memop, bit IsCommutable = 0,
2317   let isCommutable = IsCommutable in
2318   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
2319        (ins RC:$src1, RC:$src2),
2321            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2322            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2323        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
2324   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
2325        (ins RC:$src1, x86memop:$src2),
2327            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2328            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2329        [(set RC:$dst, (OpVT (OpNode RC:$src1,
2330                                      (bitconvert (memop_frag addr:$src2)))))]>;
2332 } // ExeDomain = SSEPackedInt
2334 // These are ordered here for pattern ordering requirements with the fp versions
// Packed integer logical ops.  Opcodes: 0xDB=PAND, 0xEB=POR, 0xEF=PXOR,
// 0xDF=PANDN.  AND/OR/XOR are commutable (IsCommutable=1); ANDN is not.
2336 let Predicates = [HasAVX] in {
2337 defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
2338                           i128mem, 1, 0>, VEX_4V;
2339 defm VPOR  : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
2340                           i128mem, 1, 0>, VEX_4V;
2341 defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
2342                           i128mem, 1, 0>, VEX_4V;
2343 defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
2344                            i128mem, 0, 0>, VEX_4V;
// Legacy two-address SSE2 forms.
2347 let Constraints = "$src1 = $dst" in {
2348 defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
2350 defm POR  : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
2352 defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
2354 defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
2356 } // Constraints = "$src1 = $dst"
// AVX2 256-bit integer logical ops (v4i64 over VR256/i256mem).
2358 let Predicates = [HasAVX2] in {
2359 defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
2360                            i256mem, 1, 0>, VEX_4V;
2361 defm VPORY  : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
2362                            i256mem, 1, 0>, VEX_4V;
2363 defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
2364                            i256mem, 1, 0>, VEX_4V;
2365 defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
2366                             i256mem, 0, 0>, VEX_4V;
2369 //===----------------------------------------------------------------------===//
2370 // SSE 1 & 2 - Logical Instructions
2371 //===----------------------------------------------------------------------===//
2373 /// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
/// Instantiates scalar-register (FR32/FR64) aliases of the packed logical
/// instructions: VEX three-operand PS/PD first, then legacy two-address
/// forms under the "$src1 = $dst" constraint.
2375 multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
2377   defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
2378               FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
2380   defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
2381         FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
2383   let Constraints = "$src1 = $dst" in {
2384     defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
2385                 f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
2387     defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
2388                 f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
2392 // Alias bitwise logical operations using SSE logical ops on packed FP values.
2393 let mayLoad = 0 in {
2394   defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
2395   defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
2396   defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
// ANDN has no SDNode here: patterns are emptied and commutativity disabled.
2399 let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
2400   defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
2402 /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
/// All patterns operate on v2i64 bitcasts of the FP vectors, so one integer
/// pattern serves both PS and PD element types.
2404 multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
2406   // In AVX no need to add a pattern for 128-bit logical rr ps, because they
2407   // are all promoted to v2i64, and the patterns are covered by the int
2408   // version. This is needed in SSE only, because v2i64 isn't supported on
2409   // SSE1, but only on SSE2.
2410   defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2411        !strconcat(OpcodeStr, "ps"), f128mem, [],
2412        [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2413                                   (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
2415   defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2416        !strconcat(OpcodeStr, "pd"), f128mem,
2417        [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2418                                  (bc_v2i64 (v2f64 VR128:$src2))))],
2419        [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2420                                  (memopv2i64 addr:$src2)))], 0>,
// Legacy two-address SSE forms; here the PS rr pattern IS needed (see note
// above about v2i64 not existing on SSE1).
2422   let Constraints = "$src1 = $dst" in {
2423     defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2424          !strconcat(OpcodeStr, "ps"), f128mem,
2425          [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
2426          [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2427                                    (memopv2i64 addr:$src2)))]>, TB;
2429     defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2430          !strconcat(OpcodeStr, "pd"), f128mem,
2431          [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2432                                    (bc_v2i64 (v2f64 VR128:$src2))))],
2433          [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2434                                    (memopv2i64 addr:$src2)))]>, TB, OpSize;
2438 /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
/// Same scheme as the 128-bit version, promoted to v4i64 over VR256/f256mem.
2440 multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
2442   defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
2443             !strconcat(OpcodeStr, "ps"), f256mem,
2444             [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
2445             [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
2446                                (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
2448   defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
2449             !strconcat(OpcodeStr, "pd"), f256mem,
2450             [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2451                                        (bc_v4i64 (v4f64 VR256:$src2))))],
2452             [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2453                                (memopv4i64 addr:$src2)))], 0>,
2457 // AVX 256-bit packed logical ops forms
// Opcodes: 0x54=ANDPS/PD, 0x55=ANDNPS/PD, 0x56=ORPS/PD, 0x57=XORPS/PD.
2458 defm VAND  : sse12_fp_packed_logical_y<0x54, "and", and>;
2459 defm VOR   : sse12_fp_packed_logical_y<0x56, "or", or>;
2460 defm VXOR  : sse12_fp_packed_logical_y<0x57, "xor", xor>;
2461 defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
// 128-bit forms (VEX and legacy variants come from the multiclass).
2463 defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
2464 defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
2465 defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
2466 let isCommutable = 0 in
2467   defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
2469 //===----------------------------------------------------------------------===//
2470 // SSE 1 & 2 - Arithmetic Instructions
2471 //===----------------------------------------------------------------------===//
2473 /// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
2476 /// In addition, we also have a special variant of the scalar form here to
2477 /// represent the associated intrinsic operation. This form is unlike the
2478 /// plain scalar form, in that it takes an entire vector (instead of a scalar)
2479 /// and leaves the top elements unmodified (therefore these cannot be commuted).
2481 /// These three forms can each be reg+reg or reg+mem.
2484 /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
// Scalar forms: SS (f32, XS prefix) and SD (f64, XD prefix).
2486 multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2488   defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
2489                             OpNode, FR32, f32mem, Is2Addr>, XS;
2490   defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
2491                             OpNode, FR64, f64mem, Is2Addr>, XD;
// 128-bit packed forms (PS/PD over VR128).
2494 multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
2496   let mayLoad = 0 in {
2497   defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
2498              v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
2499   defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
2500              v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
// 256-bit AVX packed forms (PSY/PDY over VR256, never two-address).
2504 multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
2506   let mayLoad = 0 in {
2507     defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
2508                 v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
2509     defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
2510                 v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
// Scalar intrinsic forms; SSEVer strings ("", "2") select the intrinsic name.
2514 multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
2516   defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
2517      !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
2518   defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
2519      !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
// Packed intrinsic forms ("sse"/"sse2" intrinsic families).
2522 multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
2524   defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
2525      !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
2526                                    SSEPackedSingle, Is2Addr>, TB;
2528   defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
2529      !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
2530                                    SSEPackedDouble, Is2Addr>, TB, OpSize;
// 256-bit packed intrinsic forms ("avx" intrinsic family, *_256 suffix).
2533 multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
2534   defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
2535      !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
2536                                    SSEPackedSingle, 0>, TB;
2538   defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
2539      !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
2540                                    SSEPackedDouble, 0>, TB, OpSize;
2543 // Binary Arithmetic instructions
// AVX forms.  Opcodes: 0x58=ADD, 0x59=MUL, 0x5C=SUB, 0x5D=MIN, 0x5E=DIV,
// 0x5F=MAX.  ADD/MUL are commutable; SUB/DIV/MAX/MIN are defined under
// isCommutable = 0 (FP max/min are not commutable due to NaN/zero semantics
// of the underlying instructions).
2544 defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
2545             basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
2546 defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
2547             basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
2548 defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
2549             basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
2550 defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
2551             basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
2553 let isCommutable = 0 in {
2554   defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
2555               basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
2556   defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
2557               basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
2558   defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
2559               basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
2560   defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
2561               basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
2562   defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
2563               basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
2564   defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
2565               basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
2566               basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
2567               basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
2568   defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
2569               basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
2570   defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
2571               basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
2572               basic_sse12_fp_binop_p_y_int<0x5D, "min">,
2573               basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
// Legacy two-address SSE forms of the same binops ($src1 tied to $dst).
2576 let Constraints = "$src1 = $dst" in {
2577   defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
2578              basic_sse12_fp_binop_p<0x58, "add", fadd>,
2579              basic_sse12_fp_binop_s_int<0x58, "add">;
2580   defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
2581              basic_sse12_fp_binop_p<0x59, "mul", fmul>,
2582              basic_sse12_fp_binop_s_int<0x59, "mul">;
2584   let isCommutable = 0 in {
2585     defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
2586                basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
2587                basic_sse12_fp_binop_s_int<0x5C, "sub">;
2588     defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
2589                basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
2590                basic_sse12_fp_binop_s_int<0x5E, "div">;
2591     defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
2592                basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
2593                basic_sse12_fp_binop_s_int<0x5F, "max">,
2594                basic_sse12_fp_binop_p_int<0x5F, "max">;
2595     defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
2596                basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
2597                basic_sse12_fp_binop_s_int<0x5D, "min">,
2598                basic_sse12_fp_binop_p_int<0x5D, "min">;
2603 /// In addition, we also have a special variant of the scalar form here to
2604 /// represent the associated intrinsic operation. This form is unlike the
2605 /// plain scalar form, in that it takes an entire vector (instead of a
2606 /// scalar) and leaves the top elements undefined.
2608 /// And, we have a special variant form for a full-vector intrinsic form.
2610 /// sse1_fp_unop_s - SSE1 unops in scalar form.
/// Four defs: plain FR32 reg and mem forms (SSr/SSm) for OpNode, plus
/// whole-vector intrinsic forms (SSr_Int/SSm_Int) for F32Int.
2611 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
2612                           SDNode OpNode, Intrinsic F32Int> {
2613   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
2614                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
2615                 [(set FR32:$dst, (OpNode FR32:$src))]>;
2616   // For scalar unary operations, fold a load into the operation
2617   // only in OptForSize mode. It eliminates an instruction, but it also
2618   // eliminates a whole-register clobber (the load), so it introduces a
2619   // partial register update condition.
2620   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
2621                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
2622                 [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
2623             Requires<[HasSSE1, OptForSize]>;
2624   def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2625                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
2626                     [(set VR128:$dst, (F32Int VR128:$src))]>;
2627   def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
2628                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
2629                     [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
2632 /// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
/// Pattern-less three-operand AVX variants; selection is done through the
/// explicit Pat<> definitions later in this file.
2633 multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
2634   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
2635                 !strconcat(OpcodeStr,
2636                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2638   def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
2639                 !strconcat(OpcodeStr,
2640                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2641   def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
2642                 (ins VR128:$src1, ssmem:$src2),
2643                 !strconcat(OpcodeStr,
2644                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2647 /// sse1_fp_unop_p - SSE1 unops in packed form.
/// PSr applies OpNode to a v4f32 register; PSm folds a 128-bit memory load.
2648 multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
2649   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2650               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2651               [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
2652   def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2653                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2654                 [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
2657 /// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
/// v8f32 over VR256/f256mem; otherwise parallel to sse1_fp_unop_p.
2658 multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
2659   def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2660               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2661               [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
2662   def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2663                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2664                 [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
2667 /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
/// Same shape as sse1_fp_unop_p but matches the V4F32Int intrinsic instead
/// of an SDNode.
2668 multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
2669                               Intrinsic V4F32Int> {
2670   def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2671                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2672                     [(set VR128:$dst, (V4F32Int VR128:$src))]>;
2673   def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2674                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2675                     [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
2678 /// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
/// 256-bit (VR256/f256mem) counterpart of sse1_fp_unop_p_int.
2679 multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
2680                                 Intrinsic V4F32Int> {
2681   def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2682                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2683                     [(set VR256:$dst, (V4F32Int VR256:$src))]>;
2684   def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2685                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2686                     [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
2689 /// sse2_fp_unop_s - SSE2 unops in scalar form.
/// f64 counterpart of sse1_fp_unop_s: FR64 reg/mem forms plus whole-vector
/// intrinsic forms.
2690 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
2691                           SDNode OpNode, Intrinsic F64Int> {
2692   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
2693                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
2694                 [(set FR64:$dst, (OpNode FR64:$src))]>;
2695   // See the comments in sse1_fp_unop_s for why this is OptForSize.
2696   def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
2697                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
2698                 [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
2699             Requires<[HasSSE2, OptForSize]>;
2700   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2701                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
2702                     [(set VR128:$dst, (F64Int VR128:$src))]>;
2703   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
2704                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
2705                     [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
2708 /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
/// Pattern-less three-operand AVX variants (selected via Pat<> defs below);
/// marked neverHasSideEffects since they carry no patterns.
2709 multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
2710 let neverHasSideEffects = 1 in {
2711   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
2712                !strconcat(OpcodeStr,
2713                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2715   def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
2716                !strconcat(OpcodeStr,
2717                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2719   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
2720                (ins VR128:$src1, sdmem:$src2),
2721                !strconcat(OpcodeStr,
2722                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
2725 /// sse2_fp_unop_p - SSE2 unops in vector forms.
/// v2f64 register (PDr) and 128-bit memory (PDm) forms for OpNode.
2726 multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
2728   def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2729               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2730               [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
2731   def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2732                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2733                 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
2736 /// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
/// v4f64 over VR256/f256mem; otherwise parallel to sse2_fp_unop_p.
2737 multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
2738   def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2739               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2740               [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
2741   def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2742                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2743                 [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
2746 /// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
/// Matches the V2F64Int intrinsic rather than an SDNode.
2747 multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
2748                               Intrinsic V2F64Int> {
2749   def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2750                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2751                     [(set VR128:$dst, (V2F64Int VR128:$src))]>;
2752   def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2753                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2754                     [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
2757 /// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
/// 256-bit (VR256/f256mem) counterpart of sse2_fp_unop_p_int.
2758 multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
2759                                 Intrinsic V2F64Int> {
2760   def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2761                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2762                     [(set VR256:$dst, (V2F64Int VR256:$src))]>;
2763   def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2764                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2765                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
// AVX unop instantiations.  Opcodes: 0x51=SQRT (PS/PD/SS/SD), 0x52=RSQRT,
// 0x53=RCP (the reciprocal approximations exist only in single precision).
2768 let Predicates = [HasAVX] in {
2770   defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
2771                 sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
2773   defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
2774                 sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
2775                 sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
2776                 sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
2777                 sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
2778                 sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
2779                 sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
2780                 sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
2783   // Reciprocal approximations. Note that these typically require refinement
2784   // in order to obtain suitable precision.
2785   defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
2786   defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
2787                 sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
2788                 sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
2789                 sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
2791   defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
2792   defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
2793                 sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
2794                 sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
2795                 sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
// Select scalar unop DAG nodes onto the pattern-less AVX three-operand
// instructions; the first source operand is left as IMPLICIT_DEF.  Load
// folding into the mem forms is gated on OptForSize (see the comment in
// sse1_fp_unop_s about partial register updates).
2798 let AddedComplexity = 1 in {
2799 def : Pat<(f32 (fsqrt FR32:$src)),
2800           (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
2801 def : Pat<(f32 (fsqrt (load addr:$src))),
2802           (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
2803           Requires<[HasAVX, OptForSize]>;
2804 def : Pat<(f64 (fsqrt FR64:$src)),
2805           (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
2806 def : Pat<(f64 (fsqrt (load addr:$src))),
2807           (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
2808           Requires<[HasAVX, OptForSize]>;
2810 def : Pat<(f32 (X86frsqrt FR32:$src)),
2811           (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
2812 def : Pat<(f32 (X86frsqrt (load addr:$src))),
2813           (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
2814           Requires<[HasAVX, OptForSize]>;
2816 def : Pat<(f32 (X86frcp FR32:$src)),
2817           (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
2818 def : Pat<(f32 (X86frcp (load addr:$src))),
2819           (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
2820           Requires<[HasAVX, OptForSize]>;
// AVX scalar intrinsic lowering: extract the low element, run the scalar
// instruction, and re-insert the result into an IMPLICIT_DEF vector.  The
// memory forms go straight to the *_Int instructions.
2823 let Predicates = [HasAVX], AddedComplexity = 1 in {
2824   def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
2825                 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
2826                     (VSQRTSSr (f32 (IMPLICIT_DEF)),
2827                               (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
2829   def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
2830             (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2832   def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
2833                 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
2834                     (VSQRTSDr (f64 (IMPLICIT_DEF)),
2835                               (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
2837   def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
2838             (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
2840   def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
2841                 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
2842                     (VRSQRTSSr (f32 (IMPLICIT_DEF)),
2843                               (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
2845   def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
2846             (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2848   def : Pat<(int_x86_sse_rcp_ss VR128:$src),
2849                 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
2850                     (VRCPSSr (f32 (IMPLICIT_DEF)),
2851                               (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
2853   def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
2854             (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
// Legacy (non-VEX) scalar + packed unary FP ops. SQRT (opcode 0x51) exists in
// both SSE1 (ss/ps) and SSE2 (sd/pd) flavors, instantiated together here.
2858 defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
2859 sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
2860 sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
2861 sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
2862 sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
2863 sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
2865 // Reciprocal approximations. Note that these typically require refinement
2866 // in order to obtain suitable precision.
// RSQRT (0x52) and RCP (0x53) are SSE1-only: single-precision forms exist,
// there are no f64 counterparts (see the comment following this block).
2867 defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
2868 sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
2869 sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
2870 defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
2871 sse1_fp_unop_p<0x53, "rcp", X86frcp>,
2872 sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
2874 // There is no f64 version of the reciprocal approximation instructions.
2876 //===----------------------------------------------------------------------===//
2877 // SSE 1 & 2 - Non-temporal stores
2878 //===----------------------------------------------------------------------===//
// VEX-encoded non-temporal stores. AddedComplexity = 400 makes the pattern
// matcher prefer these over ordinary aligned stores when the DAG node is an
// alignednontemporalstore.
2880 let AddedComplexity = 400 in { // Prefer non-temporal versions
2881 def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
2882 (ins f128mem:$dst, VR128:$src),
2883 "movntps\t{$src, $dst|$dst, $src}",
2884 [(alignednontemporalstore (v4f32 VR128:$src),
2886 def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
2887 (ins f128mem:$dst, VR128:$src),
2888 "movntpd\t{$src, $dst|$dst, $src}",
2889 [(alignednontemporalstore (v2f64 VR128:$src),
2892 let ExeDomain = SSEPackedInt in
2893 def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
2894 (ins f128mem:$dst, VR128:$src),
2895 "movntdq\t{$src, $dst|$dst, $src}",
2896 [(alignednontemporalstore (v2i64 VR128:$src),
// Extra pattern so an aligned NT store of v2i64 also selects VMOVNTDQ.
2899 def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
2900 (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
// 256-bit (Y) forms of the same three NT stores.
2902 def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
2903 (ins f256mem:$dst, VR256:$src),
2904 "movntps\t{$src, $dst|$dst, $src}",
2905 [(alignednontemporalstore (v8f32 VR256:$src),
2907 def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
2908 (ins f256mem:$dst, VR256:$src),
2909 "movntpd\t{$src, $dst|$dst, $src}",
2910 [(alignednontemporalstore (v4f64 VR256:$src),
2912 let ExeDomain = SSEPackedInt in
2913 def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
2914 (ins f256mem:$dst, VR256:$src),
2915 "movntdq\t{$src, $dst|$dst, $src}",
2916 [(alignednontemporalstore (v4i64 VR256:$src),
// Map the AVX 256-bit movnt intrinsics directly onto the Y-form stores.
2920 def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
2921 (VMOVNTDQYmr addr:$dst, VR256:$src)>;
2922 def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
2923 (VMOVNTPDYmr addr:$dst, VR256:$src)>;
2924 def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
2925 (VMOVNTPSYmr addr:$dst, VR256:$src)>;
// Legacy (non-VEX) non-temporal stores, mirroring the AVX block above.
2927 let AddedComplexity = 400 in { // Prefer non-temporal versions
2928 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
2929 "movntps\t{$src, $dst|$dst, $src}",
2930 [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
2931 def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
2932 "movntpd\t{$src, $dst|$dst, $src}",
2933 [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
2935 let ExeDomain = SSEPackedInt in
2936 def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
2937 "movntdq\t{$src, $dst|$dst, $src}",
2938 [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
2940 def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
2941 (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
2943 // There is no AVX form for instructions below this point
// MOVNTI: non-temporal store from a general-purpose register (plain TB
// encoding, SSE2 feature bit; REX.W form for the 64-bit variant).
2944 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
2945 "movnti{l}\t{$src, $dst|$dst, $src}",
2946 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
2947 TB, Requires<[HasSSE2]>;
2948 def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
2949 "movnti{q}\t{$src, $dst|$dst, $src}",
2950 [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
2951 TB, Requires<[HasSSE2]>;
2954 //===----------------------------------------------------------------------===//
2955 // SSE 1 & 2 - Prefetch and memory fence
2956 //===----------------------------------------------------------------------===//
2958 // Prefetch intrinsic.
// The four prefetch hints share opcode 0F 18 and are distinguished by the
// ModRM reg field (MRM0m../MRM3m); the third pattern operand encodes the
// locality level passed to the generic `prefetch` node.
2959 let Predicates = [HasSSE1] in {
2960 def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
2961 "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
2962 def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
2963 "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
2964 def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
2965 "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
2966 def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
2967 "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
// Flush a cache line (0F AE /7).
2971 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
2972 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
2973 TB, Requires<[HasSSE2]>;
2975 // Pause. This "instruction" is encoded as "rep; nop", so even though it
2976 // was introduced with SSE2, it's backward compatible.
2977 def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
2979 // Load, store, and memory fence
2980 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
2981 "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
2982 def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
2983 "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
2984 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
2985 "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
// Select the target fence nodes to the same instructions as the intrinsics.
2987 def : Pat<(X86SFence), (SFENCE)>;
2988 def : Pat<(X86LFence), (LFENCE)>;
2989 def : Pat<(X86MFence), (MFENCE)>;
2991 //===----------------------------------------------------------------------===//
2992 // SSE 1 & 2 - Load/Store XCSR register
2993 //===----------------------------------------------------------------------===//
// MXCSR load/store: opcode 0F AE with ModRM reg field /2 (ldmxcsr) and /3
// (stmxcsr); VEX-encoded and legacy forms share the same intrinsics.
2995 def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
2996 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
2997 def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
2998 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
3000 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
3001 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
3002 def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
3003 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
3005 //===---------------------------------------------------------------------===//
3006 // SSE2 - Move Aligned/Unaligned Packed Integer Instructions
3007 //===---------------------------------------------------------------------===//
3009 let ExeDomain = SSEPackedInt in { // SSE integer instructions
3011 let neverHasSideEffects = 1 in {
3012 def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3013 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3014 def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3015 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3017 def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3018 "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
3019 def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3020 "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
3023 let isCodeGenOnly = 1 in {
3024 def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3025 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3026 def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3027 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3028 def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3029 "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
3030 def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3031 "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
3034 let canFoldAsLoad = 1, mayLoad = 1 in {
3035 def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3036 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3037 def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3038 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3039 let Predicates = [HasAVX] in {
3040 def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3041 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
3042 def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3043 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
3047 let mayStore = 1 in {
3048 def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
3049 (ins i128mem:$dst, VR128:$src),
3050 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3051 def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
3052 (ins i256mem:$dst, VR256:$src),
3053 "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
3054 let Predicates = [HasAVX] in {
3055 def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3056 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
3057 def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
3058 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
3062 let neverHasSideEffects = 1 in
3063 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3064 "movdqa\t{$src, $dst|$dst, $src}", []>;
3066 def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3067 "movdqu\t{$src, $dst|$dst, $src}",
3068 []>, XS, Requires<[HasSSE2]>;
3071 let isCodeGenOnly = 1 in {
3072 def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3073 "movdqa\t{$src, $dst|$dst, $src}", []>;
3075 def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3076 "movdqu\t{$src, $dst|$dst, $src}",
3077 []>, XS, Requires<[HasSSE2]>;
3080 let canFoldAsLoad = 1, mayLoad = 1 in {
3081 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3082 "movdqa\t{$src, $dst|$dst, $src}",
3083 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
3084 def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3085 "movdqu\t{$src, $dst|$dst, $src}",
3086 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
3087 XS, Requires<[HasSSE2]>;
3090 let mayStore = 1 in {
3091 def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3092 "movdqa\t{$src, $dst|$dst, $src}",
3093 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
3094 def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3095 "movdqu\t{$src, $dst|$dst, $src}",
3096 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
3097 XS, Requires<[HasSSE2]>;
3100 // Intrinsic forms of MOVDQU load and store
// These select the unaligned-store intrinsic directly; VEX form for AVX and
// legacy XS-prefixed form for SSE2.
3101 def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3102 "vmovdqu\t{$src, $dst|$dst, $src}",
3103 [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
3104 XS, VEX, Requires<[HasAVX]>;
3106 def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3107 "movdqu\t{$src, $dst|$dst, $src}",
3108 [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
3109 XS, Requires<[HasSSE2]>;
3111 } // ExeDomain = SSEPackedInt
// The 256-bit unaligned-store intrinsic maps onto the Y-form store.
3113 let Predicates = [HasAVX] in {
3114 def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
3115 (VMOVDQUYmr addr:$dst, VR256:$src)>;
3118 //===---------------------------------------------------------------------===//
3119 // SSE2 - Packed Integer Arithmetic Instructions
3120 //===---------------------------------------------------------------------===//
3122 let ExeDomain = SSEPackedInt in { // SSE integer instructions
3124 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
3125 RegisterClass RC, PatFrag memop_frag,
3126 X86MemOperand x86memop, bit IsCommutable = 0,
3128 let isCommutable = IsCommutable in
3129 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3130 (ins RC:$src1, RC:$src2),
3132 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3133 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3134 [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>;
3135 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3136 (ins RC:$src1, x86memop:$src2),
3138 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3139 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3140 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>;
3143 multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
3144 string OpcodeStr, SDNode OpNode,
3145 SDNode OpNode2, RegisterClass RC,
3146 ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
3148 // src2 is always 128-bit
3149 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3150 (ins RC:$src1, VR128:$src2),
3152 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3153 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3154 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>;
3155 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3156 (ins RC:$src1, i128mem:$src2),
3158 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3159 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3160 [(set RC:$dst, (DstVT (OpNode RC:$src1,
3161 (bc_frag (memopv2i64 addr:$src2)))))]>;
3162 def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
3163 (ins RC:$src1, i32i8imm:$src2),
3165 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3166 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3167 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))]>;
3170 /// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
3171 multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
3172 ValueType DstVT, ValueType SrcVT, RegisterClass RC,
3173 PatFrag memop_frag, X86MemOperand x86memop,
3174 bit IsCommutable = 0, bit Is2Addr = 1> {
3175 let isCommutable = IsCommutable in
3176 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3177 (ins RC:$src1, RC:$src2),
3179 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3180 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3181 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
3182 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3183 (ins RC:$src1, x86memop:$src2),
3185 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3186 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3187 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
3188 (bitconvert (memop_frag addr:$src2)))))]>;
3190 } // ExeDomain = SSEPackedInt
3192 // 128-bit Integer Arithmetic
// VEX three-operand (non-destructive) 128-bit integer arithmetic. The
// trailing <…, IsCommutable, Is2Addr> arguments are 1/0 for commutable ops
// and 0/0 for non-commutable ones; Is2Addr = 0 because VEX encodings are
// three-address.
3194 let Predicates = [HasAVX] in {
3195 defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
3196 i128mem, 1, 0 /*3addr*/>, VEX_4V;
3197 defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
3198 i128mem, 1, 0>, VEX_4V;
3199 defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
3200 i128mem, 1, 0>, VEX_4V;
3201 defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
3202 i128mem, 1, 0>, VEX_4V;
3203 defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
3204 i128mem, 1, 0>, VEX_4V;
3205 defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
3206 i128mem, 0, 0>, VEX_4V;
3207 defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
3208 i128mem, 0, 0>, VEX_4V;
3209 defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
3210 i128mem, 0, 0>, VEX_4V;
3211 defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
3212 i128mem, 0, 0>, VEX_4V;
3213 defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
3214 memopv2i64, i128mem, 1, 0>, VEX_4V;
// Intrinsic-backed forms (saturating add/sub, high multiplies, averages,
// min/max, multiply-add, sum-of-absolute-differences).
3217 defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
3218 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3219 defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
3220 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3221 defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
3222 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3223 defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
3224 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3225 defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
3226 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3227 defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
3228 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3229 defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
3230 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3231 defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
3232 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3233 defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
3234 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3235 defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
3236 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3237 defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
3238 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3239 defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
3240 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3241 defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
3242 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3243 defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
3244 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3245 defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
3246 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3247 defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
3248 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3249 defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
3250 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3251 defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
3252 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
// AVX2 256-bit (Y-suffixed) versions of the integer arithmetic above: same
// opcodes, widened to VR256/v*256 types and the avx2_* intrinsics.
3255 let Predicates = [HasAVX2] in {
3256 defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
3257 i256mem, 1, 0>, VEX_4V;
3258 defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
3259 i256mem, 1, 0>, VEX_4V;
3260 defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
3261 i256mem, 1, 0>, VEX_4V;
3262 defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
3263 i256mem, 1, 0>, VEX_4V;
3264 defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
3265 i256mem, 1, 0>, VEX_4V;
3266 defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
3267 i256mem, 0, 0>, VEX_4V;
3268 defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
3269 i256mem, 0, 0>, VEX_4V;
3270 defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
3271 i256mem, 0, 0>, VEX_4V;
3272 defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
3273 i256mem, 0, 0>, VEX_4V;
3274 defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
3275 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
// Intrinsic-backed 256-bit forms.
3278 defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
3279 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3280 defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
3281 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3282 defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
3283 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3284 defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
3285 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3286 defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
3287 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3288 defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
3289 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3290 defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
3291 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3292 defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
3293 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3294 defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
3295 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3296 defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
3297 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3298 defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
3299 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3300 defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
3301 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3302 defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
3303 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3304 defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
3305 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3306 defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
3307 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3308 defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
3309 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3310 defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
3311 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3312 defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
3313 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3316 let Constraints = "$src1 = $dst" in {
3317 defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
3319 defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
3321 defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
3323 defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
3325 defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
3327 defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
3329 defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
3331 defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
3333 defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
3335 defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
3336 memopv2i64, i128mem, 1>;
3339 defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
3340 VR128, memopv2i64, i128mem>;
3341 defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
3342 VR128, memopv2i64, i128mem>;
3343 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
3344 VR128, memopv2i64, i128mem>;
3345 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
3346 VR128, memopv2i64, i128mem>;
3347 defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
3348 VR128, memopv2i64, i128mem, 1>;
3349 defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
3350 VR128, memopv2i64, i128mem, 1>;
3351 defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
3352 VR128, memopv2i64, i128mem, 1>;
3353 defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
3354 VR128, memopv2i64, i128mem, 1>;
3355 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
3356 VR128, memopv2i64, i128mem, 1>;
3357 defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
3358 VR128, memopv2i64, i128mem, 1>;
3359 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
3360 VR128, memopv2i64, i128mem, 1>;
3361 defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
3362 VR128, memopv2i64, i128mem, 1>;
3363 defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
3364 VR128, memopv2i64, i128mem, 1>;
3365 defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
3366 VR128, memopv2i64, i128mem, 1>;
3367 defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
3368 VR128, memopv2i64, i128mem, 1>;
3369 defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
3370 VR128, memopv2i64, i128mem, 1>;
3371 defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
3372 VR128, memopv2i64, i128mem, 1>;
3373 defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
3374 VR128, memopv2i64, i128mem, 1>;
3376 } // Constraints = "$src1 = $dst"
3378 //===---------------------------------------------------------------------===//
3379 // SSE2 - Packed Integer Logical Instructions
3380 //===---------------------------------------------------------------------===//
3382 let Predicates = [HasAVX] in {
3383 defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3384 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3385 defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3386 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3387 defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3388 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3390 defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3391 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3392 defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3393 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3394 defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3395 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3397 defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3398 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3399 defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3400 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3402 let ExeDomain = SSEPackedInt in {
3403 // 128-bit logical shifts.
3404 def VPSLLDQri : PDIi8<0x73, MRM7r,
3405 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3406 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3408 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
3410 def VPSRLDQri : PDIi8<0x73, MRM3r,
3411 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3412 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3414 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
3416 // PSRADQri doesn't exist in SSE[1-3].
3418 } // Predicates = [HasAVX]
3420 let Predicates = [HasAVX2] in {
3421 defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3422 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3423 defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3424 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3425 defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3426 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3428 defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3429 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3430 defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3431 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3432 defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3433 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3435 defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3436 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3437 defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3438 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3440 let ExeDomain = SSEPackedInt in {
3441 // 256-bit logical shifts.
3442 def VPSLLDQYri : PDIi8<0x73, MRM7r,
3443 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3444 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3446 (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
3448 def VPSRLDQYri : PDIi8<0x73, MRM3r,
3449 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3450 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3452 (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
3454 // PSRADQYri doesn't exist in SSE[1-3].
3456 } // Predicates = [HasAVX2]
3458 let Constraints = "$src1 = $dst" in {
3459 defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
3460 VR128, v8i16, v8i16, bc_v8i16>;
3461 defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
3462 VR128, v4i32, v4i32, bc_v4i32>;
3463 defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
3464 VR128, v2i64, v2i64, bc_v2i64>;
3466 defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
3467 VR128, v8i16, v8i16, bc_v8i16>;
3468 defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
3469 VR128, v4i32, v4i32, bc_v4i32>;
3470 defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
3471 VR128, v2i64, v2i64, bc_v2i64>;
3473 defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
3474 VR128, v8i16, v8i16, bc_v8i16>;
3475 defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
3476 VR128, v4i32, v4i32, bc_v4i32>;
3478 let ExeDomain = SSEPackedInt in {
3479 // 128-bit logical shifts.
3480 def PSLLDQri : PDIi8<0x73, MRM7r,
3481 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3482 "pslldq\t{$src2, $dst|$dst, $src2}",
3484 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
3485 def PSRLDQri : PDIi8<0x73, MRM3r,
3486 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3487 "psrldq\t{$src2, $dst|$dst, $src2}",
3489 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
3490 // PSRADQri doesn't exist in SSE[1-3].
3492 } // Constraints = "$src1 = $dst"
// Map the whole-register byte-shift intrinsics and the X86fsrl /
// X86vshldq / X86vshrdq nodes onto the PSLLDQ/PSRLDQ immediate-form
// instructions. NOTE(review): BYTE_imm rewrites the immediate (defined
// elsewhere in this file — presumably a bit-count to byte-count conversion);
// confirm against its definition.
3494 let Predicates = [HasAVX] in {
3495 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
3496 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3497 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
3498 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3499 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
3500 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3502 // Shift up / down and insert zero's.
3503 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
3504 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3505 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
3506 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
// AVX2: 256-bit byte-shift intrinsics use the Y-form instructions.
3509 let Predicates = [HasAVX2] in {
3510 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
3511 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
3512 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
3513 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
// Legacy SSE2 versions of the same patterns.
3516 let Predicates = [HasSSE2] in {
3517 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
3518 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3519 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
3520 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3521 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
3522 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
3524 // Shift up / down and insert zero's.
3525 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
3526 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3527 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
3528 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3531 //===---------------------------------------------------------------------===//
3532 // SSE2 - Packed Integer Comparison Instructions
3533 //===---------------------------------------------------------------------===//
// Packed integer compares. Equality (pcmpeq*) is marked commutable
// (IsCommutable = 1); signed greater-than (pcmpgt*) is not (0).
3535 let Predicates = [HasAVX] in {
3536 defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
3537 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3538 defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
3539 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3540 defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
3541 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3542 defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
3543 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3544 defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
3545 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3546 defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
3547 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
// AVX2 256-bit versions.
3550 let Predicates = [HasAVX2] in {
3551 defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
3552 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3553 defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
3554 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3555 defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
3556 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3557 defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
3558 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3559 defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
3560 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3561 defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
3562 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
// Legacy two-address SSE2 versions.
3565 let Constraints = "$src1 = $dst" in {
3566 defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
3567 VR128, memopv2i64, i128mem, 1>;
3568 defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
3569 VR128, memopv2i64, i128mem, 1>;
3570 defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
3571 VR128, memopv2i64, i128mem, 1>;
3572 defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
3573 VR128, memopv2i64, i128mem>;
3574 defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
3575 VR128, memopv2i64, i128mem>;
3576 defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
3577 VR128, memopv2i64, i128mem>;
3578 } // Constraints = "$src1 = $dst"
3580 //===---------------------------------------------------------------------===//
3581 // SSE2 - Packed Integer Pack Instructions
3582 //===---------------------------------------------------------------------===//
// PACKSSWB/PACKSSDW/PACKUSWB narrow elements with saturation; they are
// defined through intrinsics (PDI_binop_rm_int) rather than ISD nodes.
// AVX 128-bit three-operand forms:
3584 let Predicates = [HasAVX] in {
3585 defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
3586 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3587 defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
3588 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3589 defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
3590 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
// AVX2 256-bit forms using the avx2 intrinsics:
3593 let Predicates = [HasAVX2] in {
3594 defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
3595 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3596 defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
3597 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3598 defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
3599 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
// Legacy SSE2 two-operand forms: destination tied to $src1.
3602 let Constraints = "$src1 = $dst" in {
3603 defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
3604 VR128, memopv2i64, i128mem>;
3605 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
3606 VR128, memopv2i64, i128mem>;
3607 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
3608 VR128, memopv2i64, i128mem>;
3609 } // Constraints = "$src1 = $dst"
3611 //===---------------------------------------------------------------------===//
3612 // SSE2 - Packed Integer Shuffle Instructions
3613 //===---------------------------------------------------------------------===//
// All PSHUF variants share opcode 0x70; the mandatory prefix added at the
// defm site (TB+OpSize = 66, XS = F3, XD = F2) selects pshufd / pshufhw /
// pshuflw. Each multiclass emits a register form (ri) and a memory form (mi)
// taking an 8-bit immediate shuffle mask.
3615 let ExeDomain = SSEPackedInt in {
3616 multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
3617 def ri : Ii8<0x70, MRMSrcReg,
3618 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
3619 !strconcat(OpcodeStr,
3620 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3621 [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))]>;
3622 def mi : Ii8<0x70, MRMSrcMem,
3623 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
3624 !strconcat(OpcodeStr,
3625 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3627 (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
3628 (i8 imm:$src2))))]>;
// 256-bit (AVX2) variant of the above; instruction names get a "Y" suffix.
3631 multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
3632 def Yri : Ii8<0x70, MRMSrcReg,
3633 (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
3634 !strconcat(OpcodeStr,
3635 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3636 [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
3637 def Ymi : Ii8<0x70, MRMSrcMem,
3638 (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
3639 !strconcat(OpcodeStr,
3640 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3642 (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
3643 (i8 imm:$src2))))]>;
3645 } // ExeDomain = SSEPackedInt
// VEX-encoded 128-bit forms, plus patterns reusing VPSHUFD for v4f32
// shuffles-by-immediate.
3647 let Predicates = [HasAVX] in {
3648 let AddedComplexity = 5 in
3649 defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
3651 // SSE2 with ImmT == Imm8 and XS prefix.
3652 defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
3654 // SSE2 with ImmT == Imm8 and XD prefix.
3655 defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
3657 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
3658 (VPSHUFDmi addr:$src1, imm:$imm)>;
3659 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
3660 (VPSHUFDri VR128:$src1, imm:$imm)>;
// AVX2 256-bit forms (Yri/Ymi defs under the same VPSHUF* defm names).
3663 let Predicates = [HasAVX2] in {
3664 defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
3665 defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
3666 defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
// Legacy SSE2 encodings, mirroring the AVX block above.
3669 let Predicates = [HasSSE2] in {
3670 let AddedComplexity = 5 in
3671 defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
3673 // SSE2 with ImmT == Imm8 and XS prefix.
3674 defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
3676 // SSE2 with ImmT == Imm8 and XD prefix.
3677 defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
3679 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
3680 (PSHUFDmi addr:$src1, imm:$imm)>;
3681 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
3682 (PSHUFDri VR128:$src1, imm:$imm)>;
3685 //===---------------------------------------------------------------------===//
3686 // SSE2 - Packed Integer Unpack Instructions
3687 //===---------------------------------------------------------------------===//
// PUNPCK{L,H}{BW,WD,DQ,QDQ}: interleave low/high halves of two vectors.
// Is2Addr selects the 2-operand (legacy SSE) vs 3-operand (VEX) asm string;
// bc_frag bitcasts the v2i64 memory load to the element type.
3689 let ExeDomain = SSEPackedInt in {
3690 multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
3691 SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
3692 def rr : PDI<opc, MRMSrcReg,
3693 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
3695 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
3696 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3697 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
3698 def rm : PDI<opc, MRMSrcMem,
3699 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
3701 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
3702 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3703 [(set VR128:$dst, (OpNode VR128:$src1,
3704 (bc_frag (memopv2i64
// 256-bit (AVX2) variant; always 3-operand VEX asm.
3708 multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
3709 SDNode OpNode, PatFrag bc_frag> {
3710 def Yrr : PDI<opc, MRMSrcReg,
3711 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
3712 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3713 [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
3714 def Yrm : PDI<opc, MRMSrcMem,
3715 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
3716 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3717 [(set VR256:$dst, (OpNode VR256:$src1,
3718 (bc_frag (memopv4i64 addr:$src2))))]>;
// VEX-encoded 128-bit forms (Is2Addr = 0).
3721 let Predicates = [HasAVX] in {
3722 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
3723 bc_v16i8, 0>, VEX_4V;
3724 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
3725 bc_v8i16, 0>, VEX_4V;
3726 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
3727 bc_v4i32, 0>, VEX_4V;
3728 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
3729 bc_v2i64, 0>, VEX_4V;
3731 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
3732 bc_v16i8, 0>, VEX_4V;
3733 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
3734 bc_v8i16, 0>, VEX_4V;
3735 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
3736 bc_v4i32, 0>, VEX_4V;
3737 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
3738 bc_v2i64, 0>, VEX_4V;
// AVX2 256-bit forms.
3741 let Predicates = [HasAVX2] in {
3742 defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
3744 defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
3746 defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
3748 defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
3751 defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
3753 defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
3755 defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
3757 defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
// Legacy SSE2 two-operand forms (Is2Addr defaults to 1).
3761 let Constraints = "$src1 = $dst" in {
3762 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
3764 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
3766 defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
3768 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
3771 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
3773 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
3775 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
3777 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
3780 } // ExeDomain = SSEPackedInt
3782 // Patterns for using AVX1 instructions with integer vectors
3783 // Here to give AVX2 priority
// Without AVX2 there are no 256-bit integer unpacks, so select the
// floating-point VUNPCK{L,H}P{S,D}Y instructions for v8i32/v4i64 unpack
// nodes instead (bitwise-identical lane interleave).
3784 let Predicates = [HasAVX] in {
3785 def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
3786 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
3787 def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
3788 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
3789 def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
3790 (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
3791 def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
3792 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
3794 def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
3795 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
3796 def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
3797 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
3798 def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
3799 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
3800 def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
3801 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
3804 //===---------------------------------------------------------------------===//
3805 // SSE2 - Packed Integer Extract and Insert
3806 //===---------------------------------------------------------------------===//
// PINSRW (0xC4): insert a 16-bit value from a GR32 or an i16 memory operand
// into the word lane selected by the i8 immediate. The multiclass emits both
// the 2-operand SSE and 3-operand VEX asm strings, chosen by Is2Addr.
3808 let ExeDomain = SSEPackedInt in {
3809 multiclass sse2_pinsrw<bit Is2Addr = 1> {
3810 def rri : Ii8<0xC4, MRMSrcReg,
3811 (outs VR128:$dst), (ins VR128:$src1,
3812 GR32:$src2, i32i8imm:$src3),
3814 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
3815 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
3817 (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
3818 def rmi : Ii8<0xC4, MRMSrcMem,
3819 (outs VR128:$dst), (ins VR128:$src1,
3820 i16mem:$src2, i32i8imm:$src3),
3822 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
3823 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
3825 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
// PEXTRW (0xC5): extract the word lane selected by the immediate into GR32.
3830 let Predicates = [HasAVX] in
3831 def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
3832 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
3833 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3834 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
3835 imm:$src2))]>, TB, OpSize, VEX;
3836 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
3837 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
3838 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3839 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
// VEX pinsrw plus a pattern-less GR64-source form for the assembler.
3843 let Predicates = [HasAVX] in {
3844 defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
3845 def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
3846 (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
3847 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
3848 []>, TB, OpSize, VEX_4V;
3851 let Constraints = "$src1 = $dst" in
3852 defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
3854 } // ExeDomain = SSEPackedInt
3856 //===---------------------------------------------------------------------===//
3857 // SSE2 - Packed Mask Creation
3858 //===---------------------------------------------------------------------===//
// PMOVMSKB (0xD7): gather the sign bit of each byte into a GR32 mask.
// The GR64-destination forms carry no pattern — they exist for the
// assembler/disassembler only.
3860 let ExeDomain = SSEPackedInt in {
3862 def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
3863 "pmovmskb\t{$src, $dst|$dst, $src}",
3864 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
3865 def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
3866 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
// AVX2 256-bit source forms.
3868 let Predicates = [HasAVX2] in {
3869 def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
3870 "pmovmskb\t{$src, $dst|$dst, $src}",
3871 [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
3872 def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
3873 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
3876 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
3877 "pmovmskb\t{$src, $dst|$dst, $src}",
3878 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
3880 } // ExeDomain = SSEPackedInt
3882 //===---------------------------------------------------------------------===//
3883 // SSE2 - Conditional Store
3884 //===---------------------------------------------------------------------===//
// MASKMOVDQU (0xF7): byte-masked store of $src through the implicit
// EDI/RDI pointer (the pointer register appears as the intrinsic's third
// argument; the 64-suffixed defs use RDI for 64-bit mode).
3886 let ExeDomain = SSEPackedInt in {
3889 def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
3890 (ins VR128:$src, VR128:$mask),
3891 "maskmovdqu\t{$mask, $src|$src, $mask}",
3892 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
3894 def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
3895 (ins VR128:$src, VR128:$mask),
3896 "maskmovdqu\t{$mask, $src|$src, $mask}",
3897 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
3900 def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
3901 "maskmovdqu\t{$mask, $src|$src, $mask}",
3902 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
3904 def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
3905 "maskmovdqu\t{$mask, $src|$src, $mask}",
3906 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
3908 } // ExeDomain = SSEPackedInt
3910 //===---------------------------------------------------------------------===//
3911 // SSE2 - Move Doubleword
3912 //===---------------------------------------------------------------------===//
3914 //===---------------------------------------------------------------------===//
3915 // Move Int Doubleword to Packed Double Int
// MOVD/MOVQ (0x6E): scalar GPR or memory dword/qword into the low element
// of an XMM register, modeled as scalar_to_vector. VEX and legacy forms
// are kept in parallel.
3917 def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
3918 "movd\t{$src, $dst|$dst, $src}",
3920 (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
3921 def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
3922 "movd\t{$src, $dst|$dst, $src}",
3924 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3926 def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
3927 "mov{d|q}\t{$src, $dst|$dst, $src}",
3929 (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
3930 def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
3931 "mov{d|q}\t{$src, $dst|$dst, $src}",
3932 [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
3934 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
3935 "movd\t{$src, $dst|$dst, $src}",
3937 (v4i32 (scalar_to_vector GR32:$src)))]>;
3938 def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
3939 "movd\t{$src, $dst|$dst, $src}",
3941 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
3942 def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
3943 "mov{d|q}\t{$src, $dst|$dst, $src}",
3945 (v2i64 (scalar_to_vector GR64:$src)))]>;
3946 def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
3947 "mov{d|q}\t{$src, $dst|$dst, $src}",
3948 [(set FR64:$dst, (bitconvert GR64:$src))]>;
3950 //===---------------------------------------------------------------------===//
3951 // Move Int Doubleword to Single Scalar
// movd into an FR32 scalar register, selected for i32<->f32 bitconverts.
3953 def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
3954 "movd\t{$src, $dst|$dst, $src}",
3955 [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
3957 def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
3958 "movd\t{$src, $dst|$dst, $src}",
3959 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
3961 def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
3962 "movd\t{$src, $dst|$dst, $src}",
3963 [(set FR32:$dst, (bitconvert GR32:$src))]>;
3965 def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
3966 "movd\t{$src, $dst|$dst, $src}",
3967 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
3969 //===---------------------------------------------------------------------===//
3970 // Move Packed Doubleword Int to Packed Double Int
// MOVD store direction (0x7E, MRMDest*): extract element 0 of a v4i32 into
// a GR32 or store it directly to memory.
3972 def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
3973 "movd\t{$src, $dst|$dst, $src}",
3974 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
3976 def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
3977 (ins i32mem:$dst, VR128:$src),
3978 "movd\t{$src, $dst|$dst, $src}",
3979 [(store (i32 (vector_extract (v4i32 VR128:$src),
3980 (iPTR 0))), addr:$dst)]>, VEX;
3981 def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
3982 "movd\t{$src, $dst|$dst, $src}",
3983 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
3985 def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
3986 "movd\t{$src, $dst|$dst, $src}",
3987 [(store (i32 (vector_extract (v4i32 VR128:$src),
3988 (iPTR 0))), addr:$dst)]>;
3990 //===---------------------------------------------------------------------===//
3991 // Move Packed Doubleword Int first element to Doubleword Int
// 64-bit extract of element 0 of a v2i64 into a GR64; the VEX form is
// restricted to 64-bit mode via Requires<[HasAVX, In64BitMode]>.
3993 def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
3994 "mov{d|q}\t{$src, $dst|$dst, $src}",
3995 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
3997 TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
3999 def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4000 "mov{d|q}\t{$src, $dst|$dst, $src}",
4001 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
4004 //===---------------------------------------------------------------------===//
4005 // Bitcast FR64 <-> GR64
// Raw 64-bit bitcasts between the scalar-double and integer register files,
// plus load/store variants.
4007 let Predicates = [HasAVX] in
4008 def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4009 "vmovq\t{$src, $dst|$dst, $src}",
4010 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
4012 def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4013 "mov{d|q}\t{$src, $dst|$dst, $src}",
4014 [(set GR64:$dst, (bitconvert FR64:$src))]>, VEX;
4015 def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
4016 "movq\t{$src, $dst|$dst, $src}",
4017 [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
4020 def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4021 "movq\t{$src, $dst|$dst, $src}",
4022 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
4023 def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4024 "mov{d|q}\t{$src, $dst|$dst, $src}",
4025 [(set GR64:$dst, (bitconvert FR64:$src))]>;
4026 def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
4027 "movq\t{$src, $dst|$dst, $src}",
4028 [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
4030 //===---------------------------------------------------------------------===//
4031 // Move Scalar Single to Double Int
// Same idea for 32-bit: FR32 <-> GR32 bitcast and i32 store.
4033 def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4034 "movd\t{$src, $dst|$dst, $src}",
4035 [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
4036 def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
4037 "movd\t{$src, $dst|$dst, $src}",
4038 [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
4039 def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4040 "movd\t{$src, $dst|$dst, $src}",
4041 [(set GR32:$dst, (bitconvert FR32:$src))]>;
4042 def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
4043 "movd\t{$src, $dst|$dst, $src}",
4044 [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
4046 //===---------------------------------------------------------------------===//
4047 // Patterns and instructions to describe movd/movq to XMM register zero-extends
// X86vzmovl(scalar_to_vector x) — a scalar moved into lane 0 with the upper
// lanes zeroed — maps directly onto movd/movq. AddedComplexity biases
// selection toward these over plain scalar_to_vector patterns.
4049 let AddedComplexity = 15 in {
4050 def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4051 "movd\t{$src, $dst|$dst, $src}",
4052 [(set VR128:$dst, (v4i32 (X86vzmovl
4053 (v4i32 (scalar_to_vector GR32:$src)))))]>,
4055 def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4056 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
4057 [(set VR128:$dst, (v2i64 (X86vzmovl
4058 (v2i64 (scalar_to_vector GR64:$src)))))]>,
4061 let AddedComplexity = 15 in {
4062 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4063 "movd\t{$src, $dst|$dst, $src}",
4064 [(set VR128:$dst, (v4i32 (X86vzmovl
4065 (v4i32 (scalar_to_vector GR32:$src)))))]>;
4066 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4067 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
4068 [(set VR128:$dst, (v2i64 (X86vzmovl
4069 (v2i64 (scalar_to_vector GR64:$src)))))]>;
// Memory-source zero-extending movd.
4072 let AddedComplexity = 20 in {
4073 def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4074 "movd\t{$src, $dst|$dst, $src}",
4076 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
4077 (loadi32 addr:$src))))))]>,
4079 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4080 "movd\t{$src, $dst|$dst, $src}",
4082 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
4083 (loadi32 addr:$src))))))]>;
4086 let Predicates = [HasAVX] in {
4087 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4088 let AddedComplexity = 20 in {
4089 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4090 (VMOVZDI2PDIrm addr:$src)>;
4091 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4092 (VMOVZDI2PDIrm addr:$src)>;
4094 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4095 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4096 (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
4097 (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
4098 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4099 (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
4100 (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
// Legacy SSE2 equivalents of the bitcast-load patterns above.
4103 let Predicates = [HasSSE2], AddedComplexity = 20 in {
4104 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4105 (MOVZDI2PDIrm addr:$src)>;
4106 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4107 (MOVZDI2PDIrm addr:$src)>;
4110 // These are the correct encodings of the instructions so that we know how to
4111 // read correct assembly, even though we continue to emit the wrong ones for
4112 // compatibility with Darwin's buggy assembler.
4113 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4114 (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4115 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4116 (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
4117 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4118 (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4119 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4120 (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
4121 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4122 (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
4123 def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4124 (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
4126 //===---------------------------------------------------------------------===//
4127 // SSE2 - Move Quadword
4128 //===---------------------------------------------------------------------===//
4130 //===---------------------------------------------------------------------===//
4131 // Move Quadword Int to Packed Quadword Int
// F3-prefixed MOVQ load (0x7E with XS): i64 memory -> low lane of v2i64.
4133 def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4134 "vmovq\t{$src, $dst|$dst, $src}",
4136 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
4137 VEX, Requires<[HasAVX]>;
4138 def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4139 "movq\t{$src, $dst|$dst, $src}",
4141 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
4142 Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
4144 //===---------------------------------------------------------------------===//
4145 // Move Packed Quadword Int to Quadword Int
// MOVQ store (0xD6): element 0 of v2i64 -> i64 memory.
4147 def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4148 "movq\t{$src, $dst|$dst, $src}",
4149 [(store (i64 (vector_extract (v2i64 VR128:$src),
4150 (iPTR 0))), addr:$dst)]>, VEX;
4151 def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4152 "movq\t{$src, $dst|$dst, $src}",
4153 [(store (i64 (vector_extract (v2i64 VR128:$src),
4154 (iPTR 0))), addr:$dst)]>;
4156 //===---------------------------------------------------------------------===//
4157 // Store / copy lower 64-bits of a XMM register.
// Same 0xD6 encoding selected from the storel.dq intrinsic.
4159 def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4160 "movq\t{$src, $dst|$dst, $src}",
4161 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
4162 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4163 "movq\t{$src, $dst|$dst, $src}",
4164 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
// Zero-extending MOVQ load: low qword loaded, high lane zeroed (X86vzmovl).
4166 let AddedComplexity = 20 in
4167 def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4168 "vmovq\t{$src, $dst|$dst, $src}",
4170 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
4171 (loadi64 addr:$src))))))]>,
4172 XS, VEX, Requires<[HasAVX]>;
4174 let AddedComplexity = 20 in
4175 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4176 "movq\t{$src, $dst|$dst, $src}",
4178 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
4179 (loadi64 addr:$src))))))]>,
4180 XS, Requires<[HasSSE2]>;
// Fold vzmovl-of-load and the vzload node into the zero-extending movq.
4182 let Predicates = [HasAVX], AddedComplexity = 20 in {
4183 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4184 (VMOVZQI2PQIrm addr:$src)>;
4185 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
4186 (VMOVZQI2PQIrm addr:$src)>;
4187 def : Pat<(v2i64 (X86vzload addr:$src)),
4188 (VMOVZQI2PQIrm addr:$src)>;
4191 let Predicates = [HasSSE2], AddedComplexity = 20 in {
4192 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4193 (MOVZQI2PQIrm addr:$src)>;
4194 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
4195 (MOVZQI2PQIrm addr:$src)>;
4196 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
4199 let Predicates = [HasAVX] in {
4200 def : Pat<(v4i64 (X86vzload addr:$src)),
4201 (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
4204 //===---------------------------------------------------------------------===//
4205 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
4206 // IA32 document. movq xmm1, xmm2 does clear the high bits.
// Register-to-register MOVQ modeled as X86vzmovl: copies lane 0 and zeroes
// the upper qword.
4208 let AddedComplexity = 15 in
4209 def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4210 "vmovq\t{$src, $dst|$dst, $src}",
4211 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4212 XS, VEX, Requires<[HasAVX]>;
4213 let AddedComplexity = 15 in
4214 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4215 "movq\t{$src, $dst|$dst, $src}",
4216 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4217 XS, Requires<[HasSSE2]>;
// i128mem-source variants: load a full vector, keep lane 0, zero the rest.
4219 let AddedComplexity = 20 in
4220 def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4221 "vmovq\t{$src, $dst|$dst, $src}",
4222 [(set VR128:$dst, (v2i64 (X86vzmovl
4223 (loadv2i64 addr:$src))))]>,
4224 XS, VEX, Requires<[HasAVX]>;
4225 let AddedComplexity = 20 in {
4226 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4227 "movq\t{$src, $dst|$dst, $src}",
4228 [(set VR128:$dst, (v2i64 (X86vzmovl
4229 (loadv2i64 addr:$src))))]>,
4230 XS, Requires<[HasSSE2]>;
// Additional vzmovl folds (including the v2f64 register form) onto the
// same instructions.
4233 let AddedComplexity = 20 in {
4234 let Predicates = [HasAVX] in {
4235 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4236 (VMOVZPQILo2PQIrm addr:$src)>;
4237 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4238 (VMOVZPQILo2PQIrr VR128:$src)>;
4240 let Predicates = [HasSSE2] in {
4241 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4242 (MOVZPQILo2PQIrm addr:$src)>;
4243 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4244 (MOVZPQILo2PQIrr VR128:$src)>;
// Pattern-less defs so the assembler/disassembler can round-trip all movq
// spellings (GR64 forms and the xmm<-xmm F3 form).
4248 // Instructions to match in the assembler
4249 def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4250 "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
4251 def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4252 "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
4253 // Recognize "movd" with GR64 destination, but encode as a "movq"
4254 def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4255 "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
4257 // Instructions for the disassembler
4258 // xr = XMM register
4261 let Predicates = [HasAVX] in
4262 def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4263 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
4264 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4265 "movq\t{$src, $dst|$dst, $src}", []>, XS;
4267 //===---------------------------------------------------------------------===//
4268 // SSE3 - Conversion Instructions
4269 //===---------------------------------------------------------------------===//
4271 // Convert Packed Double FP to Packed DW Integers
4272 let Predicates = [HasAVX] in {
4273 // The assembler can recognize rr 256-bit instructions by seeing a ymm
4274 // register, but the same isn't true when using memory operands instead.
4275 // Provide other assembly rr and rm forms to address this explicitly.
4276 def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4277 "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
4278 def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
4279 "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
4282 def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4283 "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
4284 def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4285 "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
4288 def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
4289 "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
4290 def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
4291 "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
4294 def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4295 "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
4296 def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4297 "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
4299 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
4300 (VCVTTPD2DQYrr VR256:$src)>;
4301 def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
4302 (VCVTTPD2DQYrm addr:$src)>;
4304 // Convert Packed DW Integers to Packed Double FP
4305 let Predicates = [HasAVX] in {
4306 def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4307 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
4308 def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4309 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
// FIX: the Y forms write a VR256 (ymm) destination, i.e. they are 256-bit
// instructions and must carry VEX_L (VEX.L=1) to encode correctly,
// matching the convention used by VCVTPD2DQYrm above.
4310 def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
4311 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
4312 def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
4313 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
// Legacy (non-VEX) SSE2 encodings.
4316 def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4317 "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
4318 def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4319 "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
4321 // AVX 256-bit register conversion intrinsics
// Select the VEX-encoded 256-bit dq2pd/pd2dq instructions for the AVX
// conversion intrinsics and for the generic sint_to_fp node on v4i32.
4322 def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
4323 (VCVTDQ2PDYrr VR128:$src)>;
4324 def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
4325 (VCVTDQ2PDYrm addr:$src)>;
4327 def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
4328 (VCVTPD2DQYrr VR256:$src)>;
4329 def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
4330 (VCVTPD2DQYrm addr:$src)>;
4332 def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
4333 (VCVTDQ2PDYrr VR128:$src)>;
4334 def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
4335 (VCVTDQ2PDYrm addr:$src)>;
4337 //===---------------------------------------------------------------------===//
4338 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
4339 //===---------------------------------------------------------------------===//
// sse3_replicate_sfp - register and memory forms of a single-FP replicate
// (movshdup/movsldup); OpNode is the target-specific shuffle node matched.
4340 multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
4341 ValueType vt, RegisterClass RC, PatFrag mem_frag,
4342 X86MemOperand x86memop> {
4343 def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4344 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4345 [(set RC:$dst, (vt (OpNode RC:$src)))]>;
4346 def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4347 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4348 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
// VEX-encoded 128-bit and 256-bit variants.
// NOTE(review): the 256-bit Y variants below are tagged only VEX; compare
// VCVTPD2DQYrm, which also sets VEX_L — verify VEX.L handling here.
4351 let Predicates = [HasAVX] in {
4352 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4353 v4f32, VR128, memopv4f32, f128mem>, VEX;
4354 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4355 v4f32, VR128, memopv4f32, f128mem>, VEX;
4356 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4357 v8f32, VR256, memopv8f32, f256mem>, VEX;
4358 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4359 v8f32, VR256, memopv8f32, f256mem>, VEX;
// Legacy SSE3 encodings.
4361 defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
4362 memopv4f32, f128mem>;
4363 defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
4364 memopv4f32, f128mem>;
// Reuse the fp-domain movshdup/movsldup instructions for the same shuffles
// on *integer* vectors; the bitconverts adapt v4i32/v8i32 operands.
4366 let Predicates = [HasAVX] in {
4367 def : Pat<(v4i32 (X86Movshdup VR128:$src)),
4368 (VMOVSHDUPrr VR128:$src)>;
4369 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
4370 (VMOVSHDUPrm addr:$src)>;
4371 def : Pat<(v4i32 (X86Movsldup VR128:$src)),
4372 (VMOVSLDUPrr VR128:$src)>;
4373 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
4374 (VMOVSLDUPrm addr:$src)>;
4375 def : Pat<(v8i32 (X86Movshdup VR256:$src)),
4376 (VMOVSHDUPYrr VR256:$src)>;
4377 def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
4378 (VMOVSHDUPYrm addr:$src)>;
4379 def : Pat<(v8i32 (X86Movsldup VR256:$src)),
4380 (VMOVSLDUPYrr VR256:$src)>;
4381 def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
4382 (VMOVSLDUPYrm addr:$src)>;
// Same integer-vector patterns for plain (non-VEX) SSE3 targets.
4385 let Predicates = [HasSSE3] in {
4386 def : Pat<(v4i32 (X86Movshdup VR128:$src)),
4387 (MOVSHDUPrr VR128:$src)>;
4388 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
4389 (MOVSHDUPrm addr:$src)>;
4390 def : Pat<(v4i32 (X86Movsldup VR128:$src)),
4391 (MOVSLDUPrr VR128:$src)>;
4392 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
4393 (MOVSLDUPrm addr:$src)>;
4396 //===---------------------------------------------------------------------===//
4397 // SSE3 - Replicate Double FP - MOVDDUP
4398 //===---------------------------------------------------------------------===//
// 128-bit movddup: the rr form carries no pattern (neverHasSideEffects);
// the rm form duplicates a scalar f64 load.
4400 multiclass sse3_replicate_dfp<string OpcodeStr> {
4401 let neverHasSideEffects = 1 in
4402 def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4403 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4405 def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
4406 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4409 (scalar_to_vector (loadf64 addr:$src)))))]>;
4412 // FIXME: Merge with above classe when there're patterns for the ymm version
// 256-bit (ymm) movddup variant; rr matches X86Movddup on v4f64 directly.
4413 multiclass sse3_replicate_dfp_y<string OpcodeStr> {
4414 def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
4415 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4416 [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
4417 def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
4418 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4421 (scalar_to_vector (loadf64 addr:$src)))))]>;
// NOTE(review): VMOVDDUPY is 256-bit but tagged only VEX here — verify
// whether VEX_L is required (cf. VCVTPD2DQYrm above).
4424 let Predicates = [HasAVX] in {
4425 defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
4426 defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
4429 defm MOVDDUP : sse3_replicate_dfp<"movddup">;
4431 let Predicates = [HasAVX] in {
// These anonymous Pats already receive Predicates = [HasAVX] from the
// enclosing 'let'; the per-pattern Requires<[HasAVX]> duplicated that and
// has been dropped (matches the style of every other pattern block here).
4432 def : Pat<(X86Movddup (memopv2f64 addr:$src)),
4433 (VMOVDDUPrm addr:$src)>;
4434 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
4435 (VMOVDDUPrm addr:$src)>;
4436 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
4437 (VMOVDDUPrm addr:$src)>;
4438 def : Pat<(X86Movddup (bc_v2f64
4439 (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
4440 (VMOVDDUPrm addr:$src)>;
// 256-bit movddup load/register patterns (still under Predicates=[HasAVX]).
4443 def : Pat<(X86Movddup (memopv4f64 addr:$src)),
4444 (VMOVDDUPYrm addr:$src)>;
4445 def : Pat<(X86Movddup (memopv4i64 addr:$src)),
4446 (VMOVDDUPYrm addr:$src)>;
4447 def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
4448 (VMOVDDUPYrm addr:$src)>;
4449 def : Pat<(X86Movddup (v4i64 VR256:$src)),
4450 (VMOVDDUPYrr VR256:$src)>;
// Non-VEX movddup patterns for plain SSE3 targets.
4453 let Predicates = [HasSSE3] in {
4454 def : Pat<(X86Movddup (memopv2f64 addr:$src)),
4455 (MOVDDUPrm addr:$src)>;
4456 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
4457 (MOVDDUPrm addr:$src)>;
4458 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
4459 (MOVDDUPrm addr:$src)>;
4460 def : Pat<(X86Movddup (bc_v2f64
4461 (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
4462 (MOVDDUPrm addr:$src)>;
4465 //===---------------------------------------------------------------------===//
4466 // SSE3 - Move Unaligned Integer
4467 //===---------------------------------------------------------------------===//
// lddqu: unaligned 128/256-bit integer loads, selected via the intrinsics.
4469 let Predicates = [HasAVX] in {
4470 def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4471 "vlddqu\t{$src, $dst|$dst, $src}",
4472 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
4473 def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
4474 "vlddqu\t{$src, $dst|$dst, $src}",
4475 [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
4477 def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4478 "lddqu\t{$src, $dst|$dst, $src}",
4479 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
4481 //===---------------------------------------------------------------------===//
4482 // SSE3 - Arithmetic
4483 //===---------------------------------------------------------------------===//
// sse3_addsub - addsub[ps|pd] rr and rm forms selected through the
// corresponding intrinsic; Is2Addr chooses 2-addr (SSE) vs 3-addr (AVX) asm.
4485 multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
4486 X86MemOperand x86memop, bit Is2Addr = 1> {
4487 def rr : I<0xD0, MRMSrcReg,
4488 (outs RC:$dst), (ins RC:$src1, RC:$src2),
4490 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4491 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4492 [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
4493 def rm : I<0xD0, MRMSrcMem,
4494 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4496 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4497 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4498 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
// VEX-encoded forms; XD prefix for ps, 66/OpSize prefix for pd.
4501 let Predicates = [HasAVX] in {
4502 let ExeDomain = SSEPackedSingle in {
4503 defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
4504 f128mem, 0>, TB, XD, VEX_4V;
4505 defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
4506 f256mem, 0>, TB, XD, VEX_4V;
4508 let ExeDomain = SSEPackedDouble in {
4509 defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
4510 f128mem, 0>, TB, OpSize, VEX_4V;
4511 defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
4512 f256mem, 0>, TB, OpSize, VEX_4V;
// Legacy 2-address SSE3 forms.
4515 let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
4516 let ExeDomain = SSEPackedSingle in
4517 defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
4519 let ExeDomain = SSEPackedDouble in
4520 defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
4521 f128mem>, TB, OpSize;
4524 //===---------------------------------------------------------------------===//
4525 // SSE3 Instructions
4526 //===---------------------------------------------------------------------===//
// Horizontal add/sub. S3D_Int instantiates the F2-prefixed (S3DI) class for
// the ps forms; S3_Int the 66-prefixed (S3I) class for the pd forms.
4529 multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4530 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
4531 def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4533 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4534 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4535 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
4537 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4539 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4540 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4541 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
4543 multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4544 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
4545 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4547 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4548 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4549 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
4551 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4553 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4554 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4555 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
// VEX-encoded 128/256-bit horizontal ops, matched via X86fhadd/X86fhsub.
4558 let Predicates = [HasAVX] in {
4559 let ExeDomain = SSEPackedSingle in {
4560 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
4561 X86fhadd, 0>, VEX_4V;
4562 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
4563 X86fhsub, 0>, VEX_4V;
4564 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
4565 X86fhadd, 0>, VEX_4V;
4566 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
4567 X86fhsub, 0>, VEX_4V;
4569 let ExeDomain = SSEPackedDouble in {
4570 defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
4571 X86fhadd, 0>, VEX_4V;
4572 defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
4573 X86fhsub, 0>, VEX_4V;
4574 defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
4575 X86fhadd, 0>, VEX_4V;
4576 defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
4577 X86fhsub, 0>, VEX_4V;
// Legacy 2-address SSE3 forms.
4581 let Constraints = "$src1 = $dst" in {
4582 let ExeDomain = SSEPackedSingle in {
4583 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
4584 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
4586 let ExeDomain = SSEPackedDouble in {
4587 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
4588 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
4592 //===---------------------------------------------------------------------===//
4593 // SSSE3 - Packed Absolute Instructions
4594 //===---------------------------------------------------------------------===//
4597 /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
4598 multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
4599 Intrinsic IntId128> {
4600 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4602 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4603 [(set VR128:$dst, (IntId128 VR128:$src))]>,
4606 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4608 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4611 (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
4614 /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
4615 multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
4616 Intrinsic IntId256> {
4617 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4619 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4620 [(set VR256:$dst, (IntId256 VR256:$src))]>,
4623 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4625 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4628 (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
// VEX-encoded 128-bit pabs (AVX) using the SSSE3 intrinsics.
4631 let Predicates = [HasAVX] in {
4632 defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
4633 int_x86_ssse3_pabs_b_128>, VEX;
4634 defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
4635 int_x86_ssse3_pabs_w_128>, VEX;
4636 defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
4637 int_x86_ssse3_pabs_d_128>, VEX;
// 256-bit pabs (AVX2); defm names overlap with the 128-bit group above but
// the produced defs differ by the rr256/rm256 suffixes.
4640 let Predicates = [HasAVX2] in {
4641 defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
4642 int_x86_avx2_pabs_b>, VEX;
4643 defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
4644 int_x86_avx2_pabs_w>, VEX;
4645 defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
4646 int_x86_avx2_pabs_d>, VEX;
// Legacy SSSE3 encodings.
4649 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
4650 int_x86_ssse3_pabs_b_128>;
4651 defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
4652 int_x86_ssse3_pabs_w_128>;
4653 defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
4654 int_x86_ssse3_pabs_d_128>;
4656 //===---------------------------------------------------------------------===//
4657 // SSSE3 - Packed Binary Operator Instructions
4658 //===---------------------------------------------------------------------===//
4660 /// SS3I_binop_rm - Simple SSSE3 bin op
4661 multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4662 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
4663 X86MemOperand x86memop, bit Is2Addr = 1> {
4664 let isCommutable = 1 in
4665 def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
4666 (ins RC:$src1, RC:$src2),
4668 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4669 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4670 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
4672 def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
4673 (ins RC:$src1, x86memop:$src2),
4675 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4676 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4678 (OpVT (OpNode RC:$src1,
4679 (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
4682 /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
4683 multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
4684 Intrinsic IntId128, bit Is2Addr = 1> {
4685 let isCommutable = 1 in
4686 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4687 (ins VR128:$src1, VR128:$src2),
4689 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4690 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4691 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
4693 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4694 (ins VR128:$src1, i128mem:$src2),
4696 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4697 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4699 (IntId128 VR128:$src1,
4700 (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
// 256-bit intrinsic variant; always 3-address asm (no Is2Addr parameter).
4703 multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
4704 Intrinsic IntId256> {
4705 let isCommutable = 1 in
4706 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4707 (ins VR256:$src1, VR256:$src2),
4708 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4709 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
4711 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4712 (ins VR256:$src1, i256mem:$src2),
4713 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4715 (IntId256 VR256:$src1,
4716 (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
// VEX-encoded 128-bit SSSE3 binary ops (AVX). The horizontal and sign ops
// are not commutative, hence the isCommutable = 0 override.
4719 let ImmT = NoImm, Predicates = [HasAVX] in {
4720 let isCommutable = 0 in {
4721 defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
4722 memopv2i64, i128mem, 0>, VEX_4V;
4723 defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
4724 memopv2i64, i128mem, 0>, VEX_4V;
4725 defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
4726 memopv2i64, i128mem, 0>, VEX_4V;
4727 defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
4728 memopv2i64, i128mem, 0>, VEX_4V;
4729 defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
4730 memopv2i64, i128mem, 0>, VEX_4V;
4731 defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
4732 memopv2i64, i128mem, 0>, VEX_4V;
4733 defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
4734 memopv2i64, i128mem, 0>, VEX_4V;
4735 defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
4736 memopv2i64, i128mem, 0>, VEX_4V;
4737 defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
4738 int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
4739 defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
4740 int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
4741 defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
4742 int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
4744 defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
4745 int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
// 256-bit variants (AVX2), using the avx2 intrinsics and memopv4i64.
4748 let ImmT = NoImm, Predicates = [HasAVX2] in {
4749 let isCommutable = 0 in {
4750 defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
4751 memopv4i64, i256mem, 0>, VEX_4V;
4752 defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
4753 memopv4i64, i256mem, 0>, VEX_4V;
4754 defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
4755 memopv4i64, i256mem, 0>, VEX_4V;
4756 defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
4757 memopv4i64, i256mem, 0>, VEX_4V;
4758 defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
4759 memopv4i64, i256mem, 0>, VEX_4V;
4760 defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
4761 memopv4i64, i256mem, 0>, VEX_4V;
4762 defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
4763 memopv4i64, i256mem, 0>, VEX_4V;
4764 defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
4765 memopv4i64, i256mem, 0>, VEX_4V;
4766 defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
4767 int_x86_avx2_phadd_sw>, VEX_4V;
4768 defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
4769 int_x86_avx2_phsub_sw>, VEX_4V;
4770 defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
4771 int_x86_avx2_pmadd_ub_sw>, VEX_4V;
4773 defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
4774 int_x86_avx2_pmul_hr_sw>, VEX_4V;
4777 // None of these have i8 immediate fields.
// Legacy 2-address SSSE3 encodings of the same binary ops.
4778 let ImmT = NoImm, Constraints = "$src1 = $dst" in {
4779 let isCommutable = 0 in {
4780 defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
4781 memopv2i64, i128mem>;
4782 defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
4783 memopv2i64, i128mem>;
4784 defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
4785 memopv2i64, i128mem>;
4786 defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
4787 memopv2i64, i128mem>;
4788 defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
4789 memopv2i64, i128mem>;
4790 defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
4791 memopv2i64, i128mem>;
4792 defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
4793 memopv2i64, i128mem>;
4794 defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
4795 memopv2i64, i128mem>;
4796 defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
4797 int_x86_ssse3_phadd_sw_128>;
4798 defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
4799 int_x86_ssse3_phsub_sw_128>;
4800 defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
4801 int_x86_ssse3_pmadd_ub_sw_128>;
4803 defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
4804 int_x86_ssse3_pmul_hr_sw_128>;
4807 //===---------------------------------------------------------------------===//
4808 // SSSE3 - Packed Align Instruction Patterns
4809 //===---------------------------------------------------------------------===//
// palignr: pattern-less defs (neverHasSideEffects); instruction selection
// happens through the explicit X86PAlign Pats below.
4811 multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
4812 let neverHasSideEffects = 1 in {
4813 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
4814 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
4816 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4818 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
4821 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
4822 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
4824 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4826 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
// 256-bit (AVX2) variant; 3-address asm only.
4831 multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
4832 let neverHasSideEffects = 1 in {
4833 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
4834 (ins VR256:$src1, VR256:$src2, i8imm:$src3),
4836 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4839 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
4840 (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
4842 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4847 let Predicates = [HasAVX] in
4848 defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
4849 let Predicates = [HasAVX2] in
4850 defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
4851 let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
4852 defm PALIGN : ssse3_palign<"palignr">;
// X86PAlign selection. Note the operand swap: the DAG node's ($src1,$src2)
// map to the instruction as ($src2, $src1) in every pattern below.
4854 let Predicates = [HasAVX2] in {
4855 def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
4856 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
4857 def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
4858 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
4859 def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
4860 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
4861 def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
4862 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
4865 let Predicates = [HasAVX] in {
4866 def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4867 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4868 def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4869 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4870 def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4871 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4872 def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4873 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4876 let Predicates = [HasSSSE3] in {
4877 def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4878 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4879 def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4880 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4881 def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4882 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4883 def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
4884 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
4887 //===---------------------------------------------------------------------===//
4888 // SSE3 - Thread synchronization (MONITOR/MWAIT)
4889 //===---------------------------------------------------------------------===//
// (Header previously said "SSSE3", but these are SSE3 instructions: every
// def below is Requires<[HasSSE3]> and selects the int_x86_sse3_* intrinsics.)
// Pseudos expanded via a custom inserter that marshals the operands into
// the fixed registers the real instructions read implicitly.
4891 let usesCustomInserter = 1 in {
4892 def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
4893 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
4894 Requires<[HasSSE3]>;
4895 def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
4896 [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
4897 Requires<[HasSSE3]>;
// Real encodings: operands are implicit register uses, none explicit.
4900 let Uses = [EAX, ECX, EDX] in
4901 def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
4902 Requires<[HasSSE3]>;
4903 let Uses = [ECX, EAX] in
4904 def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
4905 Requires<[HasSSE3]>;
// Assembler aliases for the explicit-operand spellings in 32/64-bit mode.
4907 def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
4908 def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
4910 def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
4911 Requires<[In32BitMode]>;
4912 def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
4913 Requires<[In64BitMode]>;
4915 //===----------------------------------------------------------------------===//
4916 // SSE4.1 - Packed Move with Sign/Zero Extend
4917 //===----------------------------------------------------------------------===//
// Extend the low 8 bytes of a 128-bit source; rm form loads an i64 scalar.
4919 multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
4920 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4921 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4922 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
4924 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4925 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4927 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
// AVX2 variant extending a full 128-bit source into a 256-bit result.
4931 multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
4933 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
4934 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4935 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
4937 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
4938 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4939 [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
4942 let Predicates = [HasAVX] in {
4943 defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
4945 defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
4947 defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
4949 defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
4951 defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
4953 defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
4957 let Predicates = [HasAVX2] in {
4958 defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
4959 int_x86_avx2_pmovsxbw>, VEX;
4960 defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
4961 int_x86_avx2_pmovsxwd>, VEX;
4962 defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
4963 int_x86_avx2_pmovsxdq>, VEX;
4964 defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
4965 int_x86_avx2_pmovzxbw>, VEX;
4966 defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
4967 int_x86_avx2_pmovzxwd>, VEX;
4968 defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
4969 int_x86_avx2_pmovzxdq>, VEX;
// Legacy SSE4.1 encodings.
4972 defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
4973 defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
4974 defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
4975 defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
4976 defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
4977 defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
// Fold scalar loads (vzmovl/vzload of the low 64 bits) directly into the
// memory forms of the sign/zero-extend intrinsics.
4979 let Predicates = [HasAVX] in {
4980 // Common patterns involving scalar load.
4981 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
4982 (VPMOVSXBWrm addr:$src)>;
4983 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
4984 (VPMOVSXBWrm addr:$src)>;
4986 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
4987 (VPMOVSXWDrm addr:$src)>;
4988 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
4989 (VPMOVSXWDrm addr:$src)>;
4991 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
4992 (VPMOVSXDQrm addr:$src)>;
4993 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
4994 (VPMOVSXDQrm addr:$src)>;
4996 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
4997 (VPMOVZXBWrm addr:$src)>;
4998 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
4999 (VPMOVZXBWrm addr:$src)>;
5001 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
5002 (VPMOVZXWDrm addr:$src)>;
5003 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
5004 (VPMOVZXWDrm addr:$src)>;
5006 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
5007 (VPMOVZXDQrm addr:$src)>;
5008 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
5009 (VPMOVZXDQrm addr:$src)>;
// Same folds for the non-VEX SSE4.1 instructions.
5012 let Predicates = [HasSSE41] in {
5013 // Common patterns involving scalar load.
5014 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
5015 (PMOVSXBWrm addr:$src)>;
5016 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
5017 (PMOVSXBWrm addr:$src)>;
5019 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
5020 (PMOVSXWDrm addr:$src)>;
5021 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
5022 (PMOVSXWDrm addr:$src)>;
5024 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
5025 (PMOVSXDQrm addr:$src)>;
5026 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
5027 (PMOVSXDQrm addr:$src)>;
5029 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
5030 (PMOVZXBWrm addr:$src)>;
5031 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
5032 (PMOVZXBWrm addr:$src)>;
5034 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
5035 (PMOVZXWDrm addr:$src)>;
5036 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
5037 (PMOVZXWDrm addr:$src)>;
5039 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
5040 (PMOVZXDQrm addr:$src)>;
5041 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
5042 (PMOVZXDQrm addr:$src)>;
// Lower X86vsmovl (sign-extending move of the low elements) via pmovsx.
5045 let Predicates = [HasAVX] in {
5046 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
5047 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
5050 let Predicates = [HasSSE41] in {
5051 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
5052 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
// Extend the low 4 bytes / 2 words; rm form loads an i32 scalar.
5056 multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
5057 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5058 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5059 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
5061 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
5062 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5064 (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
// AVX2 variant producing a 256-bit result from an i64 scalar load.
5068 multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
5070 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
5071 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5072 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5074 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
5075 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5077 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
5081 let Predicates = [HasAVX] in {
5082 defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
5084 defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
5086 defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
5088 defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
5092 let Predicates = [HasAVX2] in {
5093 defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
5094 int_x86_avx2_pmovsxbd>, VEX;
5095 defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
5096 int_x86_avx2_pmovsxwq>, VEX;
5097 defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
5098 int_x86_avx2_pmovzxbd>, VEX;
5099 defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
5100 int_x86_avx2_pmovzxwq>, VEX;
// Legacy SSE4.1 encodings.
5103 defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
5104 defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
5105 defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
5106 defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
5108 let Predicates = [HasAVX] in {
5109 // Common patterns involving scalar load
5110 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
5111 (VPMOVSXBDrm addr:$src)>;
5112 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
5113 (VPMOVSXWQrm addr:$src)>;
5115 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
5116 (VPMOVZXBDrm addr:$src)>;
5117 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
5118 (VPMOVZXWQrm addr:$src)>;
5121 let Predicates = [HasSSE41] in {
5122 // Common patterns involving scalar load
5123 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
5124 (PMOVSXBDrm addr:$src)>;
5125 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
5126 (PMOVSXWQrm addr:$src)>;
5128 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
5129 (PMOVZXBDrm addr:$src)>;
5130 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
5131 (PMOVZXWQrm addr:$src)>;
// SS41I_binop_rm_int2 - SSE4.1 extension op whose memory form reads only
// two source bytes via an any-extending i16 load (pmovsxbq/pmovzxbq).
5134 multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
// Register form: extend the low elements of a full XMM source.
5135 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5136 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5137 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
5139 // Expecting an i16 load any-extended to an i32 value.
5140 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
5141 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5142 [(set VR128:$dst, (IntId (bitconvert
5143 (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
// SS41I_binop_rm_int4_y - 256-bit (VR256 destination) extension op for the
// narrowest sources (instantiated below as vpmovsxbq/vpmovzxbq on YMM).
5147 multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
5149 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
5150 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5151 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5153 // Expecting a i16 load any extended to i32 value.
// NOTE(review): the comment above and the i16mem operand disagree with the
// pattern, which performs a plain loadi32 -- confirm the intended width.
5154 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
5155 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5156 [(set VR256:$dst, (IntId (bitconvert
5157 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
5161 let Predicates = [HasAVX] in {
5162 defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
5164 defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
5167 let Predicates = [HasAVX2] in {
5168 defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
5169 int_x86_avx2_pmovsxbq>, VEX;
5170 defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
5171 int_x86_avx2_pmovzxbq>, VEX;
5173 defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
5174 defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
5176 let Predicates = [HasAVX] in {
5177 // Common patterns involving scalar load
5178 def : Pat<(int_x86_sse41_pmovsxbq
5179 (bitconvert (v4i32 (X86vzmovl
5180 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5181 (VPMOVSXBQrm addr:$src)>;
5183 def : Pat<(int_x86_sse41_pmovzxbq
5184 (bitconvert (v4i32 (X86vzmovl
5185 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5186 (VPMOVZXBQrm addr:$src)>;
5189 let Predicates = [HasSSE41] in {
5190 // Common patterns involving scalar load
5191 def : Pat<(int_x86_sse41_pmovsxbq
5192 (bitconvert (v4i32 (X86vzmovl
5193 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5194 (PMOVSXBQrm addr:$src)>;
5196 def : Pat<(int_x86_sse41_pmovzxbq
5197 (bitconvert (v4i32 (X86vzmovl
5198 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5199 (PMOVZXBQrm addr:$src)>;
5202 //===----------------------------------------------------------------------===//
5203 // SSE4.1 - Extract Instructions
5204 //===----------------------------------------------------------------------===//
5206 /// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
5207 multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// Register form: the selected byte lands zero-extended in GR32
// (hence the AssertZext note below).
5208 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5209 (ins VR128:$src1, i32i8imm:$src2),
5210 !strconcat(OpcodeStr,
5211 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5212 [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
// Store form: no ISel pattern (see note below); marked mayStore with no
// other side effects so the backend still knows it writes memory.
5214 let neverHasSideEffects = 1, mayStore = 1 in
5215 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5216 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
5217 !strconcat(OpcodeStr,
5218 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5221 // There's an AssertZext in the way of writing the store pattern
5222 // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
5225 let Predicates = [HasAVX] in {
5226 defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
5227 def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
5228 (ins VR128:$src1, i32i8imm:$src2),
5229 "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
5232 defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
5235 /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
5236 multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
5237 let neverHasSideEffects = 1, mayStore = 1 in
5238 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5239 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
5240 !strconcat(OpcodeStr,
5241 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5244 // There's an AssertZext in the way of writing the store pattern
5245 // (store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), addr:$dst)
5248 let Predicates = [HasAVX] in
5249 defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
5251 defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
5254 /// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
5255 multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
5256 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5257 (ins VR128:$src1, i32i8imm:$src2),
5258 !strconcat(OpcodeStr,
5259 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5261 (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
5262 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5263 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
5264 !strconcat(OpcodeStr,
5265 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5266 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
5267 addr:$dst)]>, OpSize;
5270 let Predicates = [HasAVX] in
5271 defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
5273 defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
5275 /// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
5276 multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
// Register form: extract the selected i64 element into GR64 (REX.W encoded).
5277 def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
5278 (ins VR128:$src1, i32i8imm:$src2),
5279 !strconcat(OpcodeStr,
5280 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5282 (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
// Memory form: store the selected i64 element straight to memory.
5283 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5284 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
5285 !strconcat(OpcodeStr,
5286 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5287 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
5288 addr:$dst)]>, OpSize, REX_W;
5291 let Predicates = [HasAVX] in
5292 defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
5294 defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
5296 /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
5298 multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
5299 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5300 (ins VR128:$src1, i32i8imm:$src2),
5301 !strconcat(OpcodeStr,
5302 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5304 (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
5306 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5307 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
5308 !strconcat(OpcodeStr,
5309 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5310 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
5311 addr:$dst)]>, OpSize;
5314 let ExeDomain = SSEPackedSingle in {
5315 let Predicates = [HasAVX] in {
5316 defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
5317 def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
5318 (ins VR128:$src1, i32i8imm:$src2),
5319 "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
5322 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
5325 // Also match an EXTRACTPS store when the store is done as f32 instead of i32.
5326 def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
5329 (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
5331 def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
5334 (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
5335 Requires<[HasSSE41]>;
5337 //===----------------------------------------------------------------------===//
5338 // SSE4.1 - Insert Instructions
5339 //===----------------------------------------------------------------------===//
5341 multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
5342 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5343 (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
5345 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5347 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5349 (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
5350 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5351 (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
5353 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5355 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5357 (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
5358 imm:$src3))]>, OpSize;
5361 let Predicates = [HasAVX] in
5362 defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
5363 let Constraints = "$src1 = $dst" in
5364 defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
5366 multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
5367 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5368 (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
5370 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5372 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5374 (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
5376 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5377 (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
5379 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5381 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5383 (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
5384 imm:$src3)))]>, OpSize;
5387 let Predicates = [HasAVX] in
5388 defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
5389 let Constraints = "$src1 = $dst" in
5390 defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
5392 multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
5393 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5394 (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
5396 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5398 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5400 (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
5402 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5403 (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
5405 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5407 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5409 (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
5410 imm:$src3)))]>, OpSize;
5413 let Predicates = [HasAVX] in
5414 defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
5415 let Constraints = "$src1 = $dst" in
5416 defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
5418 // insertps has a few different modes, there's the first two here below which
5419 // are optimized inserts that won't zero arbitrary elements in the destination
5420 // vector. The next one matches the intrinsic and could zero arbitrary elements
5421 // in the target vector.
5422 multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
5423 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5424 (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
5426 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5428 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5430 (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
5432 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5433 (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
5435 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5437 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5439 (X86insrtps VR128:$src1,
5440 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
5441 imm:$src3))]>, OpSize;
5444 let ExeDomain = SSEPackedSingle in {
5445 let Predicates = [HasAVX] in
5446 defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
5447 let Constraints = "$src1 = $dst" in
5448 defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
5451 //===----------------------------------------------------------------------===//
5452 // SSE4.1 - Round Instructions
5453 //===----------------------------------------------------------------------===//
5455 multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
5456 X86MemOperand x86memop, RegisterClass RC,
5457 PatFrag mem_frag32, PatFrag mem_frag64,
5458 Intrinsic V4F32Int, Intrinsic V2F64Int> {
5459 let ExeDomain = SSEPackedSingle in {
5460 // Intrinsic operation, reg.
5461 // Vector intrinsic operation, reg
5462 def PSr : SS4AIi8<opcps, MRMSrcReg,
5463 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
5464 !strconcat(OpcodeStr,
5465 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5466 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
5469 // Vector intrinsic operation, mem
5470 def PSm : SS4AIi8<opcps, MRMSrcMem,
5471 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
5472 !strconcat(OpcodeStr,
5473 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5475 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
5477 } // ExeDomain = SSEPackedSingle
5479 let ExeDomain = SSEPackedDouble in {
5480 // Vector intrinsic operation, reg
5481 def PDr : SS4AIi8<opcpd, MRMSrcReg,
5482 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
5483 !strconcat(OpcodeStr,
5484 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5485 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
5488 // Vector intrinsic operation, mem
5489 def PDm : SS4AIi8<opcpd, MRMSrcMem,
5490 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
5491 !strconcat(OpcodeStr,
5492 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5494 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
5496 } // ExeDomain = SSEPackedDouble
5499 multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
5502 Intrinsic F64Int, bit Is2Addr = 1> {
5503 let ExeDomain = GenericDomain in {
5505 def SSr : SS4AIi8<opcss, MRMSrcReg,
5506 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
5508 !strconcat(OpcodeStr,
5509 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5510 !strconcat(OpcodeStr,
5511 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5514 // Intrinsic operation, reg.
5515 def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
5516 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
5518 !strconcat(OpcodeStr,
5519 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5520 !strconcat(OpcodeStr,
5521 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5522 [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
5525 // Intrinsic operation, mem.
5526 def SSm : SS4AIi8<opcss, MRMSrcMem,
5527 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
5529 !strconcat(OpcodeStr,
5530 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5531 !strconcat(OpcodeStr,
5532 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5534 (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
5538 def SDr : SS4AIi8<opcsd, MRMSrcReg,
5539 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
5541 !strconcat(OpcodeStr,
5542 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5543 !strconcat(OpcodeStr,
5544 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5547 // Intrinsic operation, reg.
5548 def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
5549 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
5551 !strconcat(OpcodeStr,
5552 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5553 !strconcat(OpcodeStr,
5554 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5555 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
5558 // Intrinsic operation, mem.
5559 def SDm : SS4AIi8<opcsd, MRMSrcMem,
5560 (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
5562 !strconcat(OpcodeStr,
5563 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5564 !strconcat(OpcodeStr,
5565 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5567 (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
5569 } // ExeDomain = GenericDomain
5572 // FP round - roundss, roundps, roundsd, roundpd
5573 let Predicates = [HasAVX] in {
5575 defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
5576 memopv4f32, memopv2f64,
5577 int_x86_sse41_round_ps,
5578 int_x86_sse41_round_pd>, VEX;
5579 defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
5580 memopv8f32, memopv4f64,
5581 int_x86_avx_round_ps_256,
5582 int_x86_avx_round_pd_256>, VEX;
5583 defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
5584 int_x86_sse41_round_ss,
5585 int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
// Lower scalar FP rounding ISD nodes to VROUNDSS/VROUNDSD. The immediate
// selects the SSE4.1 rounding control: 0x1 = floor (round down),
// 0x2 = ceil (round up), 0x3 = truncate, 0x4 = round per MXCSR.RC (frint),
// 0xC = round per MXCSR.RC with the inexact exception suppressed
// (fnearbyint). The first operand is undefined; only the low scalar of
// the result is used.
5587 def : Pat<(f32 (ffloor FR32:$src)),
5588 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
5589 def : Pat<(f64 (ffloor FR64:$src)),
5590 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
5591 def : Pat<(f32 (fnearbyint FR32:$src)),
5592 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
5593 def : Pat<(f64 (fnearbyint FR64:$src)),
5594 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
5595 def : Pat<(f32 (fceil FR32:$src)),
5596 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
5597 def : Pat<(f64 (fceil FR64:$src)),
5598 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
5599 def : Pat<(f32 (frint FR32:$src)),
5600 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
5601 def : Pat<(f64 (frint FR64:$src)),
5602 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
5603 def : Pat<(f32 (ftrunc FR32:$src)),
5604 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
5605 def : Pat<(f64 (ftrunc FR64:$src)),
5606 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
5609 defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
5610 memopv4f32, memopv2f64,
5611 int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
5612 let Constraints = "$src1 = $dst" in
5613 defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
5614 int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
// SSE4.1 (non-VEX) versions of the scalar rounding patterns above.
// Immediate meanings: 0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 round per
// MXCSR.RC (frint), 0xC round per MXCSR.RC with inexact suppressed
// (fnearbyint).
5616 def : Pat<(f32 (ffloor FR32:$src)),
5617 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
5618 def : Pat<(f64 (ffloor FR64:$src)),
5619 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
5620 def : Pat<(f32 (fnearbyint FR32:$src)),
5621 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
5622 def : Pat<(f64 (fnearbyint FR64:$src)),
5623 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
5624 def : Pat<(f32 (fceil FR32:$src)),
5625 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
5626 def : Pat<(f64 (fceil FR64:$src)),
5627 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
5628 def : Pat<(f32 (frint FR32:$src)),
5629 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
5630 def : Pat<(f64 (frint FR64:$src)),
5631 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
5632 def : Pat<(f32 (ftrunc FR32:$src)),
5633 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
5634 def : Pat<(f64 (ftrunc FR64:$src)),
5635 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
5637 //===----------------------------------------------------------------------===//
5638 // SSE4.1 - Packed Bit Test
5639 //===----------------------------------------------------------------------===//
5641 // PTEST sets EFLAGS from a packed bit test; X86ISelLowering lowers the
5642 // corresponding Intel intrinsic to the X86ptest node matched below.
5643 let Defs = [EFLAGS], Predicates = [HasAVX] in {
5644 def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
5645 "vptest\t{$src2, $src1|$src1, $src2}",
5646 [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
5648 def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
5649 "vptest\t{$src2, $src1|$src1, $src2}",
5650 [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
5653 def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
5654 "vptest\t{$src2, $src1|$src1, $src2}",
5655 [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
5657 def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
5658 "vptest\t{$src2, $src1|$src1, $src2}",
5659 [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
5663 let Defs = [EFLAGS] in {
5664 def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
5665 "ptest\t{$src2, $src1|$src1, $src2}",
5666 [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
5668 def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
5669 "ptest\t{$src2, $src1|$src1, $src2}",
5670 [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
5674 // The bit test instructions below are AVX only
5675 multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
5676 X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
5677 def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
5678 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
5679 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
5680 def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
5681 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
5682 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
5686 let Defs = [EFLAGS], Predicates = [HasAVX] in {
5687 let ExeDomain = SSEPackedSingle in {
5688 defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
5689 defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
5691 let ExeDomain = SSEPackedDouble in {
5692 defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
5693 defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
5697 //===----------------------------------------------------------------------===//
5698 // SSE4.1 - Misc Instructions
5699 //===----------------------------------------------------------------------===//
5701 let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
5702 def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
5703 "popcnt{w}\t{$src, $dst|$dst, $src}",
5704 [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
5706 def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
5707 "popcnt{w}\t{$src, $dst|$dst, $src}",
5708 [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
5709 (implicit EFLAGS)]>, OpSize, XS;
5711 def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
5712 "popcnt{l}\t{$src, $dst|$dst, $src}",
5713 [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
5715 def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
5716 "popcnt{l}\t{$src, $dst|$dst, $src}",
5717 [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
5718 (implicit EFLAGS)]>, XS;
5720 def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
5721 "popcnt{q}\t{$src, $dst|$dst, $src}",
5722 [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
5724 def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
5725 "popcnt{q}\t{$src, $dst|$dst, $src}",
5726 [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
5727 (implicit EFLAGS)]>, XS;
5732 // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
5733 multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
5734 Intrinsic IntId128> {
5735 def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
5737 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5738 [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
5739 def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
5741 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5744 (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
5747 let Predicates = [HasAVX] in
5748 defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
5749 int_x86_sse41_phminposuw>, VEX;
5750 defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
5751 int_x86_sse41_phminposuw>;
5753 /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
5754 multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
5755 Intrinsic IntId128, bit Is2Addr = 1> {
5756 let isCommutable = 1 in
5757 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
5758 (ins VR128:$src1, VR128:$src2),
5760 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5761 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5762 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
5763 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
5764 (ins VR128:$src1, i128mem:$src2),
5766 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5767 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5769 (IntId128 VR128:$src1,
5770 (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
5773 /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator, 256-bit form
5774 multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
5775 Intrinsic IntId256> {
5776 let isCommutable = 1 in
5777 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
5778 (ins VR256:$src1, VR256:$src2),
5779 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5780 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
// Memory form: the second source comes from a 256-bit load.
5781 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
5782 (ins VR256:$src1, i256mem:$src2),
5783 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5785 (IntId256 VR256:$src1,
5786 (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
5789 let Predicates = [HasAVX] in {
5790 let isCommutable = 0 in
5791 defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
5793 defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
5795 defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
5797 defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
5799 defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
5801 defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
5803 defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
5805 defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
5807 defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
5809 defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
5813 let Predicates = [HasAVX2] in {
5814 let isCommutable = 0 in
5815 defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
5816 int_x86_avx2_packusdw>, VEX_4V;
5817 defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
5818 int_x86_avx2_pmins_b>, VEX_4V;
5819 defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
5820 int_x86_avx2_pmins_d>, VEX_4V;
5821 defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
5822 int_x86_avx2_pminu_d>, VEX_4V;
5823 defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
5824 int_x86_avx2_pminu_w>, VEX_4V;
5825 defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
5826 int_x86_avx2_pmaxs_b>, VEX_4V;
5827 defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
5828 int_x86_avx2_pmaxs_d>, VEX_4V;
5829 defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
5830 int_x86_avx2_pmaxu_d>, VEX_4V;
5831 defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
5832 int_x86_avx2_pmaxu_w>, VEX_4V;
5833 defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
5834 int_x86_avx2_pmul_dq>, VEX_4V;
5837 let Constraints = "$src1 = $dst" in {
5838 let isCommutable = 0 in
5839 defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
5840 defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
5841 defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
5842 defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
5843 defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
5844 defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
5845 defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
5846 defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
5847 defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
5848 defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
5851 /// SS48I_binop_rm - Simple SSE41 binary operator.
5852 multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5853 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
5854 X86MemOperand x86memop, bit Is2Addr = 1> {
5855 let isCommutable = 1 in
5856 def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
5857 (ins RC:$src1, RC:$src2),
5859 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5860 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5861 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
5862 def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
5863 (ins RC:$src1, x86memop:$src2),
5865 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5866 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5868 (OpVT (OpNode RC:$src1,
5869 (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
5872 let Predicates = [HasAVX] in {
5873 defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
5874 memopv2i64, i128mem, 0>, VEX_4V;
5875 defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
5876 memopv2i64, i128mem, 0>, VEX_4V;
5878 let Predicates = [HasAVX2] in {
5879 defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
5880 memopv4i64, i256mem, 0>, VEX_4V;
5881 defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
5882 memopv4i64, i256mem, 0>, VEX_4V;
5885 let Constraints = "$src1 = $dst" in {
5886 defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
5887 memopv2i64, i128mem>;
5888 defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
5889 memopv2i64, i128mem>;
5892 /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
5893 multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
5894 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
5895 X86MemOperand x86memop, bit Is2Addr = 1> {
5896 let isCommutable = 1 in
5897 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
5898 (ins RC:$src1, RC:$src2, u32u8imm:$src3),
5900 !strconcat(OpcodeStr,
5901 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5902 !strconcat(OpcodeStr,
5903 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5904 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
5906 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
5907 (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
5909 !strconcat(OpcodeStr,
5910 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5911 !strconcat(OpcodeStr,
5912 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5915 (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
// Instantiations of SS41I_binop_rmi_int for the immediate-controlled
// blend (BLENDPS/PD, PBLENDW), dot-product (DPPS/PD) and MPSADBW
// instructions. isCommutable = 0: the immediate selects lanes from specific
// operands, so the operands are not interchangeable.
// AVX (VEX-encoded, three-operand) forms; "Y" suffix = 256-bit VR256.
5919 let Predicates = [HasAVX] in {
5920 let isCommutable = 0 in {
5921 let ExeDomain = SSEPackedSingle in {
5922 defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
5923 VR128, memopv4f32, i128mem, 0>, VEX_4V;
5924 defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
5925 int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
5927 let ExeDomain = SSEPackedDouble in {
5928 defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
5929 VR128, memopv2f64, i128mem, 0>, VEX_4V;
5930 defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
5931 int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
5933 defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
5934 VR128, memopv2i64, i128mem, 0>, VEX_4V;
5935 defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
5936 VR128, memopv2i64, i128mem, 0>, VEX_4V;
5938 let ExeDomain = SSEPackedSingle in
5939 defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
5940 VR128, memopv4f32, i128mem, 0>, VEX_4V;
5941 let ExeDomain = SSEPackedDouble in
5942 defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
5943 VR128, memopv2f64, i128mem, 0>, VEX_4V;
5944 let ExeDomain = SSEPackedSingle in
5945 defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
5946 VR256, memopv8f32, i256mem, 0>, VEX_4V;
// AVX2 256-bit integer forms of PBLENDW and MPSADBW.
5949 let Predicates = [HasAVX2] in {
5950 let isCommutable = 0 in {
5951 defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
5952 VR256, memopv4i64, i256mem, 0>, VEX_4V;
5953 defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
5954 VR256, memopv4i64, i256mem, 0>, VEX_4V;
// Legacy SSE4.1 two-operand forms ($dst tied to $src1, default Is2Addr = 1).
5958 let Constraints = "$src1 = $dst" in {
5959 let isCommutable = 0 in {
5960 let ExeDomain = SSEPackedSingle in
5961 defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
5962 VR128, memopv4f32, i128mem>;
5963 let ExeDomain = SSEPackedDouble in
5964 defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
5965 VR128, memopv2f64, i128mem>;
5966 defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
5967 VR128, memopv2i64, i128mem>;
5968 defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
5969 VR128, memopv2i64, i128mem>;
5971 let ExeDomain = SSEPackedSingle in
5972 defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
5973 VR128, memopv4f32, i128mem>;
5974 let ExeDomain = SSEPackedDouble in
5975 defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
5976 VR128, memopv2f64, i128mem>;
5979 /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
// Four-operand AVX form of the variable-blend instructions: the mask is an
// explicit register operand ($src3) rather than implicit XMM0. VEX_I8IMM
// encodes the fourth (register) operand in the immediate byte.
5980 multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
5981 RegisterClass RC, X86MemOperand x86memop,
5982 PatFrag mem_frag, Intrinsic IntId> {
// reg/reg/reg form.
5983 def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
5984 (ins RC:$src1, RC:$src2, RC:$src3),
5985 !strconcat(OpcodeStr,
5986 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5987 [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
5988 IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
// reg/mem/reg form: $src2 comes from memory via mem_frag + bitconvert.
5990 def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
5991 (ins RC:$src1, x86memop:$src2, RC:$src3),
5992 !strconcat(OpcodeStr,
5993 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5995 (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
5997 IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
// AVX variable blend instructions (explicit mask register operand).
6000 let Predicates = [HasAVX] in {
6001 let ExeDomain = SSEPackedDouble in {
6002 defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
6003 memopv2f64, int_x86_sse41_blendvpd>;
6004 defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
6005 memopv4f64, int_x86_avx_blendv_pd_256>;
6006 } // ExeDomain = SSEPackedDouble
6007 let ExeDomain = SSEPackedSingle in {
6008 defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
6009 memopv4f32, int_x86_sse41_blendvps>;
6010 defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
6011 memopv8f32, int_x86_avx_blendv_ps_256>;
6012 } // ExeDomain = SSEPackedSingle
6013 defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
6014 memopv2i64, int_x86_sse41_pblendvb>;
// AVX2 adds the 256-bit byte-blend form.
6017 let Predicates = [HasAVX2] in {
6018 defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
6019 memopv4i64, int_x86_avx2_pblendvb>;
// Select vselect DAG nodes to the AVX blendv instructions. Note the operand
// swap: vselect(mask, src1, src2) is emitted as BLENDV(src2, src1, mask)
// because the hardware blend picks from the second source where the mask
// (sign) bit is set.
6022 let Predicates = [HasAVX] in {
6023 def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
6024 (v16i8 VR128:$src2))),
6025 (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6026 def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
6027 (v4i32 VR128:$src2))),
6028 (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6029 def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
6030 (v4f32 VR128:$src2))),
6031 (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6032 def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
6033 (v2i64 VR128:$src2))),
6034 (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6035 def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
6036 (v2f64 VR128:$src2))),
6037 (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6038 def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
6039 (v8i32 VR256:$src2))),
6040 (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6041 def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
6042 (v8f32 VR256:$src2))),
6043 (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6044 def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
6045 (v4i64 VR256:$src2))),
6046 (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6047 def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
6048 (v4f64 VR256:$src2))),
6049 (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
// 256-bit byte vselect needs AVX2's VPBLENDVBY.
6052 let Predicates = [HasAVX2] in {
6053 def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
6054 (v32i8 VR256:$src2))),
6055 (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6058 /// SS41I_ternary_int - SSE 4.1 ternary operator
// Legacy (non-VEX) variable-blend forms: the mask is the implicit XMM0
// register (Uses = [XMM0]) and $dst is tied to $src1. The "0" suffix in the
// record names (rr0/rm0) reflects the implicit-XMM0 operand.
6059 let Uses = [XMM0], Constraints = "$src1 = $dst" in {
6060 multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
6062 def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
6063 (ins VR128:$src1, VR128:$src2),
6064 !strconcat(OpcodeStr,
6065 "\t{$src2, $dst|$dst, $src2}"),
6066 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
6069 def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
6070 (ins VR128:$src1, i128mem:$src2),
6071 !strconcat(OpcodeStr,
6072 "\t{$src2, $dst|$dst, $src2}"),
6075 (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
6079 let ExeDomain = SSEPackedDouble in
6080 defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
6081 int_x86_sse41_blendvpd>;
6082 let ExeDomain = SSEPackedSingle in
6083 defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
6084 int_x86_sse41_blendvps>;
6085 defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
6086 int_x86_sse41_pblendvb>;
// Select vselect with an XMM0 mask to the legacy blendv forms. As with the
// AVX patterns above, src1/src2 are swapped relative to vselect.
6088 let Predicates = [HasSSE41] in {
6089 def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
6090 (v16i8 VR128:$src2))),
6091 (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
6092 def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
6093 (v4i32 VR128:$src2))),
6094 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
6095 def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
6096 (v4f32 VR128:$src2))),
6097 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
6098 def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
6099 (v2i64 VR128:$src2))),
6100 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
6101 def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
6102 (v2f64 VR128:$src2))),
6103 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
// MOVNTDQA - non-temporal aligned load, exposed via the movntdqa intrinsics.
// AVX 128-bit, AVX2 256-bit ("Y" suffix), and legacy SSE4.1 forms.
6106 let Predicates = [HasAVX] in
6107 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6108 "vmovntdqa\t{$src, $dst|$dst, $src}",
6109 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
6111 let Predicates = [HasAVX2] in
6112 def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
6113 "vmovntdqa\t{$src, $dst|$dst, $src}",
6114 [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
6116 def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6117 "movntdqa\t{$src, $dst|$dst, $src}",
6118 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
6121 //===----------------------------------------------------------------------===//
6122 // SSE4.2 - Compare Instructions
6123 //===----------------------------------------------------------------------===//
6125 /// SS42I_binop_rm - Simple SSE 4.2 binary operator
// Emits reg/reg (rr) and reg/mem (rm) records selected from the OpNode DAG
// pattern; Is2Addr picks the two- vs. three-operand asm string.
6126 multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6127 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6128 X86MemOperand x86memop, bit Is2Addr = 1> {
6129 def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
6130 (ins RC:$src1, RC:$src2),
6132 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6133 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6134 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
6136 def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
6137 (ins RC:$src1, x86memop:$src2),
6139 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6140 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6142 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
// PCMPGTQ (packed signed quadword greater-than): AVX, AVX2 (256-bit) and
// legacy SSE4.2 forms.
6145 let Predicates = [HasAVX] in
6146 defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
6147 memopv2i64, i128mem, 0>, VEX_4V;
6149 let Predicates = [HasAVX2] in
6150 defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
6151 memopv4i64, i256mem, 0>, VEX_4V;
6153 let Constraints = "$src1 = $dst" in
6154 defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
6155 memopv2i64, i128mem>;
6157 //===----------------------------------------------------------------------===//
6158 // SSE4.2 - String/text Processing Instructions
6159 //===----------------------------------------------------------------------===//
6161 // Packed Compare Implicit Length Strings, Return Mask
// Pseudo-instructions selected from the pcmpistrm128 intrinsic; expanded by
// a custom inserter (usesCustomInserter below) rather than emitted directly.
6162 multiclass pseudo_pcmpistrm<string asm> {
6163 def REG : PseudoI<(outs VR128:$dst),
6164 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6165 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
6167 def MEM : PseudoI<(outs VR128:$dst),
6168 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6169 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
6170 VR128:$src1, (load addr:$src2), imm:$src3))]>;
// AddedComplexity = 1 prefers the AVX pseudo when both predicates hold.
6173 let Defs = [EFLAGS], usesCustomInserter = 1 in {
6174 let AddedComplexity = 1 in
6175 defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
6176 defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
// Real encodings: result mask is written to implicit XMM0; no DAG patterns
// (neverHasSideEffects = 1, selection goes through the pseudos above).
6179 let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
6180 def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
6181 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6182 "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
6184 def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
6185 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6186 "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
6189 let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
6190 def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
6191 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6192 "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
6194 def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
6195 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6196 "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
6199 // Packed Compare Explicit Length Strings, Return Mask
// Explicit-length variant: string lengths are passed in implicit EAX/EDX
// (see Uses below), hence the intrinsic takes EAX and EDX operands.
6200 multiclass pseudo_pcmpestrm<string asm> {
6201 def REG : PseudoI<(outs VR128:$dst),
6202 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6203 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
6204 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
6205 def MEM : PseudoI<(outs VR128:$dst),
6206 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6207 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
6208 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
6211 let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
6212 let AddedComplexity = 1 in
6213 defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
6214 defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
// Real encodings (mask in implicit XMM0, lengths in EAX/EDX).
6217 let Predicates = [HasAVX],
6218 Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
6219 def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
6220 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6221 "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
6223 def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
6224 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6225 "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
6228 let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
6229 def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
6230 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6231 "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
6233 def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
6234 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6235 "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
6238 // Packed Compare Implicit Length Strings, Return Index
// Result index is written to implicit ECX; EFLAGS is also defined. The
// A/C/O/S/Z-suffixed variants map the per-flag intrinsics onto the same
// encoding (0x63), differing only in which flag the intrinsic reads.
6239 let Defs = [ECX, EFLAGS] in {
6240 multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
6241 def rr : SS42AI<0x63, MRMSrcReg, (outs),
6242 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6243 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6244 [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
6245 (implicit EFLAGS)]>, OpSize;
6246 def rm : SS42AI<0x63, MRMSrcMem, (outs),
6247 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6248 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6249 [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
6250 (implicit EFLAGS)]>, OpSize;
6254 let Predicates = [HasAVX] in {
6255 defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
6257 defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
6259 defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
6261 defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
6263 defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
6265 defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
6269 defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
6270 defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
6271 defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
6272 defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
6273 defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
6274 defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
6276 // Packed Compare Explicit Length Strings, Return Index
// Explicit-length variant: string lengths are read from implicit EAX/EDX.
6277 let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
6278 multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
6279 def rr : SS42AI<0x61, MRMSrcReg, (outs),
6280 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6281 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6282 [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
6283 (implicit EFLAGS)]>, OpSize;
6284 def rm : SS42AI<0x61, MRMSrcMem, (outs),
6285 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6286 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6288 (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
6289 (implicit EFLAGS)]>, OpSize;
6293 let Predicates = [HasAVX] in {
6294 defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
6296 defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
6298 defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
6300 defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
6302 defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
6304 defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
6308 defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
6309 defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
6310 defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
6311 defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
6312 defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
6313 defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
6315 //===----------------------------------------------------------------------===//
6316 // SSE4.2 - CRC Instructions
6317 //===----------------------------------------------------------------------===//
6319 // No CRC instructions have AVX equivalents
6321 // crc intrinsic instruction
6322 // This set of instructions are only rm, the only difference is the size
// CRC32 accumulates into $src1 (tied to $dst). Record names encode
// <dest width><src kind><src width>, e.g. CRC32r32m8 = 32-bit dest,
// 8-bit memory source. Each width has reg and mem source forms.
6324 let Constraints = "$src1 = $dst" in {
6325 def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
6326 (ins GR32:$src1, i8mem:$src2),
6327 "crc32{b} \t{$src2, $src1|$src1, $src2}",
6329 (int_x86_sse42_crc32_32_8 GR32:$src1,
6330 (load addr:$src2)))]>;
6331 def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
6332 (ins GR32:$src1, GR8:$src2),
6333 "crc32{b} \t{$src2, $src1|$src1, $src2}",
6335 (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
6336 def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
6337 (ins GR32:$src1, i16mem:$src2),
6338 "crc32{w} \t{$src2, $src1|$src1, $src2}",
6340 (int_x86_sse42_crc32_32_16 GR32:$src1,
6341 (load addr:$src2)))]>,
6343 def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
6344 (ins GR32:$src1, GR16:$src2),
6345 "crc32{w} \t{$src2, $src1|$src1, $src2}",
6347 (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
6349 def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
6350 (ins GR32:$src1, i32mem:$src2),
6351 "crc32{l} \t{$src2, $src1|$src1, $src2}",
6353 (int_x86_sse42_crc32_32_32 GR32:$src1,
6354 (load addr:$src2)))]>;
6355 def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
6356 (ins GR32:$src1, GR32:$src2),
6357 "crc32{l} \t{$src2, $src1|$src1, $src2}",
6359 (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
// 64-bit destination forms: only 8-bit and 64-bit sources are defined here.
6360 def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
6361 (ins GR64:$src1, i8mem:$src2),
6362 "crc32{b} \t{$src2, $src1|$src1, $src2}",
6364 (int_x86_sse42_crc32_64_8 GR64:$src1,
6365 (load addr:$src2)))]>,
6367 def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
6368 (ins GR64:$src1, GR8:$src2),
6369 "crc32{b} \t{$src2, $src1|$src1, $src2}",
6371 (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
6373 def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
6374 (ins GR64:$src1, i64mem:$src2),
6375 "crc32{q} \t{$src2, $src1|$src1, $src2}",
6377 (int_x86_sse42_crc32_64_64 GR64:$src1,
6378 (load addr:$src2)))]>,
6380 def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
6381 (ins GR64:$src1, GR64:$src2),
6382 "crc32{q} \t{$src2, $src1|$src1, $src2}",
6384 (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
6388 //===----------------------------------------------------------------------===//
6389 // AES-NI Instructions
6390 //===----------------------------------------------------------------------===//
// Binary AES round ops (aesenc/aesenclast/aesdec/aesdeclast): rr and rm
// forms selected from the matching intrinsic; Is2Addr switches between the
// two-operand SSE and three-operand AVX asm strings.
6392 multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
6393 Intrinsic IntId128, bit Is2Addr = 1> {
6394 def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
6395 (ins VR128:$src1, VR128:$src2),
6397 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6398 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6399 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
6401 def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
6402 (ins VR128:$src1, i128mem:$src2),
6404 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6405 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6407 (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
6410 // Perform One Round of an AES Encryption/Decryption Flow
// AVX (VEX_4V, three-operand) forms require both AVX and AES features.
6411 let Predicates = [HasAVX, HasAES] in {
6412 defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
6413 int_x86_aesni_aesenc, 0>, VEX_4V;
6414 defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
6415 int_x86_aesni_aesenclast, 0>, VEX_4V;
6416 defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
6417 int_x86_aesni_aesdec, 0>, VEX_4V;
6418 defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
6419 int_x86_aesni_aesdeclast, 0>, VEX_4V;
// Legacy two-operand forms: $dst tied to $src1.
6422 let Constraints = "$src1 = $dst" in {
6423 defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
6424 int_x86_aesni_aesenc>;
6425 defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
6426 int_x86_aesni_aesenclast>;
6427 defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
6428 int_x86_aesni_aesdec>;
6429 defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
6430 int_x86_aesni_aesdeclast>;
6433 // Perform the AES InvMixColumn Transformation
// AESIMC is unary (single source), so no tied-operand constraint is needed.
6434 let Predicates = [HasAVX, HasAES] in {
6435 def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
6437 "vaesimc\t{$src1, $dst|$dst, $src1}",
6439 (int_x86_aesni_aesimc VR128:$src1))]>,
6441 def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
6442 (ins i128mem:$src1),
6443 "vaesimc\t{$src1, $dst|$dst, $src1}",
6444 [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
6447 def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
6449 "aesimc\t{$src1, $dst|$dst, $src1}",
6451 (int_x86_aesni_aesimc VR128:$src1))]>,
6453 def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
6454 (ins i128mem:$src1),
6455 "aesimc\t{$src1, $dst|$dst, $src1}",
6456 [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
6459 // AES Round Key Generation Assist
// Unary with an 8-bit round-constant immediate ($src2).
6460 let Predicates = [HasAVX, HasAES] in {
6461 def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
6462 (ins VR128:$src1, i8imm:$src2),
6463 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6465 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
6467 def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
6468 (ins i128mem:$src1, i8imm:$src2),
6469 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6471 (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
6474 def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
6475 (ins VR128:$src1, i8imm:$src2),
6476 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6478 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
6480 def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
6481 (ins i128mem:$src1, i8imm:$src2),
6482 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6484 (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
6487 //===----------------------------------------------------------------------===//
6488 // CLMUL Instructions
6489 //===----------------------------------------------------------------------===//
6491 // Carry-less Multiplication instructions
// The i8 immediate ($src3) selects which 64-bit halves of each source are
// multiplied. No patterns on these defs (neverHasSideEffects = 1).
6492 let neverHasSideEffects = 1 in {
6493 // AVX carry-less Multiplication instructions
6494 def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
6495 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6496 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6500 def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
6501 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6502 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
// Legacy two-operand encoding: $dst tied to $src1.
6505 let Constraints = "$src1 = $dst" in {
6506 def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
6507 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6508 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6512 def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
6513 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6514 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6516 } // Constraints = "$src1 = $dst"
6517 } // neverHasSideEffects = 1
// Assembler aliases mapping the mnemonic suffixes (hq/lq per operand) onto
// the corresponding immediate value, e.g. pclmulhqhqdq -> imm 0x11.
6520 multiclass pclmul_alias<string asm, int immop> {
6521 def : InstAlias<!strconcat("pclmul", asm,
6522 "dq {$src, $dst|$dst, $src}"),
6523 (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
6525 def : InstAlias<!strconcat("pclmul", asm,
6526 "dq {$src, $dst|$dst, $src}"),
6527 (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
6529 def : InstAlias<!strconcat("vpclmul", asm,
6530 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
6531 (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
6533 def : InstAlias<!strconcat("vpclmul", asm,
6534 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
6535 (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
6537 defm : pclmul_alias<"hqhq", 0x11>;
6538 defm : pclmul_alias<"hqlq", 0x01>;
6539 defm : pclmul_alias<"lqhq", 0x10>;
6540 defm : pclmul_alias<"lqlq", 0x00>;
6542 //===----------------------------------------------------------------------===//
6544 //===----------------------------------------------------------------------===//
6546 //===----------------------------------------------------------------------===//
6547 // VBROADCAST - Load from memory and broadcast to all elements of the
6548 // destination operand
// Memory-source broadcast: loads one element (or 128-bit chunk) and
// replicates it across the destination, selected from the given intrinsic.
6550 class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
6551 X86MemOperand x86memop, Intrinsic Int> :
6552 AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
6553 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6554 [(set RC:$dst, (Int addr:$src))]>, VEX;
6556 // AVX2 adds register forms
// Register-source broadcast: source is always a VR128 register.
6557 class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
6559 AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
6560 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6561 [(set RC:$dst, (Int VR128:$src))]>, VEX;
// AVX memory-broadcast instantiations (ss = f32, sd = f64, f128 = 128-bit).
6563 let ExeDomain = SSEPackedSingle in {
6564 def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
6565 int_x86_avx_vbroadcast_ss>;
6566 def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
6567 int_x86_avx_vbroadcast_ss_256>;
6569 let ExeDomain = SSEPackedDouble in
6570 def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
6571 int_x86_avx_vbroadcast_sd_256>;
6572 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
6573 int_x86_avx_vbroadcastf128_pd_256>;
// AVX2 register-source instantiations.
6575 let ExeDomain = SSEPackedSingle in {
6576 def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
6577 int_x86_avx2_vbroadcast_ss_ps>;
6578 def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
6579 int_x86_avx2_vbroadcast_ss_ps_256>;
6581 let ExeDomain = SSEPackedDouble in
6582 def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
6583 int_x86_avx2_vbroadcast_sd_pd_256>;
6585 let Predicates = [HasAVX2] in
6586 def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
6587 int_x86_avx2_vbroadcasti128>;
// The ps_256 broadcastf128 intrinsic reuses the same instruction as pd_256.
6589 let Predicates = [HasAVX] in
6590 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
6591 (VBROADCASTF128 addr:$src)>;
6594 //===----------------------------------------------------------------------===//
6595 // VINSERTF128 - Insert packed floating-point values
// Inserts a 128-bit value into a 256-bit register at the lane selected by
// the immediate. No direct patterns (neverHasSideEffects = 1); selection is
// via the intrinsic patterns below.
6597 let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
6598 def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
6599 (ins VR256:$src1, VR128:$src2, i8imm:$src3),
6600 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6603 def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
6604 (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
6605 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
// All three element-type intrinsics (pd/ps/si) map to the same instruction.
6609 let Predicates = [HasAVX] in {
6610 def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
6611 (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
6612 def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
6613 (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
6614 def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
6615 (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
6618 //===----------------------------------------------------------------------===//
6619 // VEXTRACTF128 - Extract packed floating-point values
// Inverse of VINSERTF128: extracts the 128-bit lane selected by the
// immediate into a register (rr) or directly to memory (mr, MRMDestMem).
6621 let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
6622 def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
6623 (ins VR256:$src1, i8imm:$src2),
6624 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6627 def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
6628 (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
6629 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6633 let Predicates = [HasAVX] in {
6634 def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
6635 (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
6636 def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
6637 (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
6638 def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
6639 (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
6642 //===----------------------------------------------------------------------===//
6643 // VMASKMOV - Conditional SIMD Packed Loads and Stores
// Emits four records per instantiation: masked load (rm/Yrm, 128/256-bit)
// and masked store (mr/Ymr). $src1 holds the element mask; the load/store
// intrinsics take (address, mask) / (address, mask, data) respectively.
6645 multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
6646 Intrinsic IntLd, Intrinsic IntLd256,
6647 Intrinsic IntSt, Intrinsic IntSt256> {
6648 def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
6649 (ins VR128:$src1, f128mem:$src2),
6650 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6651 [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
6653 def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
6654 (ins VR256:$src1, f256mem:$src2),
6655 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6656 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
6658 def mr : AVX8I<opc_mr, MRMDestMem, (outs),
6659 (ins f128mem:$dst, VR128:$src1, VR128:$src2),
6660 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6661 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
6662 def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
6663 (ins f256mem:$dst, VR256:$src1, VR256:$src2),
6664 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6665 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
// Single- and double-precision instantiations.
6668 let ExeDomain = SSEPackedSingle in
6669 defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
6670 int_x86_avx_maskload_ps,
6671 int_x86_avx_maskload_ps_256,
6672 int_x86_avx_maskstore_ps,
6673 int_x86_avx_maskstore_ps_256>;
6674 let ExeDomain = SSEPackedDouble in
6675 defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
6676 int_x86_avx_maskload_pd,
6677 int_x86_avx_maskload_pd_256,
6678 int_x86_avx_maskstore_pd,
6679 int_x86_avx_maskstore_pd_256>;
6681 //===----------------------------------------------------------------------===//
6682 // VPERMIL - Permute Single and Double Floating-Point Values
// avx_permil - Template for VPERMILPS/PD.  Emits four defs per instantiation:
//   rr/rm: variable-control form (opc_rm), control vector in a register or
//          loaded from memory, matched via the per-width intrinsic IntVar.
//   ri/mi: immediate-control form (opc_rmi), matched via the X86VPermilp DAG
//          node on value type 'vt'.
// The variable form is VEX_4V (two sources); the immediate form takes a
// single source plus i8imm and is plain VEX.
6684 multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
6685 RegisterClass RC, X86MemOperand x86memop_f,
6686 X86MemOperand x86memop_i, PatFrag i_frag,
6687 Intrinsic IntVar, ValueType vt> {
6688 def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
6689 (ins RC:$src1, RC:$src2),
6690 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6691 [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
6692 def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
6693 (ins RC:$src1, x86memop_i:$src2),
6694 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// The control vector loads as integer data, so bitconvert the i_frag load.
6695 [(set RC:$dst, (IntVar RC:$src1,
6696 (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
6698 def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
6699 (ins RC:$src1, i8imm:$src2),
6700 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6701 [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX;
6702 def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
6703 (ins x86memop_f:$src1, i8imm:$src2),
6704 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// NOTE(review): the "[(set RC:$dst," head of this pattern (original line
// 6705) appears to be missing from this copy -- verify against upstream.
6706 (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
// Instantiate VPERMILPS/PD for 128-bit (VR128) and 256-bit "Y" (VR256)
// register classes.  Variable form opcodes 0x0C/0x0D, immediate form
// 0x04/0x05, per the AVX encoding of VPERMILPS/VPERMILPD.
6709 let ExeDomain = SSEPackedSingle in {
6710 defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
6711 memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
6712 defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
6713 memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
6715 let ExeDomain = SSEPackedDouble in {
6716 defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
6717 memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
6718 defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
6719 memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
// Map integer-typed X86VPermilp shuffles onto the FP VPERMILPS/PD
// instructions (the multiclass above only matches the FP value types).
6722 let Predicates = [HasAVX] in {
6723 def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
6724 (VPERMILPSYri VR256:$src1, imm:$imm)>;
6725 def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
6726 (VPERMILPDYri VR256:$src1, imm:$imm)>;
6727 def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
6729 (VPERMILPSYmi addr:$src1, imm:$imm)>;
6730 def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
6731 (VPERMILPDYmi addr:$src1, imm:$imm)>;
6733 def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
6734 (VPERMILPDri VR128:$src1, imm:$imm)>;
6735 def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
6736 (VPERMILPDmi addr:$src1, imm:$imm)>;
6739 //===----------------------------------------------------------------------===//
6740 // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
// VPERM2F128: select/permute 128-bit lanes from two 256-bit sources under an
// 8-bit immediate control.  Register-register and register-memory forms;
// pattern-matched on v8f32 via the X86VPerm2x128 node.
6742 let ExeDomain = SSEPackedSingle in {
6743 def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
6744 (ins VR256:$src1, VR256:$src2, i8imm:$src3),
6745 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6746 [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
6747 (i8 imm:$src3))))]>, VEX_4V;
6748 def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
6749 (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
6750 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6751 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
6752 (i8 imm:$src3)))]>, VEX_4V;
// Match X86VPerm2x128 on the remaining 256-bit value types (integer and
// f64) onto VPERM2F128; memory operands of integer type go through the
// appropriate bc_* bitconvert of a v4i64 load.
6755 let Predicates = [HasAVX] in {
6756 def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6757 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
6758 def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6759 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
6760 def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6761 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
6762 def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6763 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
6764 def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6765 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
6767 def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
6768 (memopv8f32 addr:$src2), (i8 imm:$imm))),
6769 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6770 def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
6771 (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
6772 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6773 def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
6774 (memopv4i64 addr:$src2), (i8 imm:$imm))),
6775 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6776 def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
6777 (memopv4f64 addr:$src2), (i8 imm:$imm))),
6778 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6779 def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
6780 (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
6781 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6782 def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
6783 (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
6784 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
6787 //===----------------------------------------------------------------------===//
6788 // VZERO - Zero YMM registers
// VZEROALL / VZEROUPPER share opcode 0x77; the VEX_L bit distinguishes them.
// Both are modeled as defining all 16 YMM registers: VZEROUPPER only clears
// the upper 128 bits, but marking the full registers as defs is the
// conservative way to express that here.
6790 let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
6791 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
6792 // Zero All YMM registers
6793 def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
6794 [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;
6796 // Zero Upper bits of YMM registers
6797 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
6798 [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
6801 //===----------------------------------------------------------------------===//
6802 // Half precision conversion instructions
6803 //===----------------------------------------------------------------------===//
// f16c_ph2ps - Template for VCVTPH2PS (half -> single precision convert).
// The source is always a VR128 of packed halves; RC selects the 128/256-bit
// destination.  The memory form has no pattern (selected elsewhere), so it
// is marked side-effect-free + mayLoad.
6804 multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
6805 let Predicates = [HasAVX, HasF16C] in {
6806 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
6807 "vcvtph2ps\t{$src, $dst|$dst, $src}",
6808 [(set RC:$dst, (Int VR128:$src))]>,
6810 let neverHasSideEffects = 1, mayLoad = 1 in
6811 def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
6812 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
// f16c_ps2ph - Template for VCVTPS2PH (single -> half precision convert).
// The i32i8imm operand selects the rounding control.  Destination is always
// a VR128 of packed halves; RC selects the 128/256-bit source.
// NOTE(review): the store form is marked mayLoad = 1 here; for an MRMDestMem
// store one would expect mayStore -- verify against upstream.
6816 multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
6817 let Predicates = [HasAVX, HasF16C] in {
6818 def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
6819 (ins RC:$src1, i32i8imm:$src2),
6820 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6821 [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
6823 let neverHasSideEffects = 1, mayLoad = 1 in
6824 def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
6825 (ins RC:$src1, i32i8imm:$src2),
6826 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
// F16C conversion instantiations: 128-bit forms use a 64-bit memory operand
// (4 halves), 256-bit "Y" forms use 128-bit memory (8 halves).
6831 defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
6832 defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
6833 defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
6834 defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
6836 //===----------------------------------------------------------------------===//
6837 // AVX2 Instructions
6838 //===----------------------------------------------------------------------===//
6840 /// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate, matched
/// through an intrinsic.  Emits reg-reg-imm (rri) and reg-mem-imm (rmi)
/// forms; only the rri form is commutable since operand order matters once
/// one source is in memory.
6841 multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
6842 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
6843 X86MemOperand x86memop> {
6844 let isCommutable = 1 in
6845 def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
6846 (ins RC:$src1, RC:$src2, u32u8imm:$src3),
6847 !strconcat(OpcodeStr,
6848 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
6849 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
6851 def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
6852 (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
6853 !strconcat(OpcodeStr,
6854 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
// Memory source is loaded as the integer memop_frag and bitconverted.
6857 (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
// VPBLENDD (opcode 0x02): dword blend under immediate control.  Explicitly
// non-commutable -- the immediate's bit sense depends on operand order.
6861 let isCommutable = 0 in {
6862 defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
6863 VR128, memopv2i64, i128mem>;
6864 defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
6865 VR256, memopv4i64, i256mem>;
6868 //===----------------------------------------------------------------------===//
6869 // VPBROADCAST - Load from memory and broadcast to all elements of the
6870 // destination operand
// avx2_broadcast - Template for VPBROADCASTB/W/D/Q.  Four defs:
//   rr/Yrr: broadcast element 0 of a VR128 source to a 128/256-bit dest.
//   rm/Yrm: broadcast a scalar loaded from memory; the pattern feeds the
//           load through scalar_to_vector into the 128-bit intrinsic.
6872 multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
6873 X86MemOperand x86memop, PatFrag ld_frag,
6874 Intrinsic Int128, Intrinsic Int256> {
6875 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
6876 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6877 [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
6878 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
6879 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6881 (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
6882 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
6883 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6884 [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
6885 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
6886 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
// NOTE(review): Yrm's pattern also uses Int128's companion via Int256 --
// the "[(set VR256:$dst," head (original line 6887) is absent in this copy.
6888 (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
// VPBROADCAST instantiations per element width: B=0x78, W=0x79, D=0x58,
// Q=0x59, each paired with its element-sized memory operand and load frag.
6891 defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
6892 int_x86_avx2_pbroadcastb_128,
6893 int_x86_avx2_pbroadcastb_256>;
6894 defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
6895 int_x86_avx2_pbroadcastw_128,
6896 int_x86_avx2_pbroadcastw_256>;
6897 defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
6898 int_x86_avx2_pbroadcastd_128,
6899 int_x86_avx2_pbroadcastd_256>;
6900 defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
6901 int_x86_avx2_pbroadcastq_128,
6902 int_x86_avx2_pbroadcastq_256>;
// AVX2: map X86VBroadcast-of-load onto the memory forms of VPBROADCAST for
// every integer vector width.
6904 let Predicates = [HasAVX2] in {
6905 def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
6906 (VPBROADCASTBrm addr:$src)>;
6907 def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
6908 (VPBROADCASTBYrm addr:$src)>;
6909 def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
6910 (VPBROADCASTWrm addr:$src)>;
6911 def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
6912 (VPBROADCASTWYrm addr:$src)>;
6913 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
6914 (VPBROADCASTDrm addr:$src)>;
6915 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
6916 (VPBROADCASTDYrm addr:$src)>;
6917 def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
6918 (VPBROADCASTQrm addr:$src)>;
6919 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
6920 (VPBROADCASTQYrm addr:$src)>;
6923 // AVX1 broadcast patterns
// AVX1 fallback: without AVX2 only VBROADCASTSS/SD exist, so integer
// broadcasts of 32/64-bit elements reuse the FP broadcast instructions.
// (The HasAVX2 patterns above win first when AVX2 is available.)
6924 let Predicates = [HasAVX] in {
6925 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
6926 (VBROADCASTSSYrm addr:$src)>;
6927 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
6928 (VBROADCASTSDrm addr:$src)>;
6929 def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
6930 (VBROADCASTSSYrm addr:$src)>;
6931 def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
6932 (VBROADCASTSDrm addr:$src)>;
6934 def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
6935 (VBROADCASTSSrm addr:$src)>;
6936 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
6937 (VBROADCASTSSrm addr:$src)>;
6940 //===----------------------------------------------------------------------===//
6941 // VPERM - Permute instructions
// avx2_perm - Template for the variable cross-lane permutes VPERMD/VPERMPS.
// 256-bit only (Yrr/Yrm); the control vector is the first source, matched
// through the given intrinsic.
6944 multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
6946 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
6947 (ins VR256:$src1, VR256:$src2),
6948 !strconcat(OpcodeStr,
6949 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6950 [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
6951 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
6952 (ins VR256:$src1, i256mem:$src2),
6953 !strconcat(OpcodeStr,
6954 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6955 [(set VR256:$dst, (Int VR256:$src1,
6956 (bitconvert (mem_frag addr:$src2))))]>,
// Variable-control permutes: VPERMD (0x36, integer) and VPERMPS (0x16, FP).
6960 defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
6961 let ExeDomain = SSEPackedSingle in
6962 defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
// avx2_perm_imm - Template for immediate-control cross-lane permutes
// (VPERMQ/VPERMPD).  Single vector source plus i8imm, so plain VEX rather
// than VEX_4V.
6964 multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
6966 def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
6967 (ins VR256:$src1, i8imm:$src2),
6968 !strconcat(OpcodeStr,
6969 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6970 [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
6971 def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
6972 (ins i256mem:$src1, i8imm:$src2),
6973 !strconcat(OpcodeStr,
6974 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6975 [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
// Immediate-control qword permutes: VPERMQ (0x00) and VPERMPD (0x01).
6979 defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
6981 let ExeDomain = SSEPackedDouble in
6982 defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
6985 //===----------------------------------------------------------------------===//
6986 // VPERM2I128 - Permute Integer Values in 128-bit chunks
// VPERM2I128 (0x46): integer counterpart of VPERM2F128, matched on v4i64.
// AddedComplexity = 1 so these are preferred over the VPERM2F128 patterns
// when AVX2 is available.
6988 let AddedComplexity = 1 in {
6989 def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
6990 (ins VR256:$src1, VR256:$src2, i8imm:$src3),
6991 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6992 [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
6993 (i8 imm:$src3))))]>, VEX_4V;
6994 def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
6995 (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
6996 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6997 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
6998 (i8 imm:$src3)))]>, VEX_4V;
// Map the remaining 256-bit integer types onto VPERM2I128 (v4i64 is handled
// by the instruction patterns above).
7001 let Predicates = [HasAVX2], AddedComplexity = 1 in {
7002 def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
7003 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
7004 def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
7005 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
7006 def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
7007 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
7009 def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
7011 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
7012 def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
7013 (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
7014 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
7015 def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
7017 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
7021 //===----------------------------------------------------------------------===//
7022 // VINSERTI128 - Insert packed integer values
// VINSERTI128 (0x38): insert a 128-bit integer value into the lane of a
// 256-bit register selected by the immediate.
7024 def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
7025 (ins VR256:$src1, VR128:$src2, i8imm:$src3),
7026 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7028 (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
7030 def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
7031 (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
7032 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7034 (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
7035 imm:$src3))]>, VEX_4V;
// AVX2: select VINSERTI128 for subvector-insert shuffles of each 256-bit
// integer type; INSERT_get_vinsertf128_imm converts the matched insertion
// index into the instruction's lane immediate.
7037 let Predicates = [HasAVX2] in {
7038 def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
7040 (VINSERTI128rr VR256:$src1, VR128:$src2,
7041 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7042 def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
7044 (VINSERTI128rr VR256:$src1, VR128:$src2,
7045 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7046 def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
7048 (VINSERTI128rr VR256:$src1, VR128:$src2,
7049 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7050 def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
7052 (VINSERTI128rr VR256:$src1, VR128:$src2,
7053 (INSERT_get_vinsertf128_imm VR256:$ins))>;
// AVX1 fallback: without AVX2, subvector inserts of every 256-bit type
// (including the integer ones) go through VINSERTF128.
7057 let Predicates = [HasAVX] in {
7058 def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
7060 (VINSERTF128rr VR256:$src1, VR128:$src2,
7061 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7062 def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
7064 (VINSERTF128rr VR256:$src1, VR128:$src2,
7065 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7066 def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
7068 (VINSERTF128rr VR256:$src1, VR128:$src2,
7069 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7070 def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
7072 (VINSERTF128rr VR256:$src1, VR128:$src2,
7073 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7074 def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
7076 (VINSERTF128rr VR256:$src1, VR128:$src2,
7077 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7078 def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
7080 (VINSERTF128rr VR256:$src1, VR128:$src2,
7081 (INSERT_get_vinsertf128_imm VR256:$ins))>;
7084 //===----------------------------------------------------------------------===//
7085 // VEXTRACTI128 - Extract packed integer values
// VEXTRACTI128 (0x39): extract the 128-bit lane selected by the immediate.
// The store form has no pattern and is marked side-effect-free + mayStore.
7087 def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
7088 (ins VR256:$src1, i8imm:$src2),
7089 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7091 (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
7093 let neverHasSideEffects = 1, mayStore = 1 in
7094 def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
7095 (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
7096 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
// AVX2: select VEXTRACTI128 for subvector extracts of each 128-bit integer
// type; EXTRACT_get_vextractf128_imm converts the matched extraction index
// into the instruction's lane immediate.
7098 let Predicates = [HasAVX2] in {
7099 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7100 (v2i64 (VEXTRACTI128rr
7101 (v4i64 VR256:$src1),
7102 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7103 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7104 (v4i32 (VEXTRACTI128rr
7105 (v8i32 VR256:$src1),
7106 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7107 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7108 (v8i16 (VEXTRACTI128rr
7109 (v16i16 VR256:$src1),
7110 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7111 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7112 (v16i8 (VEXTRACTI128rr
7113 (v32i8 VR256:$src1),
7114 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
// AVX1 fallback: without AVX2, subvector extracts of every 128-bit type
// (including the integer ones) go through VEXTRACTF128.
7118 let Predicates = [HasAVX] in {
7119 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7120 (v4f32 (VEXTRACTF128rr
7121 (v8f32 VR256:$src1),
7122 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7123 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7124 (v2f64 (VEXTRACTF128rr
7125 (v4f64 VR256:$src1),
7126 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7127 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7128 (v2i64 (VEXTRACTF128rr
7129 (v4i64 VR256:$src1),
7130 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7131 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7132 (v4i32 (VEXTRACTF128rr
7133 (v8i32 VR256:$src1),
7134 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7135 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7136 (v8i16 (VEXTRACTF128rr
7137 (v16i16 VR256:$src1),
7138 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7139 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7140 (v16i8 (VEXTRACTF128rr
7141 (v32i8 VR256:$src1),
7142 (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7145 //===----------------------------------------------------------------------===//
7146 // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
// avx2_pmovmask - Template for the integer masked load/store VPMASKMOVD/Q.
// Loads (opcode 0x8c): rm/Yrm, dest in register, mask is $src1.
// Stores (opcode 0x8e): mr/Ymr, memory dest, mask is $src1, data is $src2.
// Note the intrinsic operand order differs between loads (addr, mask) and
// stores (addr, mask, data).
7148 multiclass avx2_pmovmask<string OpcodeStr,
7149 Intrinsic IntLd128, Intrinsic IntLd256,
7150 Intrinsic IntSt128, Intrinsic IntSt256> {
7151 def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
7152 (ins VR128:$src1, i128mem:$src2),
7153 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7154 [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
7155 def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
7156 (ins VR256:$src1, i256mem:$src2),
7157 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7158 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
7159 def mr : AVX28I<0x8e, MRMDestMem, (outs),
7160 (ins i128mem:$dst, VR128:$src1, VR128:$src2),
7161 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7162 [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
7163 def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
7164 (ins i256mem:$dst, VR256:$src1, VR256:$src2),
7165 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7166 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
// Integer masked move instantiations: VPMASKMOVD (dword) and VPMASKMOVQ
// (qword, distinguished by VEX_W).
7169 defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
7170 int_x86_avx2_maskload_d,
7171 int_x86_avx2_maskload_d_256,
7172 int_x86_avx2_maskstore_d,
7173 int_x86_avx2_maskstore_d_256>;
7174 defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
7175 int_x86_avx2_maskload_q,
7176 int_x86_avx2_maskload_q_256,
7177 int_x86_avx2_maskstore_q,
7178 int_x86_avx2_maskstore_q_256>, VEX_W;
7181 //===----------------------------------------------------------------------===//
7182 // Variable Bit Shifts
// avx2_var_shift - Template for AVX2 per-element variable shifts
// (VPSLLV/VPSRLV/VPSRAV).  Matched directly on the generic shl/srl/sra
// SDNodes with vector shift-amount operands; rr/rm are 128-bit, Yrr/Yrm
// 256-bit, with memory shift counts bitconverted from integer loads.
7184 multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
7185 ValueType vt128, ValueType vt256> {
7186 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
7187 (ins VR128:$src1, VR128:$src2),
7188 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7190 (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
7192 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
7193 (ins VR128:$src1, i128mem:$src2),
7194 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7196 (vt128 (OpNode VR128:$src1,
7197 (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
7199 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
7200 (ins VR256:$src1, VR256:$src2),
7201 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7203 (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
7205 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
7206 (ins VR256:$src1, i256mem:$src2),
7207 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7209 (vt256 (OpNode VR256:$src1,
7210 (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
// Variable-shift instantiations.  Qword variants use VEX_W; note there is
// no VPSRAVQ in AVX2, so only the dword arithmetic shift is defined.
7214 defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
7215 defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
7216 defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
7217 defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
7218 defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;