1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE-specific SelectionDAG node definitions.  Each def binds a TableGen
// name to the C++ X86ISD enumerator that lowering emits for it.
// NOTE(review): the X86loadp def below is cut off mid-definition in this
// dump -- its SDNode properties and closing ">;" are not visible; confirm
// against the complete file before editing.
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// FAND/FXOR are bitwise ops on FP values; both are commutative and
// associative, which the DAG combiner may exploit when reassociating.
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector moves: one result, one operand, no extra type
// constraints beyond the profile.
26 def X86s2vec : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
// Unpack-low type profile: result and both operands share one type.
31 def SDTUnpckl : SDTypeProfile<1, 2,
32 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Pattern fragments that type a generic load (or the X86loadp packed load)
// to a specific scalar or 128-bit vector value type, so instruction
// patterns below can select on the loaded type.
38 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
41 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
43 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
44 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
45 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
46 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// Leaf predicate: matches only the FP immediate +0.0 (used by the FLD0
// pxor aliases further down).
// NOTE(review): several defs in this region are cut off in this dump --
// the "}]>;"/"}]>" closers for fp32imm0, SHUFFLE_get_shuf_imm, and the
// first five *_shuffle_mask PatLeafs are not visible here.
48 def fp32imm0 : PatLeaf<(f32 fpimm), [{
49 return N->isExactlyValue(+0.0);
52 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// Transforms a build_vector shuffle mask into the 8-bit immediate the
// PSHUF*/SHUFP encodings expect.
54 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
55 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Shuffle-mask predicates: each PatLeaf defers to a C++ helper in the
// X86 backend that checks whether the constant mask matches the named
// instruction's shuffle semantics.
58 def v2f64_v2i64_splat_mask : PatLeaf<(build_vector), [{
59 return X86::isSplatMask(N);
62 def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
63 return X86::isMOVLHPSMask(N);
66 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
67 return X86::isMOVHLPSMask(N);
70 def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
71 return X86::isUNPCKLMask(N);
74 def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
75 return X86::isUNPCKHMask(N);
// These two also attach the xform that converts the matched mask into
// the instruction's immediate operand.
78 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
79 return X86::isPSHUFDMask(N);
80 }], SHUFFLE_get_shuf_imm>;
82 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
83 return X86::isSHUFPMask(N);
84 }], SHUFFLE_get_shuf_imm>;
86 // Only use SHUFP for v4i32 if no other options are available.
87 // FIXME: add tblgen hook to reduce the complexity of pattern.
88 def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
89 return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
90 }], SHUFFLE_get_shuf_imm>;
92 //===----------------------------------------------------------------------===//
93 // SSE scalar FP Instructions
94 //===----------------------------------------------------------------------===//
96 // Instruction templates
97 // SSI - SSE1 instructions with XS prefix.
98 // SDI - SSE2 instructions with XD prefix.
99 // PSI - SSE1 instructions with TB prefix.
100 // PDI - SSE2 instructions with TB and OpSize prefixes.
101 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
102 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Each template fixes the encoding prefix and the HasSSE1/HasSSE2
// subtarget predicate so individual defs only supply opcode, format,
// operands, asm string, and pattern.
103 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
104 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
105 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
106 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
107 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
108 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
109 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
110 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// NOTE(review): the closing "}" of the PSIi8 and PDIi8 class bodies is
// not visible in this dump; confirm against the complete file.
111 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
112 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
113 let Pattern = pattern;
115 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
116 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
117 let Pattern = pattern;
120 // Some 'special' instructions
// Pseudo instructions materializing an undefined value in an SSE scalar
// register class.
// Fix: IMPLICIT_DEF_FR32 previously required HasSSE2, but FR32 is the
// SSE1 scalar register class -- every other FR32 instruction in this file
// (SSI/PSI templates) requires only HasSSE1, so an SSE1-only target could
// never materialize an undef FR32.  FR64 genuinely needs SSE2.
121 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
122 "#IMPLICIT_DEF $dst",
123 [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
124 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
125 "#IMPLICIT_DEF $dst",
126 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
128 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
129 // scheduler into a branch sequence.
// NOTE(review): the closing "}" of this let block is not visible in this
// dump; confirm against the complete file.
130 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
// Select between $t and $f on condition code $cond; expanded post-ISel
// into a diamond of branches because SSE has no general FP cmov.
131 def CMOV_FR32 : I<0, Pseudo,
132 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
133 "#CMOV_FR32 PSEUDO!",
134 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
135 def CMOV_FR64 : I<0, Pseudo,
136 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
137 "#CMOV_FR64 PSEUDO!",
138 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves.  rr forms carry no pattern (selected as
// copies); rm forms fold a scalar load; mr forms fold a scalar store.
142 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
143 "movss {$src, $dst|$dst, $src}", []>;
144 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
145 "movss {$src, $dst|$dst, $src}",
146 [(set FR32:$dst, (loadf32 addr:$src))]>;
147 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
148 "movsd {$src, $dst|$dst, $src}", []>;
149 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
150 "movsd {$src, $dst|$dst, $src}",
151 [(set FR64:$dst, (loadf64 addr:$src))]>;
153 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
154 "movss {$src, $dst|$dst, $src}",
155 [(store FR32:$src, addr:$dst)]>;
156 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
157 "movsd {$src, $dst|$dst, $src}",
158 [(store FR64:$src, addr:$dst)]>;
160 // FR32 / FR64 to 128-bit vector conversion.
161 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
162 "movss {$src, $dst|$dst, $src}",
164 (v4f32 (scalar_to_vector FR32:$src)))]>;
165 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
166 "movss {$src, $dst|$dst, $src}",
168 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
169 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
170 "movsd {$src, $dst|$dst, $src}",
172 (v2f64 (scalar_to_vector FR64:$src)))]>;
173 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
174 "movsd {$src, $dst|$dst, $src}",
176 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
178 // Arithmetic instructions
179 let isTwoAddress = 1 in {
180 let isCommutable = 1 in {
181 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
182 "addss {$src2, $dst|$dst, $src2}",
183 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
184 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
185 "addsd {$src2, $dst|$dst, $src2}",
186 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
187 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
188 "mulss {$src2, $dst|$dst, $src2}",
189 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
190 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
191 "mulsd {$src2, $dst|$dst, $src2}",
192 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
195 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
196 "addss {$src2, $dst|$dst, $src2}",
197 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
198 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
199 "addsd {$src2, $dst|$dst, $src2}",
200 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
201 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
202 "mulss {$src2, $dst|$dst, $src2}",
203 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
204 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
205 "mulsd {$src2, $dst|$dst, $src2}",
206 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
208 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
209 "divss {$src2, $dst|$dst, $src2}",
210 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
211 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
212 "divss {$src2, $dst|$dst, $src2}",
213 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
214 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
215 "divsd {$src2, $dst|$dst, $src2}",
216 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
217 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
218 "divsd {$src2, $dst|$dst, $src2}",
219 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
221 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
222 "subss {$src2, $dst|$dst, $src2}",
223 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
224 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
225 "subss {$src2, $dst|$dst, $src2}",
226 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
227 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
228 "subsd {$src2, $dst|$dst, $src2}",
229 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
230 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
231 "subsd {$src2, $dst|$dst, $src2}",
232 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
235 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
236 "sqrtss {$src, $dst|$dst, $src}",
237 [(set FR32:$dst, (fsqrt FR32:$src))]>;
238 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
239 "sqrtss {$src, $dst|$dst, $src}",
240 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
241 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
242 "sqrtsd {$src, $dst|$dst, $src}",
243 [(set FR64:$dst, (fsqrt FR64:$src))]>;
244 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
245 "sqrtsd {$src, $dst|$dst, $src}",
246 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
248 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
249 "rsqrtss {$src, $dst|$dst, $src}", []>;
250 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
251 "rsqrtss {$src, $dst|$dst, $src}", []>;
252 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
253 "rcpss {$src, $dst|$dst, $src}", []>;
254 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
255 "rcpss {$src, $dst|$dst, $src}", []>;
257 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
258 "maxss {$src, $dst|$dst, $src}", []>;
259 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
260 "maxss {$src, $dst|$dst, $src}", []>;
261 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
262 "maxsd {$src, $dst|$dst, $src}", []>;
263 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
264 "maxsd {$src, $dst|$dst, $src}", []>;
265 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
266 "minss {$src, $dst|$dst, $src}", []>;
267 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
268 "minss {$src, $dst|$dst, $src}", []>;
269 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
270 "minsd {$src, $dst|$dst, $src}", []>;
271 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
272 "minsd {$src, $dst|$dst, $src}", []>;
275 // Aliases to match intrinsics which expect XMM operand(s).
276 let isTwoAddress = 1 in {
277 let isCommutable = 1 in {
278 def Int_ADDSSrr : SSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
280 "addss {$src2, $dst|$dst, $src2}",
281 [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
283 def Int_ADDSDrr : SDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
285 "addsd {$src2, $dst|$dst, $src2}",
286 [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
288 def Int_MULSSrr : SSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
290 "mulss {$src2, $dst|$dst, $src2}",
291 [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
293 def Int_MULSDrr : SDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
295 "mulsd {$src2, $dst|$dst, $src2}",
296 [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
300 def Int_ADDSSrm : SSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
302 "addss {$src2, $dst|$dst, $src2}",
303 [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
304 (load addr:$src2)))]>;
305 def Int_ADDSDrm : SDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
307 "addsd {$src2, $dst|$dst, $src2}",
308 [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
309 (load addr:$src2)))]>;
310 def Int_MULSSrm : SSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
312 "mulss {$src2, $dst|$dst, $src2}",
313 [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
314 (load addr:$src2)))]>;
315 def Int_MULSDrm : SDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
317 "mulsd {$src2, $dst|$dst, $src2}",
318 [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
319 (load addr:$src2)))]>;
321 def Int_DIVSSrr : SSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
322 "divss {$src2, $dst|$dst, $src2}",
323 [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
325 def Int_DIVSSrm : SSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
326 "divss {$src2, $dst|$dst, $src2}",
327 [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
328 (load addr:$src2)))]>;
329 def Int_DIVSDrr : SDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
330 "divsd {$src2, $dst|$dst, $src2}",
331 [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
333 def Int_DIVSDrm : SDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
334 "divsd {$src2, $dst|$dst, $src2}",
335 [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
336 (load addr:$src2)))]>;
338 def Int_SUBSSrr : SSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
339 "subss {$src2, $dst|$dst, $src2}",
340 [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
342 def Int_SUBSSrm : SSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
343 "subss {$src2, $dst|$dst, $src2}",
344 [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
345 (load addr:$src2)))]>;
346 def Int_SUBSDrr : SDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
347 "subsd {$src2, $dst|$dst, $src2}",
348 [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
350 def Int_SUBSDrm : SDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
351 "subsd {$src2, $dst|$dst, $src2}",
352 [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
353 (load addr:$src2)))]>;
356 def Int_SQRTSSrr : SSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
357 "sqrtss {$src, $dst|$dst, $src}",
358 [(set VR128:$dst, (int_x86_sse_sqrt_ss VR128:$src))]>;
359 def Int_SQRTSSrm : SSI<0x51, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
360 "sqrtss {$src, $dst|$dst, $src}",
361 [(set VR128:$dst, (int_x86_sse_sqrt_ss
362 (load addr:$src)))]>;
363 def Int_SQRTSDrr : SDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
364 "sqrtsd {$src, $dst|$dst, $src}",
365 [(set VR128:$dst, (int_x86_sse2_sqrt_sd VR128:$src))]>;
366 def Int_SQRTSDrm : SDI<0x51, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
367 "sqrtsd {$src, $dst|$dst, $src}",
368 [(set VR128:$dst, (int_x86_sse2_sqrt_sd
369 (load addr:$src)))]>;
371 def Int_RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
372 "rsqrtss {$src, $dst|$dst, $src}",
373 [(set VR128:$dst, (int_x86_sse_rsqrt_ss VR128:$src))]>;
374 def Int_RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
375 "rsqrtss {$src, $dst|$dst, $src}",
376 [(set VR128:$dst, (int_x86_sse_rsqrt_ss
377 (load addr:$src)))]>;
378 def Int_RCPSSrr : SSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
379 "rcpss {$src, $dst|$dst, $src}",
380 [(set VR128:$dst, (int_x86_sse_rcp_ss VR128:$src))]>;
381 def Int_RCPSSrm : SSI<0x53, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
382 "rcpss {$src, $dst|$dst, $src}",
383 [(set VR128:$dst, (int_x86_sse_rcp_ss
384 (load addr:$src)))]>;
386 let isTwoAddress = 1 in {
387 def Int_MAXSSrr : SSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
389 "maxss {$src2, $dst|$dst, $src2}",
390 [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
392 def Int_MAXSSrm : SSI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
394 "maxss {$src2, $dst|$dst, $src2}",
395 [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
396 (load addr:$src2)))]>;
397 def Int_MAXSDrr : SDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
399 "maxsd {$src2, $dst|$dst, $src2}",
400 [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
402 def Int_MAXSDrm : SDI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
404 "maxsd {$src2, $dst|$dst, $src2}",
405 [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
406 (load addr:$src2)))]>;
407 def Int_MINSSrr : SSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
409 "minss {$src2, $dst|$dst, $src2}",
410 [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
412 def Int_MINSSrm : SSI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
414 "minss {$src2, $dst|$dst, $src2}",
415 [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
416 (load addr:$src2)))]>;
417 def Int_MINSDrr : SDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
419 "minsd {$src2, $dst|$dst, $src2}",
420 [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
422 def Int_MINSDrm : SDI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
424 "minsd {$src2, $dst|$dst, $src2}",
425 [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
426 (load addr:$src2)))]>;
429 // Conversion instructions
430 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
431 "cvtss2si {$src, $dst|$dst, $src}", []>;
432 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
433 "cvtss2si {$src, $dst|$dst, $src}", []>;
435 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
436 "cvttss2si {$src, $dst|$dst, $src}",
437 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
438 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
439 "cvttss2si {$src, $dst|$dst, $src}",
440 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
441 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
442 "cvttsd2si {$src, $dst|$dst, $src}",
443 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
444 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
445 "cvttsd2si {$src, $dst|$dst, $src}",
446 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
447 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
448 "cvtsd2ss {$src, $dst|$dst, $src}",
449 [(set FR32:$dst, (fround FR64:$src))]>;
450 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
451 "cvtsd2ss {$src, $dst|$dst, $src}",
452 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
453 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
454 "cvtsi2ss {$src, $dst|$dst, $src}",
455 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
456 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
457 "cvtsi2ss {$src, $dst|$dst, $src}",
458 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
459 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
460 "cvtsi2sd {$src, $dst|$dst, $src}",
461 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
462 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
463 "cvtsi2sd {$src, $dst|$dst, $src}",
464 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
465 // SSE2 instructions with XS prefix
466 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
467 "cvtss2sd {$src, $dst|$dst, $src}",
468 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
470 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
471 "cvtss2sd {$src, $dst|$dst, $src}",
472 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
475 // Comparison instructions
476 let isTwoAddress = 1 in {
477 def CMPSSrr : SSI<0xC2, MRMSrcReg,
478 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
479 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
480 def CMPSSrm : SSI<0xC2, MRMSrcMem,
481 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
482 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
483 def CMPSDrr : SDI<0xC2, MRMSrcReg,
484 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
485 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
486 def CMPSDrm : SDI<0xC2, MRMSrcMem,
487 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
488 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
491 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
492 "ucomiss {$src2, $src1|$src1, $src2}",
493 [(X86cmp FR32:$src1, FR32:$src2)]>;
494 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
495 "ucomiss {$src2, $src1|$src1, $src2}",
496 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
497 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
498 "ucomisd {$src2, $src1|$src1, $src2}",
499 [(X86cmp FR64:$src1, FR64:$src2)]>;
500 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
501 "ucomisd {$src2, $src1|$src1, $src2}",
502 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
504 // Aliases of packed instructions for scalar use. These all have names that
507 // Alias instructions that map fld0 to pxor for sse.
508 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Zero a scalar FP register by xoring the whole XMM register with itself
// (encoded as PXOR, 0x66 0x0F 0xEF via TB + OpSize).
// NOTE(review): PXOR on XMM registers is an SSE2 encoding, yet FsFLD0SS
// requires only HasSSE1 -- looks suspect; confirm whether this should be
// HasSSE2 or be re-encoded as xorps for SSE1 targets.
509 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
510 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
511 Requires<[HasSSE1]>, TB, OpSize;
512 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
513 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
514 Requires<[HasSSE2]>, TB, OpSize;
516 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
517 // Upper bits are disregarded.
518 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
519 "movaps {$src, $dst|$dst, $src}", []>;
520 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
521 "movapd {$src, $dst|$dst, $src}", []>;
523 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
524 // Upper bits are disregarded.
525 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
526 "movaps {$src, $dst|$dst, $src}",
527 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
528 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
529 "movapd {$src, $dst|$dst, $src}",
530 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
532 // Alias bitwise logical operations using SSE logical ops on packed FP values.
533 let isTwoAddress = 1 in {
534 let isCommutable = 1 in {
535 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
536 "andps {$src2, $dst|$dst, $src2}",
537 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
538 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
539 "andpd {$src2, $dst|$dst, $src2}",
540 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
541 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
542 "orps {$src2, $dst|$dst, $src2}", []>;
543 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
544 "orpd {$src2, $dst|$dst, $src2}", []>;
545 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
546 "xorps {$src2, $dst|$dst, $src2}",
547 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
548 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
549 "xorpd {$src2, $dst|$dst, $src2}",
550 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
552 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
553 "andps {$src2, $dst|$dst, $src2}",
554 [(set FR32:$dst, (X86fand FR32:$src1,
555 (X86loadpf32 addr:$src2)))]>;
556 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
557 "andpd {$src2, $dst|$dst, $src2}",
558 [(set FR64:$dst, (X86fand FR64:$src1,
559 (X86loadpf64 addr:$src2)))]>;
560 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
561 "orps {$src2, $dst|$dst, $src2}", []>;
562 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
563 "orpd {$src2, $dst|$dst, $src2}", []>;
564 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
565 "xorps {$src2, $dst|$dst, $src2}",
566 [(set FR32:$dst, (X86fxor FR32:$src1,
567 (X86loadpf32 addr:$src2)))]>;
568 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
569 "xorpd {$src2, $dst|$dst, $src2}",
570 [(set FR64:$dst, (X86fxor FR64:$src1,
571 (X86loadpf64 addr:$src2)))]>;
573 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
574 "andnps {$src2, $dst|$dst, $src2}", []>;
575 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
576 "andnps {$src2, $dst|$dst, $src2}", []>;
577 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
578 "andnpd {$src2, $dst|$dst, $src2}", []>;
579 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
580 "andnpd {$src2, $dst|$dst, $src2}", []>;
583 //===----------------------------------------------------------------------===//
584 // SSE packed FP Instructions
585 //===----------------------------------------------------------------------===//
587 // Some 'special' instructions
588 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
589 "#IMPLICIT_DEF $dst",
590 [(set VR128:$dst, (v4f32 (undef)))]>,
594 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
595 "movaps {$src, $dst|$dst, $src}", []>;
596 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
597 "movaps {$src, $dst|$dst, $src}",
598 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
599 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
600 "movapd {$src, $dst|$dst, $src}", []>;
601 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
602 "movapd {$src, $dst|$dst, $src}",
603 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
605 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
606 "movaps {$src, $dst|$dst, $src}",
607 [(store (v4f32 VR128:$src), addr:$dst)]>;
608 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
609 "movapd {$src, $dst|$dst, $src}",
610 [(store (v2f64 VR128:$src), addr:$dst)]>;
612 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
613 "movups {$src, $dst|$dst, $src}", []>;
614 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
615 "movups {$src, $dst|$dst, $src}", []>;
616 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
617 "movups {$src, $dst|$dst, $src}", []>;
618 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
619 "movupd {$src, $dst|$dst, $src}", []>;
620 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
621 "movupd {$src, $dst|$dst, $src}", []>;
622 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
623 "movupd {$src, $dst|$dst, $src}", []>;
625 let isTwoAddress = 1 in {
626 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
627 "movlps {$src2, $dst|$dst, $src2}", []>;
628 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
629 "movlpd {$src2, $dst|$dst, $src2}", []>;
630 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
631 "movhps {$src2, $dst|$dst, $src2}", []>;
632 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
633 "movhpd {$src2, $dst|$dst, $src2}",
635 (v2f64 (vector_shuffle VR128:$src1,
636 (scalar_to_vector (loadf64 addr:$src2)),
637 UNPCKL_shuffle_mask)))]>;
640 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
641 "movlps {$src, $dst|$dst, $src}", []>;
642 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
643 "movlpd {$src, $dst|$dst, $src}", []>;
645 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
646 "movhps {$src, $dst|$dst, $src}", []>;
647 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
648 "movhpd {$src, $dst|$dst, $src}", []>;
650 let isTwoAddress = 1 in {
651 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
652 "movlhps {$src2, $dst|$dst, $src2}",
654 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
655 MOVLHPS_shuffle_mask)))]>;
657 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
658 "movhlps {$src2, $dst|$dst, $src2}",
660 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
661 MOVHLPS_shuffle_mask)))]>;
// Extract the sign bits of each packed FP element into the low bits of a
// 32-bit GPR.
664 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
665 "movmskps {$src, $dst|$dst, $src}",
666 [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// Fix: MOVMSKPD is the SSE2 instruction 66 0F 50 -- it needs the OpSize
// prefix and the HasSSE2 predicate, i.e. the PDI template, not PSI.
// As PSI it encoded to 0F 50, which is MOVMSKPS.
667 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
668 "movmskpd {$src, $dst|$dst, $src}",
669 [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
671 // Conversion instructions
672 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
673 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
674 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
675 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
676 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
677 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
678 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
679 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
681 // SSE2 instructions without OpSize prefix
682 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
683 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
685 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
686 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
689 // SSE2 instructions with XS prefix
690 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
691 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
692 XS, Requires<[HasSSE2]>;
693 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
694 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
695 XS, Requires<[HasSSE2]>;
697 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
698 "cvtps2pi {$src, $dst|$dst, $src}", []>;
699 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
700 "cvtps2pi {$src, $dst|$dst, $src}", []>;
701 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
702 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
703 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
704 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
706 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
707 "cvtps2dq {$src, $dst|$dst, $src}", []>;
708 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
709 "cvtps2dq {$src, $dst|$dst, $src}", []>;
710 // SSE2 packed instructions with XD prefix
711 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
712 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
713 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
714 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
716 // SSE2 instructions without OpSize prefix
// Convert two packed singles (low half of source) to packed doubles.
// NOTE(review): both defs here are cut off in this dump -- their
// Requires<...> tails are not visible; only the visible lines are edited.
717 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
718 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// Fix: the memory-source form takes f64mem:$src but was declared with
// format MRMSrcReg; a memory operand must use MRMSrcMem or the encoder
// emits a register-register ModRM byte.
720 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
721 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// Convert two packed doubles to packed singles (result in low half).
724 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
725 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Fix: the memory-source form takes f128mem:$src but was declared with
// format MRMSrcReg; a memory operand must use MRMSrcMem so the ModRM
// byte encodes a memory addressing mode.
726 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
727 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
730 let isTwoAddress = 1 in {
731 let isCommutable = 1 in {
732 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
733 "addps {$src2, $dst|$dst, $src2}",
734 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
735 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
736 "addpd {$src2, $dst|$dst, $src2}",
737 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
738 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
739 "mulps {$src2, $dst|$dst, $src2}",
740 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
741 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
742 "mulpd {$src2, $dst|$dst, $src2}",
743 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
746 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
747 "addps {$src2, $dst|$dst, $src2}",
748 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
749 (load addr:$src2))))]>;
750 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
751 "addpd {$src2, $dst|$dst, $src2}",
752 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
753 (load addr:$src2))))]>;
754 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
755 "mulps {$src2, $dst|$dst, $src2}",
756 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
757 (load addr:$src2))))]>;
758 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
759 "mulpd {$src2, $dst|$dst, $src2}",
760 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
761 (load addr:$src2))))]>;
763 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
764 "divps {$src2, $dst|$dst, $src2}",
765 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
766 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
767 "divps {$src2, $dst|$dst, $src2}",
768 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
769 (load addr:$src2))))]>;
770 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
771 "divpd {$src2, $dst|$dst, $src2}",
772 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
773 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
774 "divpd {$src2, $dst|$dst, $src2}",
775 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
776 (load addr:$src2))))]>;
778 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
779 "subps {$src2, $dst|$dst, $src2}",
780 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
781 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
782 "subps {$src2, $dst|$dst, $src2}",
783 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
784 (load addr:$src2))))]>;
785 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
786 "subpd {$src2, $dst|$dst, $src2}",
787 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
788 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
789 "subpd {$src2, $dst|$dst, $src2}",
790 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
791 (load addr:$src2))))]>;
794 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
795 "sqrtps {$src, $dst|$dst, $src}",
796 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
797 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
798 "sqrtps {$src, $dst|$dst, $src}",
799 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
800 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
801 "sqrtpd {$src, $dst|$dst, $src}",
802 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
803 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
804 "sqrtpd {$src, $dst|$dst, $src}",
805 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Approximate reciprocal-square-root and reciprocal of packed singles.
// Patterns are intentionally empty ([]): these are approximation
// instructions, so they are not matched from fsqrt/fdiv DAG nodes here.
807 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
808 "rsqrtps {$src, $dst|$dst, $src}", []>;
809 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
810 "rsqrtps {$src, $dst|$dst, $src}", []>;
811 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
812 "rcpps {$src, $dst|$dst, $src}", []>;
813 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
814 "rcpps {$src, $dst|$dst, $src}", []>;
// Packed FP max / min. No DAG patterns yet ([]).
// NOTE(review): the hardware max/min instructions are destructive two-operand
// binops (dst = op(dst, src)), but these defs model only (dst, src) and sit
// outside the isTwoAddress region that covers the other packed binops above —
// confirm this is intentional before adding patterns.
816 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
817 "maxps {$src, $dst|$dst, $src}", []>;
818 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
819 "maxps {$src, $dst|$dst, $src}", []>;
820 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
821 "maxpd {$src, $dst|$dst, $src}", []>;
822 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
823 "maxpd {$src, $dst|$dst, $src}", []>;
824 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
825 "minps {$src, $dst|$dst, $src}", []>;
826 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
827 "minps {$src, $dst|$dst, $src}", []>;
828 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
829 "minpd {$src, $dst|$dst, $src}", []>;
830 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
831 "minpd {$src, $dst|$dst, $src}", []>;
834 let isTwoAddress = 1 in {
835 let isCommutable = 1 in {
836 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
837 "andps {$src2, $dst|$dst, $src2}",
838 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
839 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
840 "andpd {$src2, $dst|$dst, $src2}",
841 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
842 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
843 "orps {$src2, $dst|$dst, $src2}",
844 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
845 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
846 "orpd {$src2, $dst|$dst, $src2}",
847 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
848 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
849 "xorps {$src2, $dst|$dst, $src2}",
850 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
851 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
852 "xorpd {$src2, $dst|$dst, $src2}",
853 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
855 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
856 "andps {$src2, $dst|$dst, $src2}",
857 [(set VR128:$dst, (v4i32 (and VR128:$src1,
858 (load addr:$src2))))]>;
859 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
860 "andpd {$src2, $dst|$dst, $src2}",
861 [(set VR128:$dst, (v2i64 (and VR128:$src1,
862 (load addr:$src2))))]>;
863 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
864 "orps {$src2, $dst|$dst, $src2}",
865 [(set VR128:$dst, (v4i32 (or VR128:$src1,
866 (load addr:$src2))))]>;
867 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
868 "orpd {$src2, $dst|$dst, $src2}",
869 [(set VR128:$dst, (v2i64 (or VR128:$src1,
870 (load addr:$src2))))]>;
871 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
872 "xorps {$src2, $dst|$dst, $src2}",
873 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
874 (load addr:$src2))))]>;
875 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
876 "xorpd {$src2, $dst|$dst, $src2}",
877 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
878 (load addr:$src2))))]>;
879 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
880 "andnps {$src2, $dst|$dst, $src2}",
881 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
883 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
884 "andnps {$src2, $dst|$dst, $src2}",
885 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
886 (load addr:$src2))))]>;
887 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
888 "andnpd {$src2, $dst|$dst, $src2}",
889 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// FIX: ANDNPD computes (~src1 & src2); the pattern was missing the (not …)
// on $src1, so this def would have matched a plain AND. Now consistent with
// ANDNPSrm and ANDNPDrr above.
892 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
893 "andnpd {$src2, $dst|$dst, $src2}",
894 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
895 (load addr:$src2))))]>;
898 let isTwoAddress = 1 in {
// Packed FP compares. The SSECC:$cc operand selects the comparison predicate
// and is printed into the mnemonic via ${cc} (it becomes the instruction's
// immediate byte). No DAG patterns yet ([]).
899 def CMPPSrr : PSI<0xC2, MRMSrcReg,
900 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
901 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
902 def CMPPSrm : PSI<0xC2, MRMSrcMem,
903 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
904 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
905 def CMPPDrr : PDI<0xC2, MRMSrcReg,
906 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
907 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
908 def CMPPDrm : PDI<0xC2, MRMSrcMem,
909 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
910 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
913 // Shuffle and unpack instructions
// FIX: pshufw reads its register source and writes $dst, so the reg-reg form
// is MRMSrcReg, not MRMDestReg (the memory sibling below is MRMSrcMem).
914 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
915 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
916 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
917 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
918 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
919 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// FIX: same as PSHUFWrr — the reg-reg form of pshufd must be MRMSrcReg so
// $src1 is encoded in the ModRM r/m field; MRMDestReg swaps the encoding.
920 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
921 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
922 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
923 [(set VR128:$dst, (v4i32 (vector_shuffle
924 VR128:$src1, (undef),
925 PSHUFD_shuffle_mask:$src2)))]>;
926 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
927 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
928 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
929 [(set VR128:$dst, (v4i32 (vector_shuffle
930 (load addr:$src1), (undef),
931 PSHUFD_shuffle_mask:$src2)))]>;
933 let isTwoAddress = 1 in {
934 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
935 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
936 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
937 [(set VR128:$dst, (v4f32 (vector_shuffle
938 VR128:$src1, VR128:$src2,
939 SHUFP_shuffle_mask:$src3)))]>;
940 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
941 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
942 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
943 [(set VR128:$dst, (v4f32 (vector_shuffle
944 VR128:$src1, (load addr:$src2),
945 SHUFP_shuffle_mask:$src3)))]>;
946 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
947 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
948 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
949 [(set VR128:$dst, (v2f64 (vector_shuffle
950 VR128:$src1, VR128:$src2,
951 SHUFP_shuffle_mask:$src3)))]>;
952 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
953 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
954 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
955 [(set VR128:$dst, (v2f64 (vector_shuffle
956 VR128:$src1, (load addr:$src2),
957 SHUFP_shuffle_mask:$src3)))]>;
959 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
960 (ops VR128:$dst, VR128:$src1, VR128:$src2),
961 "unpckhps {$src2, $dst|$dst, $src2}",
962 [(set VR128:$dst, (v4f32 (vector_shuffle
963 VR128:$src1, VR128:$src2,
964 UNPCKH_shuffle_mask)))]>;
965 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
966 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
967 "unpckhps {$src2, $dst|$dst, $src2}",
968 [(set VR128:$dst, (v4f32 (vector_shuffle
969 VR128:$src1, (load addr:$src2),
970 UNPCKH_shuffle_mask)))]>;
971 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
972 (ops VR128:$dst, VR128:$src1, VR128:$src2),
973 "unpckhpd {$src2, $dst|$dst, $src2}",
974 [(set VR128:$dst, (v2f64 (vector_shuffle
975 VR128:$src1, VR128:$src2,
976 UNPCKH_shuffle_mask)))]>;
977 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
978 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
979 "unpckhpd {$src2, $dst|$dst, $src2}",
980 [(set VR128:$dst, (v2f64 (vector_shuffle
981 VR128:$src1, (load addr:$src2),
982 UNPCKH_shuffle_mask)))]>;
984 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
985 (ops VR128:$dst, VR128:$src1, VR128:$src2),
986 "unpcklps {$src2, $dst|$dst, $src2}",
987 [(set VR128:$dst, (v4f32 (vector_shuffle
988 VR128:$src1, VR128:$src2,
989 UNPCKL_shuffle_mask)))]>;
990 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
991 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
992 "unpcklps {$src2, $dst|$dst, $src2}",
993 [(set VR128:$dst, (v4f32 (vector_shuffle
994 VR128:$src1, (load addr:$src2),
995 UNPCKL_shuffle_mask)))]>;
996 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
997 (ops VR128:$dst, VR128:$src1, VR128:$src2),
998 "unpcklpd {$src2, $dst|$dst, $src2}",
999 [(set VR128:$dst, (v2f64 (vector_shuffle
1000 VR128:$src1, VR128:$src2,
1001 UNPCKL_shuffle_mask)))]>;
1002 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
1003 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1004 "unpcklpd {$src2, $dst|$dst, $src2}",
1005 [(set VR128:$dst, (v2f64 (vector_shuffle
1006 VR128:$src1, (load addr:$src2),
1007 UNPCKL_shuffle_mask)))]>;
1010 //===----------------------------------------------------------------------===//
1011 // SSE integer instructions
1012 //===----------------------------------------------------------------------===//
1014 // Move Instructions
1015 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1016 "movd {$src, $dst|$dst, $src}",
1018 (v4i32 (scalar_to_vector R32:$src)))]>;
1019 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1020 "movd {$src, $dst|$dst, $src}",
1022 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1024 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1025 "movd {$src, $dst|$dst, $src}", []>;
1027 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
1028 "movdqa {$src, $dst|$dst, $src}", []>;
1029 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
1030 "movdqa {$src, $dst|$dst, $src}",
1031 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
1032 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1033 "movdqa {$src, $dst|$dst, $src}",
1034 [(store (v4i32 VR128:$src), addr:$dst)]>;
1036 // SSE2 instructions with XS prefix
1037 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
1038 "movq {$src, $dst|$dst, $src}",
1040 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
1041 Requires<[HasSSE2]>;
1042 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
1043 "movq {$src, $dst|$dst, $src}", []>, XS,
1044 Requires<[HasSSE2]>;
// FIX: this is a store (i64mem:$dst), so the format is MRMDestMem, not
// MRMSrcMem — matches the other *mr store defs (MOVD128mr, MOVDQAmr).
1045 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
1046 "movq {$src, $dst|$dst, $src}", []>;
1048 // 128-bit Integer Arithmetic
1049 let isTwoAddress = 1 in {
1050 let isCommutable = 1 in {
1051 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1052 "paddb {$src2, $dst|$dst, $src2}",
1053 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
1054 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1055 "paddw {$src2, $dst|$dst, $src2}",
1056 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
1057 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1058 "paddd {$src2, $dst|$dst, $src2}",
1059 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
1061 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1062 "paddb {$src2, $dst|$dst, $src2}",
1063 [(set VR128:$dst, (v16i8 (add VR128:$src1,
1064 (load addr:$src2))))]>;
1065 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1066 "paddw {$src2, $dst|$dst, $src2}",
1067 [(set VR128:$dst, (v8i16 (add VR128:$src1,
1068 (load addr:$src2))))]>;
1069 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1070 "paddd {$src2, $dst|$dst, $src2}",
1071 [(set VR128:$dst, (v4i32 (add VR128:$src1,
1072 (load addr:$src2))))]>;
1074 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1075 "psubb {$src2, $dst|$dst, $src2}",
1076 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
1077 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1078 "psubw {$src2, $dst|$dst, $src2}",
1079 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
1080 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1081 "psubd {$src2, $dst|$dst, $src2}",
1082 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
1084 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1085 "psubb {$src2, $dst|$dst, $src2}",
1086 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
1087 (load addr:$src2))))]>;
1088 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1089 "psubw {$src2, $dst|$dst, $src2}",
1090 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
1091 (load addr:$src2))))]>;
1092 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1093 "psubd {$src2, $dst|$dst, $src2}",
1094 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
1095 (load addr:$src2))))]>;
1097 // Unpack and interleave
1098 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
1099 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1100 "punpcklbw {$src2, $dst|$dst, $src2}",
1102 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1103 UNPCKL_shuffle_mask)))]>;
1104 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
1105 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1106 "punpcklbw {$src2, $dst|$dst, $src2}",
1108 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1109 UNPCKL_shuffle_mask)))]>;
1110 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
1111 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1112 "punpcklwd {$src2, $dst|$dst, $src2}",
1114 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1115 UNPCKL_shuffle_mask)))]>;
1116 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
1117 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1118 "punpcklwd {$src2, $dst|$dst, $src2}",
1120 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1121 UNPCKL_shuffle_mask)))]>;
1122 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
1123 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1124 "punpckldq {$src2, $dst|$dst, $src2}",
1126 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1127 UNPCKL_shuffle_mask)))]>;
1128 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
1129 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1130 "punpckldq {$src2, $dst|$dst, $src2}",
1132 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1133 UNPCKL_shuffle_mask)))]>;
1134 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
1135 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1136 "punpcklqdq {$src2, $dst|$dst, $src2}",
1138 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1139 UNPCKL_shuffle_mask)))]>;
1140 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
1141 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1142 "punpcklqdq {$src2, $dst|$dst, $src2}",
1144 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1145 UNPCKL_shuffle_mask)))]>;
1147 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
1148 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1149 "punpckhbw {$src2, $dst|$dst, $src2}",
1151 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1152 UNPCKH_shuffle_mask)))]>;
1153 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
1154 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1155 "punpckhbw {$src2, $dst|$dst, $src2}",
1157 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1158 UNPCKH_shuffle_mask)))]>;
1159 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
1160 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1161 "punpckhwd {$src2, $dst|$dst, $src2}",
1163 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1164 UNPCKH_shuffle_mask)))]>;
1165 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1166 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1167 "punpckhwd {$src2, $dst|$dst, $src2}",
1169 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1170 UNPCKH_shuffle_mask)))]>;
1171 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1172 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1173 "punpckhdq {$src2, $dst|$dst, $src2}",
1175 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1176 UNPCKH_shuffle_mask)))]>;
1177 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1178 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1179 "punpckhdq {$src2, $dst|$dst, $src2}",
1181 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1182 UNPCKH_shuffle_mask)))]>;
// FIX: opcode 0x6D is punpckhQDQ; the asm string said "punpckhdq"
// (copy-paste from the 0x6A def above). The rm form below already prints
// the correct mnemonic.
1183 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
1184 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1185 "punpckhqdq {$src2, $dst|$dst, $src2}",
1187 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1188 UNPCKH_shuffle_mask)))]>;
1189 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1190 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1191 "punpckhqdq {$src2, $dst|$dst, $src2}",
1193 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1194 UNPCKH_shuffle_mask)))]>;
1197 //===----------------------------------------------------------------------===//
1198 // Miscellaneous Instructions
1199 //===----------------------------------------------------------------------===//
1201 // Prefetching loads
// Prefetch hints. Opcode 0x18 with the ModRM reg field selecting the hint:
// /0 = prefetchnta, /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2.
// FIX: T1/T2/TNTA all printed "prefetcht0"; each now prints its own mnemonic
// to match the MRMnm encoding it declares.
1202 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1203 "prefetcht0 $src", []>, TB,
1204 Requires<[HasSSE1]>;
1205 def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
1206 "prefetcht1 $src", []>, TB,
1207 Requires<[HasSSE1]>;
1208 def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
1209 "prefetcht2 $src", []>, TB,
1210 Requires<[HasSSE1]>;
1211 def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
1212 "prefetchnta $src", []>, TB,
1213 Requires<[HasSSE1]>;
1215 // Non-temporal stores
1216 def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1217 "movntq {$src, $dst|$dst, $src}", []>, TB,
1218 Requires<[HasSSE1]>;
1219 def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1220 "movntps {$src, $dst|$dst, $src}", []>, TB,
1221 Requires<[HasSSE1]>;
// NOTE(review): hardware MASKMOVQ takes two MMX registers and stores through
// an implicit (E)DI pointer; modeling it with an explicit i64mem:$dst operand
// looks suspect — confirm against the encoder before relying on this def.
1222 def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1223 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
1224 Requires<[HasSSE1]>;
1227 def SFENCE : I<0xAE, MRM7m, (ops),
1228 "sfence", []>, TB, Requires<[HasSSE1]>;
1230 // Load MXCSR register
1231 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
1232 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1234 //===----------------------------------------------------------------------===//
1235 // Alias Instructions
1236 //===----------------------------------------------------------------------===//
1238 // Alias instructions that map zero vector to pxor / xorp* for sse.
1239 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
1240 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1242 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1243 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1245 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1246 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1248 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
1250 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1251 "pcmpeqd $dst, $dst",
1252 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1254 // Scalar to 128-bit vector with zero extension.
1255 // Three operand (but two address) aliases.
1256 let isTwoAddress = 1 in {
1257 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1258 "movss {$src2, $dst|$dst, $src2}", []>;
1259 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1260 "movsd {$src2, $dst|$dst, $src2}", []>;
1261 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1262 "movd {$src2, $dst|$dst, $src2}", []>;
1263 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
1264 "movq {$src2, $dst|$dst, $src2}", []>;
1267 // Loading from memory automatically zeroing upper bits.
1268 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1269 "movss {$src, $dst|$dst, $src}",
1271 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1272 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1273 "movsd {$src, $dst|$dst, $src}",
1275 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1276 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1277 "movd {$src, $dst|$dst, $src}",
1279 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1281 //===----------------------------------------------------------------------===//
1282 // Non-Instruction Patterns
1283 //===----------------------------------------------------------------------===//
1285 // 128-bit vector undef's.
1286 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1287 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1288 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1289 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1290 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1292 // 128-bit vector all zero's.
1293 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1294 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1295 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
1297 // 128-bit vector all one's.
1298 def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
1299 def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
1300 def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1301 def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
1302 def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
1304 // Load 128-bit integer vector values.
// All integer vector types share the single MOVDQA load/store pair below;
// the element type only matters for pattern matching, not for the move.
// NOTE(review): movdqa requires 16-byte alignment but these patterns match
// generic load/store nodes — presumably vector memory operands are always
// 16-byte aligned at this point; confirm.
1305 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
1306 Requires<[HasSSE2]>;
1307 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
1308 Requires<[HasSSE2]>;
1309 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
1310 Requires<[HasSSE2]>;
1311 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
1312 Requires<[HasSSE2]>;
1314 // Store 128-bit integer vector values.
1315 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1316 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1317 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1318 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1319 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1320 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1321 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1322 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1324 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits of it are used.
1326 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1327 Requires<[HasSSE2]>;
1328 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1329 Requires<[HasSSE2]>;
1332 def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
1333 Requires<[HasSSE2]>;
1334 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1335 Requires<[HasSSE2]>;
1336 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1337 Requires<[HasSSE2]>;
1339 // Zeroing a VR128 then do a MOVS* to the lower bits.
1340 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1341 (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
1342 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1343 (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1344 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
1345 (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
1346 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1347 (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
1348 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1349 (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1350 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1351 (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
1353 // Splat v2f64 / v2i64
1354 def : Pat<(vector_shuffle (v2f64 VR128:$src), (v2f64 VR128:$src),
1355 v2f64_v2i64_splat_mask:$sm),
1356 (v2f64 (UNPCKLPDrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1357 def : Pat<(vector_shuffle (v2i64 VR128:$src), (v2i64 VR128:$src),
1358 v2f64_v2i64_splat_mask:$sm),
1359 (v2i64 (PUNPCKLQDQrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1361 // Shuffle v4i32 if others do not match
1362 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
1363 SHUFP_shuffle_mask:$sm),
1364 (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
1365 SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
1366 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
1367 SHUFP_shuffle_mask:$sm),
1368 (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
1369 SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;