1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE-specific selection-DAG node definitions.  Each "X86ISD::*" opcode
// string must match the enum declared in the X86 lowering code.
// NOTE(review): the X86loadp def below ends mid-argument-list (its flags /
// terminator line is not present in this copy) — verify against upstream.
20 def X86loadp  : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise AND/XOR of FP-typed values; both are commutative and associative.
22 def X86fand   : SDNode<"X86ISD::FAND",      SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor   : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector nodes; ZEXT_S2VEC presumably zeros the upper elements —
// name-based assumption, confirm in the X86 lowering code.
26 def X86s2vec  : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
// Type profile for unpack-low style nodes: one result, two operands, all of
// the same type.
31 def SDTUnpckl : SDTypeProfile<1, 2,
32 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Typed wrappers around the load nodes so instruction patterns below can
// select on the loaded element/vector type.
38 def X86loadpf32  : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64  : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
41 def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
43 def loadv16i8    : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
44 def loadv8i16    : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
45 def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
46 def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// Matches the f32 immediate +0.0 (used by the pxor-based FLD0 alias below).
// NOTE(review): the closing "}]>;" of this PatLeaf is not present in this
// copy — the definition appears truncated; verify against upstream.
48 def fp32imm0 : PatLeaf<(f32 fpimm), [{
49 return N->isExactlyValue(+0.0);
52 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. immediate form.
// NOTE(review): several PatLeaf defs below end without their "}]...>;"
// closer — this copy appears truncated; verify against upstream.
54 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
55 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Splat masks: recognized here, then converted to a SHUFPS immediate by the
// xform above.
58 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
59 return X86::isSplatMask(N);
60 }], SHUFFLE_get_shuf_imm>;
62 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
63 return X86::isSplatMask(N);
// Masks matching the MOVLHPS / MOVHLPS / UNPCKL / UNPCKH element orders,
// checked by the corresponding X86:: predicate helpers.
66 def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
67 return X86::isMOVLHPSMask(N);
70 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
71 return X86::isMOVHLPSMask(N);
74 def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
75 return X86::isUNPCKLMask(N);
78 def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
79 return X86::isUNPCKHMask(N);
82 // Only use PSHUF if it is not a splat.
83 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
84 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
85 }], SHUFFLE_get_shuf_imm>;
87 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
88 return X86::isSHUFPMask(N);
89 }], SHUFFLE_get_shuf_imm>;
91 //===----------------------------------------------------------------------===//
92 // SSE scalar FP Instructions
93 //===----------------------------------------------------------------------===//
95 // Instruction templates
96 // SSI - SSE1 instructions with XS prefix.
97 // SDI - SSE2 instructions with XD prefix.
98 // PSI - SSE1 instructions with TB prefix.
99 // PDI - SSE2 instructions with TB and OpSize prefixes.
100 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
101 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Format helper classes implementing the prefix scheme documented above
// (SSI = XS/SSE1, SDI = XD/SSE2, PSI = TB/SSE1, PDI = TB+OpSize/SSE2).
// NOTE(review): the closing "}" of PSIi8 and PDIi8 is not present in this
// copy — verify against upstream.
102 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
103 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
104 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
105 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
106 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
107 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
108 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
109 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
110 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
111 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
112 let Pattern = pattern;
114 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
115 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
116 let Pattern = pattern;
119 // Some 'special' instructions
// IMPLICIT_DEF pseudos give the register allocator a defined-but-arbitrary
// value of the given class.  The FR32 form only needs SSE1 (consistent with
// the SSI/PSI classes above); requiring SSE2 here would wrongly disable it
// on SSE1-only targets.
120 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
121 "#IMPLICIT_DEF $dst",
122 [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
123 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
124 "#IMPLICIT_DEF $dst",
125 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
127 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
128 // scheduler into a branch sequence.
129 let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
// FP select pseudos: SSE has no conditional move of FP registers, so these
// are expanded into a branch diamond by the custom sched inserter.
130 def CMOV_FR32 : I<0, Pseudo,
131 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
132 "#CMOV_FR32 PSEUDO!",
133 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
134 def CMOV_FR64 : I<0, Pseudo,
135 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
136 "#CMOV_FR64 PSEUDO!",
137 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// NOTE(review): the "}" closing this 'let' block is not visible in this copy.
// Scalar single/double moves.  The register-register forms carry no pattern
// (used as copies); the rm forms load f32/f64, the mr forms store them.
141 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
142 "movss {$src, $dst|$dst, $src}", []>;
143 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
144 "movss {$src, $dst|$dst, $src}",
145 [(set FR32:$dst, (loadf32 addr:$src))]>;
146 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
147 "movsd {$src, $dst|$dst, $src}", []>;
148 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
149 "movsd {$src, $dst|$dst, $src}",
150 [(set FR64:$dst, (loadf64 addr:$src))]>;
152 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
153 "movss {$src, $dst|$dst, $src}",
154 [(store FR32:$src, addr:$dst)]>;
155 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
156 "movsd {$src, $dst|$dst, $src}",
157 [(store FR64:$src, addr:$dst)]>;
159 // FR32 / FR64 to 128-bit vector conversion.
// NOTE(review): the "[(set VR128:$dst," line of each pattern below is not
// present in this copy (the defs jump from the asm string straight to the
// scalar_to_vector line) — verify against upstream.
160 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
161 "movss {$src, $dst|$dst, $src}",
163 (v4f32 (scalar_to_vector FR32:$src)))]>;
164 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
165 "movss {$src, $dst|$dst, $src}",
167 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
168 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
169 "movsd {$src, $dst|$dst, $src}",
171 (v2f64 (scalar_to_vector FR64:$src)))]>;
172 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
173 "movsd {$src, $dst|$dst, $src}",
175 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
177 // Arithmetic instructions
// isTwoAddress ties $src1 to $dst (x86 destructive two-operand encoding);
// isCommutable lets the two-address pass swap the commutable operands.
// NOTE(review): the "}" lines closing these 'let' blocks are not visible in
// this copy — verify against upstream.
178 let isTwoAddress = 1 in {
179 let isCommutable = 1 in {
180 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
181 "addss {$src2, $dst|$dst, $src2}",
182 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
183 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
184 "addsd {$src2, $dst|$dst, $src2}",
185 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
186 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
187 "mulss {$src2, $dst|$dst, $src2}",
188 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
189 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
190 "mulsd {$src2, $dst|$dst, $src2}",
191 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
// Memory-operand forms fold the load into the arithmetic op.
194 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
195 "addss {$src2, $dst|$dst, $src2}",
196 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
197 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
198 "addsd {$src2, $dst|$dst, $src2}",
199 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
200 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
201 "mulss {$src2, $dst|$dst, $src2}",
202 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
203 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
204 "mulsd {$src2, $dst|$dst, $src2}",
205 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
// Division and subtraction are not commutable.
207 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
208 "divss {$src2, $dst|$dst, $src2}",
209 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
210 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
211 "divss {$src2, $dst|$dst, $src2}",
212 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
213 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
214 "divsd {$src2, $dst|$dst, $src2}",
215 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
216 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
217 "divsd {$src2, $dst|$dst, $src2}",
218 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
220 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
221 "subss {$src2, $dst|$dst, $src2}",
222 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
223 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
224 "subss {$src2, $dst|$dst, $src2}",
225 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
226 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
227 "subsd {$src2, $dst|$dst, $src2}",
228 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
229 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
230 "subsd {$src2, $dst|$dst, $src2}",
231 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
// Unary ops: sqrt selects via fsqrt; rsqrt/rcp/max/min have no DAG pattern
// here and are reached through the intrinsic aliases below.
234 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
235 "sqrtss {$src, $dst|$dst, $src}",
236 [(set FR32:$dst, (fsqrt FR32:$src))]>;
237 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
238 "sqrtss {$src, $dst|$dst, $src}",
239 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
240 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
241 "sqrtsd {$src, $dst|$dst, $src}",
242 [(set FR64:$dst, (fsqrt FR64:$src))]>;
243 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
244 "sqrtsd {$src, $dst|$dst, $src}",
245 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
247 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
248 "rsqrtss {$src, $dst|$dst, $src}", []>;
249 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
250 "rsqrtss {$src, $dst|$dst, $src}", []>;
251 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
252 "rcpss {$src, $dst|$dst, $src}", []>;
253 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
254 "rcpss {$src, $dst|$dst, $src}", []>;
256 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
257 "maxss {$src, $dst|$dst, $src}", []>;
258 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
259 "maxss {$src, $dst|$dst, $src}", []>;
260 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
261 "maxsd {$src, $dst|$dst, $src}", []>;
262 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
263 "maxsd {$src, $dst|$dst, $src}", []>;
264 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
265 "minss {$src, $dst|$dst, $src}", []>;
266 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
267 "minss {$src, $dst|$dst, $src}", []>;
268 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
269 "minsd {$src, $dst|$dst, $src}", []>;
270 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
271 "minsd {$src, $dst|$dst, $src}", []>;
274 // Aliases to match intrinsics which expect XMM operand(s).
// These operate on whole VR128 registers so the int_x86_sse* intrinsics can
// select directly; the scalar op affects only the low element.
// NOTE(review): many defs in this section are missing continuation lines in
// this copy (the "VR128:$src2)," operand line and/or the pattern's closing
// line jump straight to the asm string) — verify against upstream.
275 let isTwoAddress = 1 in {
276 let isCommutable = 1 in {
277 def Int_ADDSSrr : SSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
279 "addss {$src2, $dst|$dst, $src2}",
280 [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
282 def Int_ADDSDrr : SDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
284 "addsd {$src2, $dst|$dst, $src2}",
285 [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
287 def Int_MULSSrr : SSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
289 "mulss {$src2, $dst|$dst, $src2}",
290 [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
292 def Int_MULSDrr : SDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
294 "mulsd {$src2, $dst|$dst, $src2}",
295 [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
// Memory forms fold the load into the intrinsic call.
299 def Int_ADDSSrm : SSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
301 "addss {$src2, $dst|$dst, $src2}",
302 [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
303 (load addr:$src2)))]>;
304 def Int_ADDSDrm : SDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
306 "addsd {$src2, $dst|$dst, $src2}",
307 [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
308 (load addr:$src2)))]>;
309 def Int_MULSSrm : SSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
311 "mulss {$src2, $dst|$dst, $src2}",
312 [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
313 (load addr:$src2)))]>;
314 def Int_MULSDrm : SDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
316 "mulsd {$src2, $dst|$dst, $src2}",
317 [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
318 (load addr:$src2)))]>;
320 def Int_DIVSSrr : SSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
321 "divss {$src2, $dst|$dst, $src2}",
322 [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
324 def Int_DIVSSrm : SSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
325 "divss {$src2, $dst|$dst, $src2}",
326 [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
327 (load addr:$src2)))]>;
328 def Int_DIVSDrr : SDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
329 "divsd {$src2, $dst|$dst, $src2}",
330 [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
332 def Int_DIVSDrm : SDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
333 "divsd {$src2, $dst|$dst, $src2}",
334 [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
335 (load addr:$src2)))]>;
337 def Int_SUBSSrr : SSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
338 "subss {$src2, $dst|$dst, $src2}",
339 [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
341 def Int_SUBSSrm : SSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
342 "subss {$src2, $dst|$dst, $src2}",
343 [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
344 (load addr:$src2)))]>;
345 def Int_SUBSDrr : SDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
346 "subsd {$src2, $dst|$dst, $src2}",
347 [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
349 def Int_SUBSDrm : SDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
350 "subsd {$src2, $dst|$dst, $src2}",
351 [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
352 (load addr:$src2)))]>;
// Unary intrinsic forms (not two-address).
355 def Int_SQRTSSrr : SSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
356 "sqrtss {$src, $dst|$dst, $src}",
357 [(set VR128:$dst, (int_x86_sse_sqrt_ss VR128:$src))]>;
358 def Int_SQRTSSrm : SSI<0x51, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
359 "sqrtss {$src, $dst|$dst, $src}",
360 [(set VR128:$dst, (int_x86_sse_sqrt_ss
361 (load addr:$src)))]>;
362 def Int_SQRTSDrr : SDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
363 "sqrtsd {$src, $dst|$dst, $src}",
364 [(set VR128:$dst, (int_x86_sse2_sqrt_sd VR128:$src))]>;
365 def Int_SQRTSDrm : SDI<0x51, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
366 "sqrtsd {$src, $dst|$dst, $src}",
367 [(set VR128:$dst, (int_x86_sse2_sqrt_sd
368 (load addr:$src)))]>;
370 def Int_RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
371 "rsqrtss {$src, $dst|$dst, $src}",
372 [(set VR128:$dst, (int_x86_sse_rsqrt_ss VR128:$src))]>;
373 def Int_RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
374 "rsqrtss {$src, $dst|$dst, $src}",
375 [(set VR128:$dst, (int_x86_sse_rsqrt_ss
376 (load addr:$src)))]>;
377 def Int_RCPSSrr : SSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
378 "rcpss {$src, $dst|$dst, $src}",
379 [(set VR128:$dst, (int_x86_sse_rcp_ss VR128:$src))]>;
380 def Int_RCPSSrm : SSI<0x53, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
381 "rcpss {$src, $dst|$dst, $src}",
382 [(set VR128:$dst, (int_x86_sse_rcp_ss
383 (load addr:$src)))]>;
// Binary max/min intrinsic forms (two-address, not commutable).
385 let isTwoAddress = 1 in {
386 def Int_MAXSSrr : SSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
388 "maxss {$src2, $dst|$dst, $src2}",
389 [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
391 def Int_MAXSSrm : SSI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
393 "maxss {$src2, $dst|$dst, $src2}",
394 [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
395 (load addr:$src2)))]>;
396 def Int_MAXSDrr : SDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
398 "maxsd {$src2, $dst|$dst, $src2}",
399 [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
401 def Int_MAXSDrm : SDI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
403 "maxsd {$src2, $dst|$dst, $src2}",
404 [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
405 (load addr:$src2)))]>;
406 def Int_MINSSrr : SSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
408 "minss {$src2, $dst|$dst, $src2}",
409 [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
411 def Int_MINSSrm : SSI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
413 "minss {$src2, $dst|$dst, $src2}",
414 [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
415 (load addr:$src2)))]>;
416 def Int_MINSDrr : SDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
418 "minsd {$src2, $dst|$dst, $src2}",
419 [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
421 def Int_MINSDrm : SDI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
423 "minsd {$src2, $dst|$dst, $src2}",
424 [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
425 (load addr:$src2)))]>;
428 // Conversion instructions
// cvtss2si uses the current rounding mode; cvttss2si/cvttsd2si truncate,
// which is why only the "tt" forms select fp_to_sint.
429 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
430 "cvtss2si {$src, $dst|$dst, $src}", []>;
431 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
432 "cvtss2si {$src, $dst|$dst, $src}", []>;
434 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
435 "cvttss2si {$src, $dst|$dst, $src}",
436 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
437 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
438 "cvttss2si {$src, $dst|$dst, $src}",
439 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
440 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
441 "cvttsd2si {$src, $dst|$dst, $src}",
442 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
443 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
444 "cvttsd2si {$src, $dst|$dst, $src}",
445 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
446 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
447 "cvtsd2ss {$src, $dst|$dst, $src}",
448 [(set FR32:$dst, (fround FR64:$src))]>;
449 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
450 "cvtsd2ss {$src, $dst|$dst, $src}",
451 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
452 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
453 "cvtsi2ss {$src, $dst|$dst, $src}",
454 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
455 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
456 "cvtsi2ss {$src, $dst|$dst, $src}",
457 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
458 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
459 "cvtsi2sd {$src, $dst|$dst, $src}",
460 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
461 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
462 "cvtsi2sd {$src, $dst|$dst, $src}",
463 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
464 // SSE2 instructions with XS prefix
// NOTE(review): the "Requires<[HasSSE2]>;" continuation of both defs below
// is not present in this copy — verify against upstream.
465 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
466 "cvtss2sd {$src, $dst|$dst, $src}",
467 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
469 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
470 "cvtss2sd {$src, $dst|$dst, $src}",
471 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
474 // Comparison instructions
// cmp${cc}ss/sd: $cc is an SSECC condition-code operand folded into the
// mnemonic; result written back to $dst (two-address).
475 let isTwoAddress = 1 in {
476 def CMPSSrr : SSI<0xC2, MRMSrcReg,
477 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
478 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
479 def CMPSSrm : SSI<0xC2, MRMSrcMem,
480 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
481 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
482 def CMPSDrr : SDI<0xC2, MRMSrcReg,
483 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
484 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
485 def CMPSDrm : SDI<0xC2, MRMSrcMem,
486 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
487 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// Unordered compare, setting EFLAGS (selected from the X86cmp node).
490 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
491 "ucomiss {$src2, $src1|$src1, $src2}",
492 [(X86cmp FR32:$src1, FR32:$src2)]>;
493 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
494 "ucomiss {$src2, $src1|$src1, $src2}",
495 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
496 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
497 "ucomisd {$src2, $src1|$src1, $src2}",
498 [(X86cmp FR64:$src1, FR64:$src2)]>;
499 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
500 "ucomisd {$src2, $src1|$src1, $src2}",
501 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
503 // Aliases of packed instructions for scalar use. These all have names that
// begin with the "Fs" prefix.
506 // Alias instructions that map fld0 to pxor for sse.
507 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// MRMInitReg encodes the same register as both source and destination,
// so "pxor $dst, $dst" materializes +0.0.
// NOTE(review): fp64imm0 is referenced but not defined in this copy —
// presumably defined alongside fp32imm0; verify.
508 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
509 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
510 Requires<[HasSSE1]>, TB, OpSize;
511 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
512 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
513 Requires<[HasSSE2]>, TB, OpSize;
515 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
516 // Upper bits are disregarded.
517 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
518 "movaps {$src, $dst|$dst, $src}", []>;
519 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
520 "movapd {$src, $dst|$dst, $src}", []>;
522 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
523 // Upper bits are disregarded.
524 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
525 "movaps {$src, $dst|$dst, $src}",
526 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
527 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
528 "movapd {$src, $dst|$dst, $src}",
529 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
531 // Alias bitwise logical operations using SSE logical ops on packed FP values.
// Selected from the X86fand/X86fxor nodes declared at the top of the file;
// or/andn variants carry no pattern here.
532 let isTwoAddress = 1 in {
533 let isCommutable = 1 in {
534 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
535 "andps {$src2, $dst|$dst, $src2}",
536 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
537 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
538 "andpd {$src2, $dst|$dst, $src2}",
539 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
540 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
541 "orps {$src2, $dst|$dst, $src2}", []>;
542 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
543 "orpd {$src2, $dst|$dst, $src2}", []>;
544 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
545 "xorps {$src2, $dst|$dst, $src2}",
546 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
547 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
548 "xorpd {$src2, $dst|$dst, $src2}",
549 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
// Memory forms: the 128-bit load is matched via X86loadpf32/64.
551 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
552 "andps {$src2, $dst|$dst, $src2}",
553 [(set FR32:$dst, (X86fand FR32:$src1,
554 (X86loadpf32 addr:$src2)))]>;
555 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
556 "andpd {$src2, $dst|$dst, $src2}",
557 [(set FR64:$dst, (X86fand FR64:$src1,
558 (X86loadpf64 addr:$src2)))]>;
559 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
560 "orps {$src2, $dst|$dst, $src2}", []>;
561 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
562 "orpd {$src2, $dst|$dst, $src2}", []>;
563 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
564 "xorps {$src2, $dst|$dst, $src2}",
565 [(set FR32:$dst, (X86fxor FR32:$src1,
566 (X86loadpf32 addr:$src2)))]>;
567 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
568 "xorpd {$src2, $dst|$dst, $src2}",
569 [(set FR64:$dst, (X86fxor FR64:$src1,
570 (X86loadpf64 addr:$src2)))]>;
572 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
573 "andnps {$src2, $dst|$dst, $src2}", []>;
574 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
575 "andnps {$src2, $dst|$dst, $src2}", []>;
576 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
577 "andnpd {$src2, $dst|$dst, $src2}", []>;
578 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
579 "andnpd {$src2, $dst|$dst, $src2}", []>;
582 //===----------------------------------------------------------------------===//
583 // SSE packed FP Instructions
584 //===----------------------------------------------------------------------===//
586 // Some 'special' instructions
// NOTE(review): this def's trailing Requires<...> line is not present in
// this copy — the definition appears truncated; verify against upstream.
587 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
588 "#IMPLICIT_DEF $dst",
589 [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned (movaps/movapd) and unaligned (movups/movupd) 128-bit moves.
// Only the aligned load/store forms carry selection patterns here.
593 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
594 "movaps {$src, $dst|$dst, $src}", []>;
595 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
596 "movaps {$src, $dst|$dst, $src}",
597 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
598 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
599 "movapd {$src, $dst|$dst, $src}", []>;
600 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
601 "movapd {$src, $dst|$dst, $src}",
602 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
604 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
605 "movaps {$src, $dst|$dst, $src}",
606 [(store (v4f32 VR128:$src), addr:$dst)]>;
607 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
608 "movapd {$src, $dst|$dst, $src}",
609 [(store (v2f64 VR128:$src), addr:$dst)]>;
611 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
612 "movups {$src, $dst|$dst, $src}", []>;
613 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
614 "movups {$src, $dst|$dst, $src}", []>;
615 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
616 "movups {$src, $dst|$dst, $src}", []>;
617 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
618 "movupd {$src, $dst|$dst, $src}", []>;
619 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
620 "movupd {$src, $dst|$dst, $src}", []>;
621 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
622 "movupd {$src, $dst|$dst, $src}", []>;
// Half-register moves: low (movlps/movlpd) and high (movhps/movhpd)
// 64-bit halves of an XMM register, plus the cross-half reg-reg forms.
// NOTE(review): the "[(set VR128:$dst," line of the MOVHPDrm, MOVLHPSrr and
// MOVHLPSrr patterns is not present in this copy — verify against upstream.
624 let isTwoAddress = 1 in {
625 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
626 "movlps {$src2, $dst|$dst, $src2}", []>;
627 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
628 "movlpd {$src2, $dst|$dst, $src2}", []>;
629 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
630 "movhps {$src2, $dst|$dst, $src2}", []>;
631 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
632 "movhpd {$src2, $dst|$dst, $src2}",
634 (v2f64 (vector_shuffle VR128:$src1,
635 (scalar_to_vector (loadf64 addr:$src2)),
636 UNPCKL_shuffle_mask)))]>;
// Store forms: write one 64-bit half of $src to memory.
639 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
640 "movlps {$src, $dst|$dst, $src}", []>;
641 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
642 "movlpd {$src, $dst|$dst, $src}", []>;
644 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
645 "movhps {$src, $dst|$dst, $src}", []>;
646 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
647 "movhpd {$src, $dst|$dst, $src}", []>;
649 let isTwoAddress = 1 in {
650 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
651 "movlhps {$src2, $dst|$dst, $src2}",
653 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
654 MOVLHPS_shuffle_mask)))]>;
656 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
657 "movhlps {$src2, $dst|$dst, $src2}",
659 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
660 MOVHLPS_shuffle_mask)))]>;
// Extract the sign bits of the four packed singles into a GPR.
663 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
664 "movmskps {$src, $dst|$dst, $src}",
665 [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// Extract the sign bits of the two packed doubles into a GPR.
// movmskpd is an SSE2 instruction encoded 66 0F 50 — it needs the OpSize
// prefix and HasSSE2, i.e. PDI.  The previous PSI class emitted bare 0F 50
// (the movmskps encoding) and only required SSE1.
666 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
667 "movmskpd {$src, $dst|$dst, $src}",
668 [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
670 // Conversion instructions
// Packed int<->FP conversions; the *PI forms use the 64-bit MMX register
// class VR64.
671 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
672 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
673 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
674 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
675 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
676 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
677 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
678 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
680 // SSE2 instructions without OpSize prefix
// NOTE(review): the "Requires<[HasSSE2]>;" continuation of the two defs
// below is not present in this copy — verify against upstream.
681 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
682 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
684 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
685 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
688 // SSE2 instructions with XS prefix
689 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
690 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
691 XS, Requires<[HasSSE2]>;
692 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
693 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
694 XS, Requires<[HasSSE2]>;
696 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
697 "cvtps2pi {$src, $dst|$dst, $src}", []>;
698 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
699 "cvtps2pi {$src, $dst|$dst, $src}", []>;
700 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
701 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
702 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
703 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
705 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
706 "cvtps2dq {$src, $dst|$dst, $src}", []>;
707 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
708 "cvtps2dq {$src, $dst|$dst, $src}", []>;
709 // SSE2 packed instructions with XD prefix
710 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
711 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
712 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
713 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
715 // SSE2 instructions without OpSize prefix
// NOTE(review): the "Requires<[HasSSE2]>;" continuation of this def is not
// present in this copy — the definition appears truncated; verify upstream.
716 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
717 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// cvtps2pd with a memory source.  Must use MRMSrcMem so $src is encoded as
// a mod/rm memory operand — the previous MRMSrcReg would mis-encode the
// load form.  The trailing Requires<[HasSSE2]> (missing in this copy) is
// restored: cvtps2pd is SSE2, no OpSize prefix, hence raw I + TB.
719 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
720 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE2]>;
// Packed double -> packed single, register source.
723 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
724 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// cvtpd2ps with a memory source.  Must use MRMSrcMem so $src is encoded as
// a mod/rm memory operand — the previous MRMSrcReg (copy-paste from the rr
// form) would mis-encode the load form.
725 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
726 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP arithmetic. All forms are two-address: $src1 is tied to $dst.
// add/mul are commutative so either register operand may be the tied one.
729 let isTwoAddress = 1 in {
730 let isCommutable = 1 in {
731 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
732 "addps {$src2, $dst|$dst, $src2}",
733 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
734 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
735 "addpd {$src2, $dst|$dst, $src2}",
736 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
737 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
738 "mulps {$src2, $dst|$dst, $src2}",
739 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
740 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
741 "mulpd {$src2, $dst|$dst, $src2}",
742 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
// Memory-source forms: second operand folded from a 128-bit load.
745 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
746 "addps {$src2, $dst|$dst, $src2}",
747 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
748 (load addr:$src2))))]>;
749 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
750 "addpd {$src2, $dst|$dst, $src2}",
751 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
752 (load addr:$src2))))]>;
753 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
754 "mulps {$src2, $dst|$dst, $src2}",
755 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
756 (load addr:$src2))))]>;
757 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
758 "mulpd {$src2, $dst|$dst, $src2}",
759 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
760 (load addr:$src2))))]>;
// div/sub are not commutative, so they sit outside the isCommutable block.
762 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
763 "divps {$src2, $dst|$dst, $src2}",
764 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
765 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
766 "divps {$src2, $dst|$dst, $src2}",
767 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
768 (load addr:$src2))))]>;
769 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
770 "divpd {$src2, $dst|$dst, $src2}",
771 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
772 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
773 "divpd {$src2, $dst|$dst, $src2}",
774 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
775 (load addr:$src2))))]>;
777 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
778 "subps {$src2, $dst|$dst, $src2}",
779 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
780 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
781 "subps {$src2, $dst|$dst, $src2}",
782 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
783 (load addr:$src2))))]>;
784 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
785 "subpd {$src2, $dst|$dst, $src2}",
786 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
787 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
788 "subpd {$src2, $dst|$dst, $src2}",
789 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
790 (load addr:$src2))))]>;
// Packed square root, with approximate reciprocal-sqrt and reciprocal.
// Only the exact sqrt forms have selection patterns (fsqrt); the
// approximation instructions are assembler-only here (empty pattern lists).
793 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
794 "sqrtps {$src, $dst|$dst, $src}",
795 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
796 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
797 "sqrtps {$src, $dst|$dst, $src}",
798 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
799 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
800 "sqrtpd {$src, $dst|$dst, $src}",
801 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
802 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
803 "sqrtpd {$src, $dst|$dst, $src}",
804 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
806 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
807 "rsqrtps {$src, $dst|$dst, $src}", []>;
808 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
809 "rsqrtps {$src, $dst|$dst, $src}", []>;
810 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
811 "rcpps {$src, $dst|$dst, $src}", []>;
812 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
813 "rcpps {$src, $dst|$dst, $src}", []>;
// Packed max / min (assembler-only, no selection patterns).
// NOTE(review): maxps/minps are destructive (the first register is both a
// source and the destination), yet these defs declare only (dst, src) with
// no separate tied $src1 like the add/sub defs above — confirm whether the
// three-operand two-address form was intended here.
815 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
816 "maxps {$src, $dst|$dst, $src}", []>;
817 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
818 "maxps {$src, $dst|$dst, $src}", []>;
819 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
820 "maxpd {$src, $dst|$dst, $src}", []>;
821 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
822 "maxpd {$src, $dst|$dst, $src}", []>;
823 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
824 "minps {$src, $dst|$dst, $src}", []>;
825 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
826 "minps {$src, $dst|$dst, $src}", []>;
827 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
828 "minpd {$src, $dst|$dst, $src}", []>;
829 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
830 "minpd {$src, $dst|$dst, $src}", []>;
// Packed FP bitwise logical ops. Patterns are written on the integer vector
// types (v4i32 / v2i64) since and/or/xor are bit operations; the PS/PD
// flavors differ only in encoding prefix.
833 let isTwoAddress = 1 in {
834 let isCommutable = 1 in {
835 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
836 "andps {$src2, $dst|$dst, $src2}",
837 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
838 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
839 "andpd {$src2, $dst|$dst, $src2}",
840 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
841 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
842 "orps {$src2, $dst|$dst, $src2}",
843 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
844 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
845 "orpd {$src2, $dst|$dst, $src2}",
846 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
847 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
848 "xorps {$src2, $dst|$dst, $src2}",
849 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
850 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
851 "xorpd {$src2, $dst|$dst, $src2}",
852 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
// Memory-source forms.
854 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
855 "andps {$src2, $dst|$dst, $src2}",
856 [(set VR128:$dst, (v4i32 (and VR128:$src1,
857 (load addr:$src2))))]>;
858 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
859 "andpd {$src2, $dst|$dst, $src2}",
860 [(set VR128:$dst, (v2i64 (and VR128:$src1,
861 (load addr:$src2))))]>;
862 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
863 "orps {$src2, $dst|$dst, $src2}",
864 [(set VR128:$dst, (v4i32 (or VR128:$src1,
865 (load addr:$src2))))]>;
866 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
867 "orpd {$src2, $dst|$dst, $src2}",
868 [(set VR128:$dst, (v2i64 (or VR128:$src1,
869 (load addr:$src2))))]>;
870 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
871 "xorps {$src2, $dst|$dst, $src2}",
872 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
873 (load addr:$src2))))]>;
874 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
875 "xorpd {$src2, $dst|$dst, $src2}",
876 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
877 (load addr:$src2))))]>;
// andn: dst = ~src1 & src2. Not commutative, hence outside isCommutable.
878 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
879 "andnps {$src2, $dst|$dst, $src2}",
880 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
882 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
883 "andnps {$src2, $dst|$dst, $src2}",
884 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
885 (load addr:$src2))))]>;
886 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
887 "andnpd {$src2, $dst|$dst, $src2}",
888 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// andnpd from memory: dst = ~src1 & load(src2). The pattern must invert
// $src1 — without the `not` this matched a plain `and`, selecting andnpd
// for ordinary and-with-memory and miscompiling it (cf. ANDNPSrm above).
891 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
892 "andnpd {$src2, $dst|$dst, $src2}",
893 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
894 (load addr:$src2))))]>;
// Packed FP compares. $cc is an SSECC condition-code operand printed into
// the mnemonic (cmpeqps, cmpltps, ...). Two-address: $src1 tied to $dst.
897 let isTwoAddress = 1 in {
898 def CMPPSrr : PSI<0xC2, MRMSrcReg,
899 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
900 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
901 def CMPPSrm : PSI<0xC2, MRMSrcMem,
902 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
903 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
904 def CMPPDrr : PDI<0xC2, MRMSrcReg,
905 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
906 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
907 def CMPPDrm : PDI<0xC2, MRMSrcMem,
908 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
909 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
912 // Shuffle and unpack instructions
// pshufw/pshufd (0F 70 /r): the destination is encoded in ModRM.reg and the
// source in ModRM.r/m, so the register-register forms are MRMSrcReg (they
// were previously MRMDestReg, which swaps the two fields in the encoder).
913 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
914 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
915 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
916 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
917 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
918 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
919 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
920 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
921 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
922 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
923 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
924 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// shufps/shufpd: select elements from $src1/$src2 per the immediate;
// matched against vector_shuffle nodes whose mask fits SHUFP_shuffle_mask.
926 let isTwoAddress = 1 in {
927 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
928 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
929 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
930 [(set VR128:$dst, (vector_shuffle
931 (v4f32 VR128:$src1), (v4f32 VR128:$src2),
932 SHUFP_shuffle_mask:$src3))]>;
933 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
934 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
935 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
936 [(set VR128:$dst, (vector_shuffle
937 (v4f32 VR128:$src1), (load addr:$src2),
938 SHUFP_shuffle_mask:$src3))]>;
939 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
940 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
941 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
942 [(set VR128:$dst, (vector_shuffle
943 (v2f64 VR128:$src1), (v2f64 VR128:$src2),
944 SHUFP_shuffle_mask:$src3))]>;
945 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
946 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
947 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
948 [(set VR128:$dst, (vector_shuffle
949 (v2f64 VR128:$src1), (load addr:$src2),
950 SHUFP_shuffle_mask:$src3))]>;
// FP unpack/interleave: unpckh takes the high halves of $src1/$src2,
// unpckl the low halves; matched via the UNPCK[HL]_shuffle_mask fragments.
952 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
953 (ops VR128:$dst, VR128:$src1, VR128:$src2),
954 "unpckhps {$src2, $dst|$dst, $src2}",
956 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
957 UNPCKH_shuffle_mask)))]>;
958 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
959 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
960 "unpckhps {$src2, $dst|$dst, $src2}",
962 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
963 UNPCKH_shuffle_mask)))]>;
964 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
965 (ops VR128:$dst, VR128:$src1, VR128:$src2),
966 "unpckhpd {$src2, $dst|$dst, $src2}",
968 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
969 UNPCKH_shuffle_mask)))]>;
970 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
971 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
972 "unpckhpd {$src2, $dst|$dst, $src2}",
974 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
975 UNPCKH_shuffle_mask)))]>;
977 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
978 (ops VR128:$dst, VR128:$src1, VR128:$src2),
979 "unpcklps {$src2, $dst|$dst, $src2}",
981 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
982 UNPCKL_shuffle_mask)))]>;
983 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
984 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
985 "unpcklps {$src2, $dst|$dst, $src2}",
987 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
988 UNPCKL_shuffle_mask)))]>;
989 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
990 (ops VR128:$dst, VR128:$src1, VR128:$src2),
991 "unpcklpd {$src2, $dst|$dst, $src2}",
993 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
994 UNPCKL_shuffle_mask)))]>;
995 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
996 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
997 "unpcklpd {$src2, $dst|$dst, $src2}",
999 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
1000 UNPCKL_shuffle_mask)))]>;
1003 //===----------------------------------------------------------------------===//
1004 // SSE integer instructions
1005 //===----------------------------------------------------------------------===//
1007 // Move Instructions
// movd: 32-bit GPR/memory <-> low dword of an XMM register.
1008 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1009 "movd {$src, $dst|$dst, $src}",
1011 (v4i32 (scalar_to_vector R32:$src)))]>;
1012 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1013 "movd {$src, $dst|$dst, $src}",
1015 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1017 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1018 "movd {$src, $dst|$dst, $src}", []>;
// movdqa: aligned 128-bit integer load/store; the load/store forms carry
// v4i32 patterns, other integer types are handled by Pats further down.
1020 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
1021 "movdqa {$src, $dst|$dst, $src}", []>;
1022 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
1023 "movdqa {$src, $dst|$dst, $src}",
1024 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
1025 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1026 "movdqa {$src, $dst|$dst, $src}",
1027 [(store (v4i32 VR128:$src), addr:$dst)]>;
1029 // SSE2 instructions with XS prefix
1030 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
1031 "movq {$src, $dst|$dst, $src}",
1033 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
1034 Requires<[HasSSE2]>;
1035 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
1036 "movq {$src, $dst|$dst, $src}", []>, XS,
1037 Requires<[HasSSE2]>;
// movq store (66 0F D6): the destination is the memory operand, so this is
// MRMDestMem (it was MRMSrcMem, which encodes the operands backwards —
// compare MOVD128mr / MOVDQAmr above).
1038 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
1039 "movq {$src, $dst|$dst, $src}", []>;
1041 // 128-bit Integer Arithmetic
// Packed integer add, byte/word/dword element widths; two-address and
// commutative like the FP arithmetic above.
1042 let isTwoAddress = 1 in {
1043 let isCommutable = 1 in {
1044 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1045 "paddb {$src2, $dst|$dst, $src2}",
1046 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
1047 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1048 "paddw {$src2, $dst|$dst, $src2}",
1049 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
1050 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1051 "paddd {$src2, $dst|$dst, $src2}",
1052 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
// Memory-source packed integer adds. Use i128mem for the 128-bit integer
// operand, consistent with MOVDQArm / PSHUFDrm / PUNPCK*rm (these were
// declared f128mem, the FP memory operand class).
1054 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1055 "paddb {$src2, $dst|$dst, $src2}",
1056 [(set VR128:$dst, (v16i8 (add VR128:$src1,
1057 (load addr:$src2))))]>;
1058 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1059 "paddw {$src2, $dst|$dst, $src2}",
1060 [(set VR128:$dst, (v8i16 (add VR128:$src1,
1061 (load addr:$src2))))]>;
1062 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1063 "paddd {$src2, $dst|$dst, $src2}",
1064 [(set VR128:$dst, (v4i32 (add VR128:$src1,
1065 (load addr:$src2))))]>;
// Packed integer subtract (not commutative).
1067 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1068 "psubb {$src2, $dst|$dst, $src2}",
1069 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
1070 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1071 "psubw {$src2, $dst|$dst, $src2}",
1072 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
1073 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1074 "psubd {$src2, $dst|$dst, $src2}",
1075 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
// Memory-source packed integer subtracts. i128mem (integer 128-bit memory
// operand) for consistency with the other integer instructions; these were
// declared with the FP operand class f128mem.
1077 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1078 "psubb {$src2, $dst|$dst, $src2}",
1079 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
1080 (load addr:$src2))))]>;
1081 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1082 "psubw {$src2, $dst|$dst, $src2}",
1083 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
1084 (load addr:$src2))))]>;
1085 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1086 "psubd {$src2, $dst|$dst, $src2}",
1087 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
1088 (load addr:$src2))))]>;
1090 // Unpack and interleave
// punpckl* interleave the low halves of the two sources, punpckh* the high
// halves; element width is byte (bw), word (wd), dword (dq) or qword (qdq).
1091 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
1092 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1093 "punpcklbw {$src2, $dst|$dst, $src2}",
1095 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1096 UNPCKL_shuffle_mask)))]>;
1097 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
1098 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1099 "punpcklbw {$src2, $dst|$dst, $src2}",
1101 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1102 UNPCKL_shuffle_mask)))]>;
1103 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
1104 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1105 "punpcklwd {$src2, $dst|$dst, $src2}",
1107 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1108 UNPCKL_shuffle_mask)))]>;
1109 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
1110 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1111 "punpcklwd {$src2, $dst|$dst, $src2}",
1113 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1114 UNPCKL_shuffle_mask)))]>;
1115 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
1116 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1117 "punpckldq {$src2, $dst|$dst, $src2}",
1119 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1120 UNPCKL_shuffle_mask)))]>;
1121 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
1122 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1123 "punpckldq {$src2, $dst|$dst, $src2}",
1125 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1126 UNPCKL_shuffle_mask)))]>;
1127 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
1128 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1129 "punpcklqdq {$src2, $dst|$dst, $src2}",
1131 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1132 UNPCKL_shuffle_mask)))]>;
1133 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
1134 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1135 "punpcklqdq {$src2, $dst|$dst, $src2}",
1137 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1138 UNPCKL_shuffle_mask)))]>;
1140 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
1141 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1142 "punpckhbw {$src2, $dst|$dst, $src2}",
1144 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1145 UNPCKH_shuffle_mask)))]>;
1146 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
1147 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1148 "punpckhbw {$src2, $dst|$dst, $src2}",
1150 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1151 UNPCKH_shuffle_mask)))]>;
1152 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
1153 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1154 "punpckhwd {$src2, $dst|$dst, $src2}",
1156 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1157 UNPCKH_shuffle_mask)))]>;
1158 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1159 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1160 "punpckhwd {$src2, $dst|$dst, $src2}",
1162 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1163 UNPCKH_shuffle_mask)))]>;
1164 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1165 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1166 "punpckhdq {$src2, $dst|$dst, $src2}",
1168 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1169 UNPCKH_shuffle_mask)))]>;
1170 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1171 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1172 "punpckhdq {$src2, $dst|$dst, $src2}",
1174 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1175 UNPCKH_shuffle_mask)))]>;
// punpckhqdq (0F 6D): interleave high qwords. The rr form's asm string said
// "punpckhdq" — the mnemonic of the 0x6A dword instruction above — which
// made the assembler output the wrong instruction; the rm form was correct.
1176 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
1177 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1178 "punpckhqdq {$src2, $dst|$dst, $src2}",
1180 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1181 UNPCKH_shuffle_mask)))]>;
1182 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1183 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1184 "punpckhqdq {$src2, $dst|$dst, $src2}",
1186 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1187 UNPCKH_shuffle_mask)))]>;
1190 //===----------------------------------------------------------------------===//
1191 // Miscellaneous Instructions
1192 //===----------------------------------------------------------------------===//
1194 // Prefetching loads
// Prefetch hints (0F 18 /0../3). Each variant has its own mnemonic; all
// four previously printed "prefetcht0". ModRM reg-field selects the hint:
// /0 = nta, /1 = t0, /2 = t1, /3 = t2.
1195 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1196 "prefetcht0 $src", []>, TB,
1197 Requires<[HasSSE1]>;
1198 def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
1199 "prefetcht1 $src", []>, TB,
1200 Requires<[HasSSE1]>;
1201 def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
1202 "prefetcht2 $src", []>, TB,
1203 Requires<[HasSSE1]>;
1204 def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
1205 "prefetchnta $src", []>, TB,
1206 Requires<[HasSSE1]>;
1208 // Non-temporal stores
1209 def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1210 "movntq {$src, $dst|$dst, $src}", []>, TB,
1211 Requires<[HasSSE1]>;
1212 def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1213 "movntps {$src, $dst|$dst, $src}", []>, TB,
1214 Requires<[HasSSE1]>;
// NOTE(review): maskmovq stores through an implicit [EDI] pointer and its
// ModRM encodes two MMX registers — confirm that modeling it with an
// explicit i64mem destination and MRMDestMem is intended here.
1215 def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1216 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
1217 Requires<[HasSSE1]>;
// Store-fence: orders all preceding stores.
1220 def SFENCE : I<0xAE, MRM7m, (ops),
1221 "sfence", []>, TB, Requires<[HasSSE1]>;
1223 // Load MXCSR register
1224 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
1225 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1227 //===----------------------------------------------------------------------===//
1228 // Alias Instructions
1229 //===----------------------------------------------------------------------===//
1231 // Alias instructions that map zero vector to pxor / xorp* for sse.
1232 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// MRMInitReg: reg-op-reg with the same register in both fields (x ^ x = 0).
1233 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1235 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1236 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1238 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1239 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1241 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
// pcmpeqd x, x sets every bit of x.
// NOTE(review): the pattern type is v2f64 immAllOnesV — an all-ones bit
// pattern is not a meaningful f64 constant; confirm an integer vector type
// was not intended (the integer types are handled by Pats further down).
1243 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1244 "pcmpeqd $dst, $dst",
1245 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1247 // Scalar to 128-bit vector with zero extension.
1248 // Three operand (but two address) aliases.
// These are movs*/movd spellings used after zeroing $src1 (see the
// X86zexts2vec Pats below): the scalar is inserted into the low element,
// the upper elements keep $src1's (zeroed) contents.
1249 let isTwoAddress = 1 in {
1250 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1251 "movss {$src2, $dst|$dst, $src2}", []>;
1252 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1253 "movsd {$src2, $dst|$dst, $src2}", []>;
1254 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1255 "movd {$src2, $dst|$dst, $src2}", []>;
1256 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
1257 "movq {$src2, $dst|$dst, $src2}", []>;
1260 // Loading from memory automatically zeroing upper bits.
1261 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1262 "movss {$src, $dst|$dst, $src}",
1264 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1265 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1266 "movsd {$src, $dst|$dst, $src}",
1268 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1269 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1270 "movd {$src, $dst|$dst, $src}",
1272 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1274 //===----------------------------------------------------------------------===//
1275 // Non-Instruction Patterns
1276 //===----------------------------------------------------------------------===//
1278 // 128-bit vector undef's.
1279 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1280 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1281 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1282 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1283 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1285 // 128-bit vector all zero's.
1286 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1287 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1288 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
1290 // 128-bit vector all one's.
1291 def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
1292 def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
1293 def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1294 def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
// NOTE(review): this v4f32 pattern is gated on HasSSE1 only, but
// V_SETALLONES is a PDI (SSE2 pcmpeqd) — confirm the predicate.
1295 def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
1297 // Load 128-bit integer vector values.
// All integer vector types share the movdqa load/store defined above.
1298 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
1299 Requires<[HasSSE2]>;
1300 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
1301 Requires<[HasSSE2]>;
1302 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
1303 Requires<[HasSSE2]>;
1304 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
1305 Requires<[HasSSE2]>;
1307 // Store 128-bit integer vector values.
1308 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1309 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1310 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1311 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1312 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1313 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1314 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1315 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1317 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
1319 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1320 Requires<[HasSSE2]>;
1321 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1322 Requires<[HasSSE2]>;
// Bitcasts between 128-bit vector types are free (no instruction emitted).
1325 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1326 Requires<[HasSSE2]>;
1327 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1328 Requires<[HasSSE2]>;
1330 // Zeroing a VR128 then do a MOVS* to the lower bits.
// NOTE(review): the v4f32 case expands to xorps+movss (SSE1 instructions)
// yet is gated on HasSSE2 — confirm whether HasSSE1 was intended.
1331 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1332 (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
1333 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1334 (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1335 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
1336 (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
1337 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1338 (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
// Narrow scalars are zero-extended into a GPR first, then moved over.
1339 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1340 (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1341 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1342 (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
1344 // Splat v4f32 / v4i32
1345 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1346 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1347 Requires<[HasSSE1]>;
1348 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1349 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1350 Requires<[HasSSE2]>;
1352 // Splat v2f64 / v2i64
1353 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1354 (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1355 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1356 (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1358 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
1359 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1360 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1361 Requires<[HasSSE2]>;
1362 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1363 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1364 Requires<[HasSSE2]>;