//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
// X86loadp: a packed (128-bit, aligned) load used to feed a scalar FP value;
// like any load it carries a chain.
// NOTE(review): the flags line was missing in the damaged source; restored as
// [SDNPHasChain] -- confirm against the X86ISD::LOAD_PACK lowering code.
def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
                        [SDNPHasChain]>;

// Bitwise FP and / xor nodes; commutative and associative so the combiner may
// reassociate them freely.
def X86fand    : SDNode<"X86ISD::FAND", SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
def X86fxor    : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;

// Scalar-to-vector moves (one result, one operand, no extra constraints).
def X86s2vec : SDNode<"X86ISD::S2VEC",
                      SDTypeProfile<1, 1, []>, []>;
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                          SDTypeProfile<1, 1, []>, []>;

// Unpack-low profile: result and both operands share a type.
def SDTUnpckl : SDTypeProfile<1, 2,
                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;

//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//

// Scalar FP values materialized via a packed 128-bit load.
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;

// Typed 128-bit vector loads.
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

// Bitcasts to each 128-bit integer vector type.
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// Floating-point constant +0.0, matched exactly (not -0.0).
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;
// fp64imm0 was dropped from the damaged source but is referenced by FsFLD0SD;
// restored to mirror the f32 form.
def fp64imm0 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;

// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;

// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;

// Shuffle-mask predicates.  Each PatLeaf tests a build_vector shuffle mask;
// those carrying an xform also compute the instruction's imm8 field.
def SSE_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;

def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVLHPSMask(N);
}]>;

def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVHLPSMask(N);
}]>;

def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKLMask(N);
}]>;

def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKHMask(N);
}]>;

def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;

def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isPSHUFHWMask(N);
}], SHUFFLE_get_pshufhw_imm>;

def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;

def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;

// Only use SHUFP for v4i32 if no other options are available.
// FIXME: add tblgen hook to reduce the complexity of pattern.
def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//

// Instruction templates
// SSI   - SSE1 instructions with XS prefix.
// SDI   - SSE2 instructions with XD prefix.
// PSI   - SSE1 instructions with TB prefix.
// PDI   - SSE2 instructions with TB and OpSize prefixes.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
  let Pattern = pattern;
}
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
  let Pattern = pattern;
}
// Some 'special' instructions
// NOTE(review): both implicit-defs require SSE2, even the FR32 form -- confirm
// whether IMPLICIT_DEF_FR32 should only require SSE1.
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;

// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
// scheduler into a branch sequence.
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
  def CMOV_FR32 : I<0, Pseudo,
                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
                    "#CMOV_FR32 PSEUDO!",
                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
  def CMOV_FR64 : I<0, Pseudo,
                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
                    "#CMOV_FR64 PSEUDO!",
                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
}
// Move Instructions.  Register-to-register forms carry no pattern (handled as
// copies); memory forms select plain scalar loads/stores.
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}", []>;
def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;

// FR32 / FR64 to 128-bit vector conversion (scalar_to_vector).
// The "[(set VR128:$dst," lines were dropped in the damaged source; restored.
def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
// Arithmetic instructions.  All binary ops are two-address (dst == src1);
// add/mul are additionally commutable so the register allocator may swap
// operands.  The closing braces of both let blocks were dropped in the
// damaged source; restored.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
}

def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;

def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;

def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
}

// Unary square root; not two-address.
def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt FR32:$src))]>;
def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt FR64:$src))]>;
def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;

// Approximate reciprocal sqrt / reciprocal; no DAG patterns (selected only
// through intrinsics below).
def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;
def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;

// Scalar min/max; no DAG patterns (selected only through intrinsics below).
def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
// Aliases to match intrinsics which expect XMM operand(s).  The dropped
// operand/pattern continuation lines and let-block closers were restored from
// the uniform shape of the surviving definitions.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def Int_ADDSSrr : SSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_ADDSDrr : SDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MULSSrr : SSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MULSDrr : SDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
                                         VR128:$src2))]>;
}

def Int_ADDSSrm : SSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_ADDSDrm : SDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MULSSrm : SSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MULSDrm : SDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
                                         (load addr:$src2)))]>;

def Int_DIVSSrr : SSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_DIVSSrm : SSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_DIVSDrr : SDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_DIVSDrm : SDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
                                         (load addr:$src2)))]>;

def Int_SUBSSrr : SSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_SUBSSrm : SSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_SUBSDrr : SDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_SUBSDrm : SDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
                                         (load addr:$src2)))]>;
}
// Unary intrinsic aliases: sqrt, reciprocal sqrt and reciprocal on the low
// element of an XMM register.
def Int_SQRTSSrr : SSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss VR128:$src))]>;
def Int_SQRTSSrm : SSI<0x51, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss
                                          (load addr:$src)))]>;
def Int_SQRTSDrr : SDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd VR128:$src))]>;
def Int_SQRTSDrm : SDI<0x51, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd
                                          (load addr:$src)))]>;

def Int_RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss VR128:$src))]>;
def Int_RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss
                                           (load addr:$src)))]>;
def Int_RCPSSrr : SSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss VR128:$src))]>;
def Int_RCPSSrm : SSI<0x53, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss
                                         (load addr:$src)))]>;
// min/max intrinsic aliases; two-address.  Dropped continuation lines and the
// let-block closer restored from the uniform shape of the other Int_ defs.
let isTwoAddress = 1 in {
def Int_MAXSSrr : SSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MAXSSrm : SSI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MAXSDrr : SDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MAXSDrm : SDI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MINSSrr : SSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MINSSrm : SSI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MINSDrr : SDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MINSDrm : SDI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
                                         (load addr:$src2)))]>;
}
// Conversion instructions
def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;
def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;

// Truncating (round-toward-zero) FP-to-int conversions.
def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;

// SSE2 instructions with XS prefix (cannot use SSI: that implies HasSSE1
// only, while cvtss2sd is an SSE2 instruction).  The dropped Requires lines
// were restored.
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                Requires<[HasSSE2]>;
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
                Requires<[HasSSE2]>;
// Comparison instructions.  cmp{cc}ss/sd write a mask into $dst (two-address);
// the let-block closer was dropped in the damaged source and is restored.
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
}

// Unordered compares setting EFLAGS (no register result).
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
// Aliases of packed instructions for scalar use.  These all have names that
// start with 'Fs'.

// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;

// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                     "movapd {$src, $dst|$dst, $src}", []>;

// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// Both let-block closers were dropped in the damaged source; restored.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
}

def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;
def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;

// andn is not commutable (dst = ~src1 & src2).
def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
}
//===----------------------------------------------------------------------===//
// SSE packed FP Instructions
//===----------------------------------------------------------------------===//

// Some 'special' instructions
// The dropped Requires line is restored (SSE1 suffices for a v4f32 undef).
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;

// Move Instructions (aligned).
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}", []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;

// Unaligned moves; no patterns (selected only via intrinsics/explicit use).
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
// Low/high half moves; two-address (the untouched half comes from $src1).
// Dropped "[(set VR128:$dst," lines and let-block closers restored.
let isTwoAddress = 1 in {
def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlps {$src2, $dst|$dst, $src2}", []>;
def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlpd {$src2, $dst|$dst, $src2}", []>;
def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhps {$src2, $dst|$dst, $src2}", []>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2f64 (vector_shuffle VR128:$src1,
                             (scalar_to_vector (loadf64 addr:$src2)),
                             UNPCKL_shuffle_mask)))]>;
}

def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;

def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;

let isTwoAddress = 1 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movlhps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVLHPS_shuffle_mask)))]>;

def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVHLPS_shuffle_mask)))]>;
}
689 // Conversion instructions
690 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
691 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
692 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
693 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
694 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
695 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
696 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
697 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
699 // SSE2 instructions without OpSize prefix
700 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
701 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
703 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
704 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
707 // SSE2 instructions with XS prefix
708 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
709 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
710 XS, Requires<[HasSSE2]>;
711 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
712 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
713 XS, Requires<[HasSSE2]>;
// Packed FP -> integer conversions.  cvtps2pi/cvtpd2pi produce MMX results;
// cvtps2dq/cvtpd2dq produce XMM results.  Patterns are not attached yet.
def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
// SSE2 packed instructions with XD prefix (cvtpd2dq is F2 0F E6).
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix: cvtps2pd widens two singles to
// two doubles; cvtpd2ps narrows two doubles to two singles.
// Bug fix: the *rm (memory-source) forms were encoded MRMSrcReg; a memory
// operand requires the MRMSrcMem ModRM form.  The Requires<[HasSSE2]>
// clauses (truncated in this copy) are restored to close the defs.
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP arithmetic, two-address form: $dst is tied to $src1.  The
// register-register forms of add/mul are commutable; the *rm forms fold a
// 128-bit load as the second operand.
// NOTE(review): the brace closing `let isCommutable = 1` (after MULPDrr) and
// the one closing `let isTwoAddress = 1` are not visible in this copy --
// confirm against the original file.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
// div/sub are not commutable.
def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
// Unary packed FP ops.  sqrtps/sqrtpd select fsqrt on packed values; the
// approximate reciprocal-sqrt and reciprocal forms have no patterns yet
// (they are not IEEE-exact, so the selector does not use them implicitly).
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
// Packed FP max/min.  No selection patterns yet.
// NOTE(review): max/min are binary, read-modify-write instructions, but
// these defs declare only a single source and are outside the
// isTwoAddress block -- looks like they should be two-address with
// $src1/$src2 like the arithmetic ops above; confirm.
def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
// Packed FP logical ops (two-address).  The FP values are bitcast to the
// matching integer vector type so the generic and/or/xor nodes match.
// NOTE(review): the `[(set VR128:$dst,` opening line of each pattern is
// missing from this copy (extraction dropped lines) -- the patterns below
// are reproduced as-found; confirm against the original file.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "andps {$src2, $dst|$dst, $src2}",
                  (and (bc_v4i32 (v4f32 VR128:$src1)),
                       (bc_v4i32 (v4f32 VR128:$src2))))]>;
def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  (and (bc_v2i64 (v2f64 VR128:$src1)),
                       (bc_v2i64 (v2f64 VR128:$src2))))]>;
def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
                 (or (bc_v4i32 (v4f32 VR128:$src1)),
                     (bc_v4i32 (v4f32 VR128:$src2))))]>;
def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 (or (bc_v2i64 (v2f64 VR128:$src1)),
                     (bc_v2i64 (v2f64 VR128:$src2))))]>;
def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  (xor (bc_v4i32 (v4f32 VR128:$src1)),
                       (bc_v4i32 (v4f32 VR128:$src2))))]>;
def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  (xor (bc_v2i64 (v2f64 VR128:$src1)),
                       (bc_v2i64 (v2f64 VR128:$src2))))]>;
// Memory forms fold a 128-bit load as the second operand.
def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andps {$src2, $dst|$dst, $src2}",
                  (and (bc_v4i32 (v4f32 VR128:$src1)),
                       (bc_v4i32 (loadv4f32 addr:$src2))))]>;
def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  (and (bc_v2i64 (v2f64 VR128:$src1)),
                       (bc_v2i64 (loadv2f64 addr:$src2))))]>;
def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
                 (or (bc_v4i32 (v4f32 VR128:$src1)),
                     (bc_v4i32 (loadv4f32 addr:$src2))))]>;
def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 (or (bc_v2i64 (v2f64 VR128:$src1)),
                     (bc_v2i64 (loadv2f64 addr:$src2))))]>;
def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  (xor (bc_v4i32 (v4f32 VR128:$src1)),
                       (bc_v4i32 (loadv4f32 addr:$src2))))]>;
def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  (xor (bc_v2i64 (v2f64 VR128:$src1)),
                       (bc_v2i64 (loadv2f64 addr:$src2))))]>;
// andn* complements the first (tied) operand, so it is not commutable.
def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
                        (bc_v4i32 (v4f32 VR128:$src2))))]>;
def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
                        (bc_v4i32 (loadv4f32 addr:$src2))))]>;
def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                        (bc_v2i64 (v2f64 VR128:$src2))))]>;
def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                        (bc_v2i64 (loadv2f64 addr:$src2))))]>;
// Packed FP compares.  The SSECC operand selects the condition encoded in
// the trailing immediate ($cc is printed as part of the mnemonic).
// No selection patterns yet.
let isTwoAddress = 1 in {
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
// Shuffle and unpack instructions.  shufps/shufpd select elements from the
// two sources under an 8-bit immediate; the SHUFP_shuffle_mask predicate
// restricts which vector_shuffle masks may select these forms.
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
// FP unpack-high / unpack-low.  Selected from vector_shuffle nodes whose
// masks satisfy the UNPCKH/UNPCKL mask predicates.
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKL_shuffle_mask)))]>;
1030 //===----------------------------------------------------------------------===//
1031 // SSE integer instructions
1032 //===----------------------------------------------------------------------===//
1034 // Move Instructions
1035 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1036 "movd {$src, $dst|$dst, $src}",
1038 (v4i32 (scalar_to_vector R32:$src)))]>;
1039 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1040 "movd {$src, $dst|$dst, $src}",
1042 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1044 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1045 "movd {$src, $dst|$dst, $src}", []>;
// movdqa: aligned 128-bit integer move.  The load/store forms are the
// canonical way to move integer vectors to/from memory (the load pattern is
// keyed on v4i32; other types are handled by Pats further below).
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v4i32 VR128:$src), addr:$dst)]>;
// SSE2 instructions with XS prefix: movq moves 64 bits into the low half of
// an XMM register, zeroing the upper half.
// NOTE(review): the `[(set VR128:$dst,` opening line of the rr pattern is
// missing from this copy -- reproduced as-found; confirm original.
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}",
                  (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
                Requires<[HasSSE2]>;
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                  "movq {$src, $dst|$dst, $src}", []>, XS,
                Requires<[HasSSE2]>;
// Store the low 64 bits of an XMM register to memory (66 0F D6).
// Bug fix: memory is the *destination* here, so the ModRM form is
// MRMDestMem, not MRMSrcMem.
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                    "movq {$src, $dst|$dst, $src}", []>;
// 128-bit Integer Arithmetic: packed add, two-address form; rr forms are
// commutable, rm forms fold a 128-bit load.
// NOTE(review): the brace closing `let isCommutable = 1` (after PADDQrr) is
// not visible in this copy -- confirm against the original file.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                            (load addr:$src2))))]>;
// paddq with a folded 128-bit load (v2i64 add).
// Bug fix: the asm string said "paddd"; opcode 0xD4 is paddq.
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (add VR128:$src1,
                                            (load addr:$src2))))]>;
// Packed integer subtract (not commutable); rm forms fold a 128-bit load.
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
// psubq with a folded 128-bit load (v2i64 sub).
// Bug fix: the asm string said "psubd"; opcode 0xFB is psubq.
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
// 128-bit integer logical ops (two-address).  All operate on v2i64; other
// integer vector types are handled via the bitconvert Pats further below.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "pand {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                 "pand {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (and VR128:$src1,
                                           (load addr:$src2))))]>;
// Packed bitwise OR.
// Bug fix: both forms were encoded with opcode 0xDB, which is PAND;
// POR is 66 0F EB.
def PORrr : PDI<0xEB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
def PORrm : PDI<0xEB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                          (load addr:$src2))))]>;
// pxor is commutable; pandn complements the tied first operand, so it is
// not.
// NOTE(review): the PANDNrr pattern's second operand line (VR128:$src2)
// appears to be missing in this copy -- reproduced as-found; confirm.
def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "pxor {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                 "pxor {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
                                           (load addr:$src2))))]>;
def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                            (load addr:$src2))))]>;
// Pack instructions: narrow with signed saturation via SSE2 intrinsics.
// NOTE(review): several operand/argument continuation lines of these defs
// are missing in this copy -- reproduced as-found; confirm original.
let isTwoAddress = 1 in {
def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     "packsswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packsswb_128
def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                     "packsswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packsswb_128
                                               (bc_v8i16 (loadv2f64 addr:$src2)))))]>;
def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4i32 (int_x86_sse2_packssdw_128
// packssdw with a folded 128-bit load.
// Bug fix: this memory form was encoded MRMSrcReg; a memory source requires
// MRMSrcMem.  The operand-list continuation lines (truncated in this copy)
// are restored to close the def.
def PACKSSDWrm : PDI<0x6B, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                       i128mem:$src2),
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4i32 (int_x86_sse2_packssdw_128
                                               VR128:$src1,
                                               (bc_v4i32 (loadv2i64 addr:$src2)))))]>;
// packuswb: narrow words to bytes with unsigned saturation.
// NOTE(review): operand/argument continuation lines missing in this copy --
// reproduced as-found; confirm original.
def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     "packuswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packuswb_128
// packuswb with a folded 128-bit load.
// Bug fix: this memory form was encoded MRMSrcReg; a memory source requires
// MRMSrcMem.  The operand-list continuation lines (truncated in this copy)
// are restored to close the def.
def PACKUSWBrm : PDI<0x67, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                       i128mem:$src2),
                     "packuswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packuswb_128
                                               VR128:$src1,
                                               (bc_v8i16 (loadv2i64 addr:$src2)))))]>;
// Shuffle and unpack instructions.
// pshufw shuffles the four words of an MMX operand under an 8-bit immediate.
// Bug fix: the register form used MRMDestReg even though $dst is written and
// $src1 is read through ModRM like every other load-style instruction here;
// it must be MRMSrcReg (the memory form is already MRMSrcMem).
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// pshufd shuffles the four dwords of an XMM operand under an 8-bit
// immediate; selected from one-input vector_shuffle nodes whose mask passes
// PSHUFD_shuffle_mask.
// Bug fix: the register form used MRMDestReg; like the memory form
// (MRMSrcMem) it reads its operand through ModRM, so it must be MRMSrcReg.
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               (load addr:$src1), (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
// SSE2 with ImmT == Imm8 and XS prefix: pshufhw shuffles the high four
// words, leaving the low quadword unchanged.
// Bug fix: both forms were encoded as destination forms (MRMDestReg /
// MRMDestMem) although the ModRM operand is the *source*; use MRMSrcReg /
// MRMSrcMem.
def PSHUFHWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                  XS, Requires<[HasSSE2]>;
def PSHUFHWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                  XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix: pshuflw shuffles the low four
// words, leaving the high quadword unchanged.
// Bug fixes: the mnemonic was misspelled "pshufLw" (assemblers expect
// all-lowercase "pshuflw"), and both forms used destination ModRM forms
// (MRMDestReg/MRMDestMem) although the ModRM operand is the source.
def PSHUFLWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                  XD, Requires<[HasSSE2]>;
def PSHUFLWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                  XD, Requires<[HasSSE2]>;
// Integer unpack-low: interleave the low halves of the two sources.
// NOTE(review): the `[(set VR128:$dst,` opening line of each pattern is
// missing from this copy -- reproduced as-found; confirm original.
let isTwoAddress = 1 in {
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}",
                       (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                               UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}",
                       (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
                               UNPCKL_shuffle_mask)))]>;
// Integer unpack-high: interleave the high halves of the two sources.
// NOTE(review): the `[(set VR128:$dst,` opening line of each pattern is
// missing from this copy -- reproduced as-found; confirm original.
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}",
                      (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKH_shuffle_mask)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}",
                      (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}",
                      (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}",
                      (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}",
                      (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}",
                      (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
                              UNPCKH_shuffle_mask)))]>;
// punpckhqdq: interleave the high quadwords (v2i64) of the two sources.
// Bug fix: the rr form printed "punpckhdq" (the dword variant, opcode 0x6A);
// opcode 0x6D is punpckhqdq, matching the rm form.  The `[(set VR128:$dst,`
// lines (truncated in this copy) are restored to close the patterns.
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                 UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
                                 UNPCKH_shuffle_mask)))]>;
1366 //===----------------------------------------------------------------------===//
1367 // Miscellaneous Instructions
1368 //===----------------------------------------------------------------------===//
1371 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
1372 "movmskps {$src, $dst|$dst, $src}",
1373 [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd: extract the sign bits of the two packed doubles into a GPR.
// Bug fix: this was declared PSI (no prefix), but movmskpd is the
// OpSize-prefixed (66 0F 50) encoding -- use PDI like the other pd forms.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
// pmovmskb: extract the sign bits of the sixteen packed bytes into a GPR.
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "pmovmskb {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
// Prefetching loads.  All four variants share opcode 0F 18 and are
// distinguished by the ModRM reg field (MRM0m..MRM3m).
// Bug fix: every variant printed "prefetcht0"; each hint level has its own
// mnemonic (prefetcht0 / prefetcht1 / prefetcht2 / prefetchnta).
def PREFETCHT0   : I<0x18, MRM1m, (ops i8mem:$src),
                     "prefetcht0 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHT1   : I<0x18, MRM2m, (ops i8mem:$src),
                     "prefetcht1 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHT2   : I<0x18, MRM3m, (ops i8mem:$src),
                     "prefetcht2 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
                     "prefetchnta $src", []>, TB,
                   Requires<[HasSSE1]>;
// Non-temporal stores: bypass the cache hierarchy.  maskmovq stores bytes
// of the MMX source selected by the mask register.  No patterns yet.
def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
               "movntq {$src, $dst|$dst, $src}", []>, TB,
             Requires<[HasSSE1]>;
def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                "movntps {$src, $dst|$dst, $src}", []>, TB,
              Requires<[HasSSE1]>;
def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
               Requires<[HasSSE1]>;
// Store fence: orders all prior stores before subsequent ones.
def SFENCE : I<0xAE, MRM7m, (ops),
               "sfence", []>, TB, Requires<[HasSSE1]>;

// Load MXCSR register (SSE control/status word) from memory.
def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
                "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1415 //===----------------------------------------------------------------------===//
1416 // Alias Instructions
1417 //===----------------------------------------------------------------------===//
1419 // Alias instructions that map zero vector to pxor / xorp* for sse.
1420 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
1421 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1423 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1424 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1426 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1427 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1429 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
1431 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1432 "pcmpeqd $dst, $dst",
1433 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
// Scalar to 128-bit vector with zero extension.
// Three operand (but two address) aliases: the scalar is merged into the
// low element of the tied $src1.  No patterns yet.
let isTwoAddress = 1 in {
def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
                      "movss {$src2, $dst|$dst, $src2}", []>;
def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                      "movsd {$src2, $dst|$dst, $src2}", []>;
def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
                     "movd {$src2, $dst|$dst, $src2}", []>;
def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
                   "movq {$src2, $dst|$dst, $src2}", []>;
// Loading from memory automatically zeroing upper bits (X86zexts2vec).
// NOTE(review): the `[(set VR128:$dst,` opening line of each pattern is
// missing from this copy -- reproduced as-found; confirm original.
def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                      "movss {$src, $dst|$dst, $src}",
                      (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                      "movsd {$src, $dst|$dst, $src}",
                      (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                     "movd {$src, $dst|$dst, $src}",
                     (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1462 //===----------------------------------------------------------------------===//
1463 // Non-Instruction Patterns
1464 //===----------------------------------------------------------------------===//
1466 // 128-bit vector undef's.
1467 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1468 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1469 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1470 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1471 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
// 128-bit vector all zero's: route every integer type to V_SET0_PI.
def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;

// 128-bit vector all one's: route every type to V_SETALLONES (pcmpeqd).
def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
// Load 128-bit integer vector values.
// All four integer vector types share the same aligned 128-bit load, MOVDQA.
def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
// Store 128-bit integer vector values.
// Mirror of the load patterns above: all four integer vector types share the
// same aligned 128-bit store, MOVDQA.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits are used; MOVD copies the full 32-bit register into the low
// element. (NOTE(review): the original comment was truncated mid-sentence in
// this excerpt — the completion above is the presumed intent; confirm
// against the full file.)
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
// bitconvert between 128-bit vector types is free: every type here lives in
// the VR128 register class, so the conversion is a pure re-typing of the
// same register.
//
// The result-side cast must name the DESTINATION type of the bitconvert.
// The v2i64- and v4i32-destination groups already did so, but the six
// v8i16/v16i8-destination patterns cast their results to (v4i32 ...) — an
// apparent copy-paste slip from the v4i32 group. Fixed here so every result
// cast matches its pattern's destination type.
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;

// fp <-> int bitconverts within VR128 are likewise free.
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
      Requires<[HasSSE2]>;
// Zeroing a VR128 then do a MOVS* to the lower bits.
// X86zexts2vec is lowered as "materialize an all-zero vector, then move the
// scalar into the low element", which leaves the upper elements zero.
def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
          (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
// NOTE(review): this v4f32 form requires HasSSE2, yet the v4f32 immAllOnesV
// pattern earlier in this file requires only HasSSE1 — confirm whether SSE1
// suffices for MOVZSS128rr / V_SET0_PS here.
def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
          (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
          (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (X86zexts2vec R32:$src)),
          (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
// i16 / i8 sources are first zero-extended to 32 bits in an integer
// register, then moved into the vector with MOVD.
def : Pat<(v8i16 (X86zexts2vec R16:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86zexts2vec R8:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
// Splat v2f64 / v2i64
// Two-element splats are lowered as "unpack low" of the register with
// itself; the bound mask $sm is not used on the result side.
// NOTE(review): ignoring $sm assumes the splat selects element 0 — confirm
// that SSE_splat_mask (defined elsewhere) only matches that case.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v2f64 (UNPCKLPDrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v2i64 (PUNPCKLQDQrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;

// Splat v4f32: four-element splats keep the mask and shuffle the register
// against itself with SHUFPS. Only SSE1 is needed for the fp form.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
// Shuffle v4i32 if others do not match
// Fallback: reuse the fp SHUFPS instruction to shuffle integer v4i32 data
// (register/register and register/memory forms) when no integer shuffle
// pattern matched earlier.
// NOTE(review): the source patterns bind the mask as SHUFP_shuffle_mask:$sm
// but the result sides reference SHUFP_v4i32_shuffle_mask:$sm — confirm the
// latter is a real fragment/transform defined elsewhere and not a
// copy-paste typo for SHUFP_shuffle_mask.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
           SHUFP_shuffle_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
           SHUFP_shuffle_mask:$sm),
          (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;