1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// DAG node for a load of a packed (128-bit) value used by the scalar 'Fs'
// alias instructions below.
// NOTE(review): the property list and '>;' terminator of this def appear to
// be missing from this copy — verify against the upstream file.
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,

// Bitwise FP logical nodes; flagged commutative/associative so the DAG
// combiner may reassociate them.
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
                     [SDNPCommutative, SDNPAssociative]>;
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                     [SDNPCommutative, SDNPAssociative]>;

// Scalar-to-vector nodes. ZEXT_S2VEC presumably zero-fills the upper
// elements — confirm against the X86 lowering code.
def X86s2vec : SDNode<"X86ISD::S2VEC",
                      SDTypeProfile<1, 1, []>, []>;
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                          SDTypeProfile<1, 1, []>, []>;

// Type profile: one result, two operands, all three of the same type.
def SDTUnpckl : SDTypeProfile<1, 2,
                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar f32/f64 loads performed through the packed-load node; used by the
// FsMOVAPS/FsMOVAPD scalar aliases of the packed move instructions.
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;

// Typed 128-bit vector loads, one fragment per supported element type.
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

// Bitcast fragments, for reusing patterns across vector element types.
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// Leaf matching the f32 immediate value +0.0 (used to select pxor for fld0).
// NOTE(review): the closing '}]>;' of this PatLeaf appears to be missing in
// this copy.
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// NOTE(review): this comment and the SDNodeXForm's '}]>;' terminator appear
// truncated in this copy.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));

// Splat mask: all elements select the same source element; lowered to the
// 8-bit shuffle immediate by SHUFFLE_get_shuf_imm.
def SSE_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;
// Shuffle-mask predicates; each defers to a helper in the X86 backend.
// NOTE(review): the '}]>;' / '}], xform>;' terminators of the first four
// PatLeafs below appear to be missing in this copy — verify upstream.
def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVLHPSMask(N);

def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVHLPSMask(N);

def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKLMask(N);

def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKHMask(N);

// Masks expressible as a PSHUFD / SHUFP 8-bit immediate; the xform computes
// that immediate.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;

def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;

// Only use SHUFP for v4i32 if no other options are available.
// FIXME: add tblgen hook to reduce the complexity of pattern.
def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//

// Instruction templates
// SSI - SSE1 instructions with XS prefix.
// SDI - SSE2 instructions with XD prefix.
// PSI - SSE1 instructions with TB prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// NOTE(review): the closing '}' of PSIi8 and of PDIi8 appears to be missing
// in this copy.
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
  let Pattern = pattern;
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
  let Pattern = pattern;
// Some 'special' instructions
// Pseudo-ops that define a scalar FP register without initializing it.
// The FR32 form requires only SSE1: FR32 is the SSE1 scalar register class,
// and gating it on HasSSE2 (as the original did) would wrongly make the
// pseudo unavailable on SSE1-only targets. The FR64 form genuinely needs
// SSE2 since FR64 only exists with SSE2.
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
// scheduler into a branch sequence.
// NOTE(review): the '}' closing this 'let' block appears to be missing in
// this copy.
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
  def CMOV_FR32 : I<0, Pseudo,
                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
                    "#CMOV_FR32 PSEUDO!",
                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
  def CMOV_FR64 : I<0, Pseudo,
                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
                    "#CMOV_FR64 PSEUDO!",
                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves. The reg-reg forms carry no pattern (selected
// by hand where needed); loads map to loadf32/loadf64, stores to 'store'.
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}", []>;
def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

// Store forms (opcode 0x11, MRMDestMem).
def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
// FR32 / FR64 to 128-bit vector conversion.
// NOTE(review): each pattern below appears to be missing its leading
// '[(set VR128:$dst,' line in this copy — verify against upstream.
def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                     "movss {$src, $dst|$dst, $src}",
                        (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                     "movss {$src, $dst|$dst, $src}",
                        (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                     "movsd {$src, $dst|$dst, $src}",
                        (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                     "movsd {$src, $dst|$dst, $src}",
                        (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
// Arithmetic instructions
// Scalar add/mul/div/sub; all are two-address ($src1 tied to $dst).
// add/mul are commutative; div/sub are not.
// NOTE(review): the '}' lines closing the isCommutable and isTwoAddress
// 'let' blocks appear to be missing in this copy.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;

// Memory forms fold the load into the arithmetic pattern.
def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;

def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;

def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
// Scalar square root; maps directly to the fsqrt DAG node.
def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt FR32:$src))]>;
def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt FR64:$src))]>;
def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;

// Reciprocal/rsqrt estimates: no DAG patterns here — only the intrinsic
// forms below select them.
def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;
def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;

// Scalar max/min: no patterns attached to these forms.
def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
// Aliases to match intrinsics which expect XMM operand(s).
// NOTE(review): the 'VR128:$src2),' / 'f32mem:$src2),' continuation lines of
// several ops lists, the pattern closers of the rr forms, and the '}' lines
// closing these 'let' blocks appear to be missing in this copy.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def Int_ADDSSrr : SSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
def Int_ADDSDrr : SDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
def Int_MULSSrr : SSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
def Int_MULSDrr : SDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
def Int_ADDSSrm : SSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_ADDSDrm : SDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
                                                             (load addr:$src2)))]>;
def Int_MULSSrm : SSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_MULSDrm : SDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
                                                             (load addr:$src2)))]>;
// Intrinsic forms of scalar div/sub (not commutative).
// NOTE(review): the pattern closers of the rr forms below appear to be
// missing in this copy.
def Int_DIVSSrr : SSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
def Int_DIVSSrm : SSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_DIVSDrr : SDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
def Int_DIVSDrm : SDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
                                                             (load addr:$src2)))]>;

def Int_SUBSSrr : SSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
def Int_SUBSSrm : SSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_SUBSDrr : SDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
def Int_SUBSDrm : SDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
                                                             (load addr:$src2)))]>;
// Intrinsic forms of sqrt/rsqrt/rcp; unary, so not two-address.
def Int_SQRTSSrr : SSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss VR128:$src))]>;
def Int_SQRTSSrm : SSI<0x51, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss
                                          (load addr:$src)))]>;
def Int_SQRTSDrr : SDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd VR128:$src))]>;
def Int_SQRTSDrm : SDI<0x51, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd
                                          (load addr:$src)))]>;

def Int_RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss VR128:$src))]>;
def Int_RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss
                                           (load addr:$src)))]>;
def Int_RCPSSrr : SSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss VR128:$src))]>;
def Int_RCPSSrm : SSI<0x53, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss
                                         (load addr:$src)))]>;
// Intrinsic forms of scalar max/min; two-address ($src1 tied to $dst).
// NOTE(review): the second lines of the ops lists, the pattern closers of
// the rr forms, and the '}' closing this 'let' appear missing in this copy.
let isTwoAddress = 1 in {
def Int_MAXSSrr : SSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
def Int_MAXSSrm : SSI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_MAXSDrr : SDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
def Int_MAXSDrm : SDI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
                                                             (load addr:$src2)))]>;
def Int_MINSSrr : SSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
def Int_MINSSrm : SSI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
                                                            (load addr:$src2)))]>;
def Int_MINSDrr : SDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
def Int_MINSDrm : SDI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
                                                             (load addr:$src2)))]>;
// Conversion instructions
// The truncating forms (cvtt*) carry fp_to_sint patterns; the non-truncating
// cvtss2si forms have no pattern here.
def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;
def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;

def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// Double -> single rounding.
def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// Signed int -> FP.
def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
// SSE2 instructions with XS prefix
// NOTE(review): the trailing 'Requires<[HasSSE2]>;' lines of the two defs
// below appear to be missing in this copy.
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
// Comparison instructions
// cmp${cc}ss/sd take an SSECC condition-code operand; no patterns attached.
// NOTE(review): the '}' closing this 'let' block appears to be missing in
// this copy.
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// Unordered scalar FP compares, selected from the X86cmp node. No $dst:
// these only produce flags.
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
// Aliases of packed instructions for scalar use. These all have names that
// begin with 'Fs' (see the FsMOVAPS / FsANDPS families below).

// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): fp64imm0 is referenced below but its definition is not
// visible in this extract; presumably it sits next to fp32imm0 — verify.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                     "movapd {$src, $dst|$dst, $src}", []>;

// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded. Selected via the X86loadpf32/f64 fragments.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// and/xor select from the X86fand/X86fxor nodes; or and andn carry no
// patterns here.
// NOTE(review): the '}' lines closing the isCommutable and isTwoAddress
// 'let' blocks appear to be missing in this copy.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;

// Memory forms take f128mem (the packed 16-byte operand).
def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1,
                                              (X86loadpf32 addr:$src2)))]>;
def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1,
                                              (X86loadpf64 addr:$src2)))]>;
def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1,
                                              (X86loadpf32 addr:$src2)))]>;
def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1,
                                              (X86loadpf64 addr:$src2)))]>;

def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
//===----------------------------------------------------------------------===//
// SSE packed FP Instructions
//===----------------------------------------------------------------------===//

// Some 'special' instructions
// NOTE(review): the trailing 'Requires<...>;' of this def appears to be
// missing in this copy.
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned packed moves (movaps/movapd): reg-reg copies, loads, and stores.
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}", []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;

// Unaligned packed moves (movups/movupd): no patterns attached here.
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
// movlp*/movhp* merge a 64-bit memory operand into one half of the vector;
// only MOVHPDrm carries a pattern here.
// NOTE(review): the '}' closing this 'let' block and the leading
// '[(set VR128:$dst,' line of the MOVHPDrm pattern appear to be missing in
// this copy.
let isTwoAddress = 1 in {
def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlps {$src2, $dst|$dst, $src2}", []>;
def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlpd {$src2, $dst|$dst, $src2}", []>;
def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhps {$src2, $dst|$dst, $src2}", []>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhpd {$src2, $dst|$dst, $src2}",
                      (v2f64 (vector_shuffle VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)),
                              UNPCKL_shuffle_mask)))]>;

// Store halves back to memory; no patterns attached.
def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;

def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;
// movlhps/movhlps reg-reg shuffles, selected via the corresponding
// shuffle-mask predicates.
// NOTE(review): the leading '[(set VR128:$dst,' lines of both patterns and
// the '}' closing this 'let' block appear to be missing in this copy.
let isTwoAddress = 1 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movlhps {$src2, $dst|$dst, $src2}",
                       (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                               MOVLHPS_shuffle_mask)))]>;

def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}",
                       (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                               MOVHLPS_shuffle_mask)))]>;
// Extract the sign bits of each packed FP element into the low bits of a GPR.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is an SSE2 instruction encoded 66 0F 50: use the PDI template so
// the OpSize prefix is emitted and HasSSE2 is required. The original used
// PSI (SSE1, no OpSize), which would assemble as movmskps.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
// Conversion instructions
// MMX (VR64) <-> packed FP conversions; no patterns attached.
def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;

// SSE2 instructions without OpSize prefix
// NOTE(review): the trailing 'Requires<[HasSSE2]>;' lines of the CVTDQ2PS
// defs appear to be missing in this copy.
def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,

// SSE2 instructions with XS prefix
def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                   XS, Requires<[HasSSE2]>;
def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                   XS, Requires<[HasSSE2]>;

def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
// Packed FP -> packed dword conversions; no patterns attached.
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
// SSE2 packed instructions with XD prefix
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                   Requires<[HasSSE2]>;
// The memory form must use MRMSrcMem: the original declared MRMSrcReg for a
// f64mem operand, which would encode a register mod/rm byte for a memory
// source. (The trailing Requires lines, truncated in this copy, are restored
// to match the sibling CVTDQ2PD defs.)
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                   Requires<[HasSSE2]>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Memory form: MRMSrcMem, not MRMSrcReg — $src is an f128mem memory operand,
// so the original MRMSrcReg format was wrong.
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP add/multiply, register forms.  isTwoAddress ties $src1 to
// $dst; isCommutable lets the allocator swap the two sources.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
// Memory forms of the packed FP add/multiply.  The load is untyped; its
// value type is fixed by the surrounding v4f32/v2f64 node.
def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
// Packed FP divide/subtract.  Not commutable, so these sit outside the
// isCommutable region above.
def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
// Packed FP square root, selected from the fsqrt node.
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Reciprocal estimate and min/max.  All patterns are empty ([]), so these
// are assembler-only for now.
// NOTE(review): MAX*/MIN* are binary operations but are declared here with
// a single explicit source and no tied $src1 -- presumably relying on the
// implicit two-address destination; confirm before adding patterns.
def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
837 let isTwoAddress = 1 in {
838 let isCommutable = 1 in {
839 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
840 "andps {$src2, $dst|$dst, $src2}",
842 (and (bc_v4i32 (v4f32 VR128:$src1)),
843 (bc_v4i32 (v4f32 VR128:$src2))))]>;
844 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
845 "andpd {$src2, $dst|$dst, $src2}",
847 (and (bc_v2i64 (v2f64 VR128:$src1)),
848 (bc_v2i64 (v2f64 VR128:$src2))))]>;
849 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
850 "orps {$src2, $dst|$dst, $src2}",
852 (or (bc_v4i32 (v4f32 VR128:$src1)),
853 (bc_v4i32 (v4f32 VR128:$src2))))]>;
854 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
855 "orpd {$src2, $dst|$dst, $src2}",
857 (or (bc_v2i64 (v2f64 VR128:$src1)),
858 (bc_v2i64 (v2f64 VR128:$src2))))]>;
859 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
860 "xorps {$src2, $dst|$dst, $src2}",
862 (xor (bc_v4i32 (v4f32 VR128:$src1)),
863 (bc_v4i32 (v4f32 VR128:$src2))))]>;
864 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
865 "xorpd {$src2, $dst|$dst, $src2}",
867 (xor (bc_v2i64 (v2f64 VR128:$src1)),
868 (bc_v2i64 (v2f64 VR128:$src2))))]>;
870 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
871 "andps {$src2, $dst|$dst, $src2}",
873 (and (bc_v4i32 (v4f32 VR128:$src1)),
874 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
875 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
876 "andpd {$src2, $dst|$dst, $src2}",
878 (and (bc_v2i64 (v2f64 VR128:$src1)),
879 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
880 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
881 "orps {$src2, $dst|$dst, $src2}",
883 (or (bc_v4i32 (v4f32 VR128:$src1)),
884 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
885 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
886 "orpd {$src2, $dst|$dst, $src2}",
888 (or (bc_v2i64 (v2f64 VR128:$src1)),
889 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
890 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
891 "xorps {$src2, $dst|$dst, $src2}",
893 (xor (bc_v4i32 (v4f32 VR128:$src1)),
894 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
895 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
896 "xorpd {$src2, $dst|$dst, $src2}",
898 (xor (bc_v2i64 (v2f64 VR128:$src1)),
899 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
900 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
901 "andnps {$src2, $dst|$dst, $src2}",
903 (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
904 (bc_v4i32 (v4f32 VR128:$src2))))]>;
905 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
906 "andnps {$src2, $dst|$dst, $src2}",
908 (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
909 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
910 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
911 "andnpd {$src2, $dst|$dst, $src2}",
913 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
914 (bc_v2i64 (v2f64 VR128:$src2))))]>;
915 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
916 "andnpd {$src2, $dst|$dst, $src2}",
918 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
919 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
// Packed FP compares.  The SSECC operand supplies the condition code,
// which is spliced into the mnemonic via ${cc}.  Patterns are empty.
let isTwoAddress = 1 in {
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
937 // Shuffle and unpack instructions
// PSHUFW (0x70): shuffle words of an MMX register by immediate.  The
// destination is in the ModRM reg field and the source in r/m, so the
// register form is MRMSrcReg (it was MRMDestReg, which would swap the
// encoding and disagree with the MRMSrcMem memory form).
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFD (0x70, 66 prefix): shuffle dwords by immediate; source may be
// undef-second-operand vector_shuffle.  Register form must be MRMSrcReg
// (dst in reg field, source in r/m), matching the MRMSrcMem memory form;
// it was MRMDestReg.
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               (load addr:$src1), (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
// SHUFPS / SHUFPD: two-source shuffles controlled by an immediate mask.
// NOTE(review): SHUFPS takes i32i8imm while SHUFPD takes i8imm for the
// same kind of operand -- confirm whether this difference is intentional.
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
// UNPCKHPS / UNPCKHPD: interleave the high halves of the two sources,
// selected from vector_shuffle with the UNPCKH mask fragment.
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKH_shuffle_mask)))]>;
// UNPCKLPS / UNPCKLPD: interleave the low halves of the two sources,
// selected from vector_shuffle with the UNPCKL mask fragment.
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               UNPCKL_shuffle_mask)))]>;
1034 //===----------------------------------------------------------------------===//
1035 // SSE integer instructions
1036 //===----------------------------------------------------------------------===//
1038 // Move Instructions
1039 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1040 "movd {$src, $dst|$dst, $src}",
1042 (v4i32 (scalar_to_vector R32:$src)))]>;
1043 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1044 "movd {$src, $dst|$dst, $src}",
1046 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1048 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1049 "movd {$src, $dst|$dst, $src}", []>;
// MOVDQA: aligned 128-bit integer moves.  The load/store patterns are
// written with v4i32; other integer vector types reach these instructions
// through separate Pat<> entries elsewhere in the file.
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v4i32 VR128:$src), addr:$dst)]>;
1060 // SSE2 instructions with XS prefix
1061 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
1062 "movq {$src, $dst|$dst, $src}",
1064 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
1065 Requires<[HasSSE2]>;
// MOVQ (0x7E, XS prefix): load 64 bits from memory into an XMM register.
// No pattern yet.
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                  "movq {$src, $dst|$dst, $src}", []>, XS,
                Requires<[HasSSE2]>;
// MOVQ (0xD6): store the low 64 bits of an XMM register to memory.  The
// memory operand is the destination, so the format must be MRMDestMem
// (it was MRMSrcMem, the wrong operand encoding); compare MOVDQAmr above.
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                    "movq {$src, $dst|$dst, $src}", []>;
1072 // 128-bit Integer Arithmetic
// 128-bit integer adds, register forms.  Two-address and commutable.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
// Memory forms of the packed integer adds.  These consume 128-bit integer
// vectors, so the operand class is i128mem (was f128mem, inconsistent
// with MOVDQA and the PUNPCK* definitions, which use i128mem).
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                            (load addr:$src2))))]>;
// 128-bit integer subtracts, register forms.  Not commutable.
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "psubd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
// Memory forms of the packed integer subtracts.  As with PADD*, the
// memory operand is a 128-bit integer vector: i128mem (was f128mem).
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "psubb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "psubw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "psubd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
1121 // Unpack and interleave
1122 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
1123 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1124 "punpcklbw {$src2, $dst|$dst, $src2}",
1126 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1127 UNPCKL_shuffle_mask)))]>;
1128 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
1129 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1130 "punpcklbw {$src2, $dst|$dst, $src2}",
1132 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1133 UNPCKL_shuffle_mask)))]>;
1134 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
1135 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1136 "punpcklwd {$src2, $dst|$dst, $src2}",
1138 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1139 UNPCKL_shuffle_mask)))]>;
1140 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
1141 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1142 "punpcklwd {$src2, $dst|$dst, $src2}",
1144 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1145 UNPCKL_shuffle_mask)))]>;
1146 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
1147 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1148 "punpckldq {$src2, $dst|$dst, $src2}",
1150 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1151 UNPCKL_shuffle_mask)))]>;
1152 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
1153 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1154 "punpckldq {$src2, $dst|$dst, $src2}",
1156 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1157 UNPCKL_shuffle_mask)))]>;
1158 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
1159 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1160 "punpcklqdq {$src2, $dst|$dst, $src2}",
1162 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1163 UNPCKL_shuffle_mask)))]>;
1164 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
1165 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1166 "punpcklqdq {$src2, $dst|$dst, $src2}",
1168 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1169 UNPCKL_shuffle_mask)))]>;
1171 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
1172 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1173 "punpckhbw {$src2, $dst|$dst, $src2}",
1175 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1176 UNPCKH_shuffle_mask)))]>;
1177 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
1178 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1179 "punpckhbw {$src2, $dst|$dst, $src2}",
1181 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1182 UNPCKH_shuffle_mask)))]>;
1183 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
1184 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1185 "punpckhwd {$src2, $dst|$dst, $src2}",
1187 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1188 UNPCKH_shuffle_mask)))]>;
1189 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1190 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1191 "punpckhwd {$src2, $dst|$dst, $src2}",
1193 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1194 UNPCKH_shuffle_mask)))]>;
1195 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1196 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1197 "punpckhdq {$src2, $dst|$dst, $src2}",
1199 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1200 UNPCKH_shuffle_mask)))]>;
1201 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1202 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1203 "punpckhdq {$src2, $dst|$dst, $src2}",
1205 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1206 UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       // 0x6D is punpckhqdq; the mnemonic here said
                       // "punpckhdq" (copy/paste from PUNPCKHDQ above).
                       // The rm form already spells it correctly.
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
1211 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1212 UNPCKH_shuffle_mask)))]>;
1213 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1214 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1215 "punpckhqdq {$src2, $dst|$dst, $src2}",
1217 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1218 UNPCKH_shuffle_mask)))]>;
1221 //===----------------------------------------------------------------------===//
1222 // Miscellaneous Instructions
1223 //===----------------------------------------------------------------------===//
1225 // Prefetching loads
// Each variant selects its locality hint via the ModRM reg field (MRM0m -
// MRM3m); the mnemonics must differ accordingly.  They were all
// "prefetcht0" by copy/paste.
def PREFETCHT0   : I<0x18, MRM1m, (ops i8mem:$src),
                     "prefetcht0 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHT1   : I<0x18, MRM2m, (ops i8mem:$src),
                     "prefetcht1 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHT2   : I<0x18, MRM3m, (ops i8mem:$src),
                     "prefetcht2 $src", []>, TB,
                   Requires<[HasSSE1]>;
def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
                     "prefetchnta $src", []>, TB,
                   Requires<[HasSSE1]>;
1239 // Non-temporal stores
// Non-temporal stores (bypass the cache).  Patterns are empty.
def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
               "movntq {$src, $dst|$dst, $src}", []>, TB,
             Requires<[HasSSE1]>;
def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                "movntps {$src, $dst|$dst, $src}", []>, TB,
              Requires<[HasSSE1]>;
// NOTE(review): maskmovq's store address is implicit (EDI) and its second
// operand is a register mask; modeling it as MRMDestMem with an explicit
// i64mem operand looks suspicious -- confirm the intended encoding.
def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
               Requires<[HasSSE1]>;
// Store fence: orders preceding stores before subsequent ones.
def SFENCE : I<0xAE, MRM7m, (ops),
               "sfence", []>, TB, Requires<[HasSSE1]>;
// Load MXCSR register
def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
                "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1258 //===----------------------------------------------------------------------===//
1259 // Alias Instructions
1260 //===----------------------------------------------------------------------===//
1262 // Alias instructions that map zero vector to pxor / xorp* for sse.
1263 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
1264 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1266 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1267 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1269 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1270 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1272 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
// Materialize an all-ones register with pcmpeqd $dst, $dst.
// NOTE(review): the pattern types the result as v2f64 immAllOnesV -- the
// all-ones bit pattern is not a meaningful double; an integer vector type
// (e.g. v4i32) would match the instruction more naturally.  Confirm.
def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
                       "pcmpeqd $dst, $dst",
                       [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1278 // Scalar to 128-bit vector with zero extension.
1279 // Three operand (but two address) aliases.
// Two-address move-scalar-into-vector aliases.  Patterns are empty; these
// instructions are produced by separate Pat<> entries in this file.
let isTwoAddress = 1 in {
def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
                      "movss {$src2, $dst|$dst, $src2}", []>;
def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                      "movsd {$src2, $dst|$dst, $src2}", []>;
def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
                     "movd {$src2, $dst|$dst, $src2}", []>;
def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
                   "movq {$src2, $dst|$dst, $src2}", []>;
1291 // Loading from memory automatically zeroing upper bits.
1292 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1293 "movss {$src, $dst|$dst, $src}",
1295 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1296 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1297 "movsd {$src, $dst|$dst, $src}",
1299 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1300 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1301 "movd {$src, $dst|$dst, $src}",
1303 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1305 //===----------------------------------------------------------------------===//
1306 // Non-Instruction Patterns
1307 //===----------------------------------------------------------------------===//
1309 // 128-bit vector undef's.
// 128-bit vector undef's: select every 128-bit undef to IMPLICIT_DEF.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1316 // 128-bit vector all zero's.
1317 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1318 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1319 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
// 128-bit vector all one's.  Every pattern expands to V_SETALLONES, a
// pcmpeqd (PDI/SSE2) instruction, so the v4f32 form must also require
// SSE2 -- it previously required only HasSSE1, which could select an SSE2
// instruction on an SSE1-only target.
def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE2]>;
// Load 128-bit integer vector values: all integer types map onto MOVDQArm.
def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
1338 // Store 128-bit integer vector values.
1339 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1340 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1341 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1342 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1343 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1344 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1345 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1346 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8.  The source may be a R32, but only the lower 8 or
// 16 bits are meaningful; the full 32-bit movd is used anyway.
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
// bitconvert is a no-op on VR128: reinterpret the register without code.
// Only these three conversions are listed here.
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
      Requires<[HasSSE2]>;
// Zero a VR128 with V_SET0_*, then MOVS*/MOVD the scalar into the low
// element, giving a zero-extended scalar-to-vector.
// NOTE(review): the v4f32 case uses only SSE1 instructions (xorps/movss)
// yet requires HasSSE2 -- confirm whether HasSSE1 would suffice.
def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
          (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
          (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
          (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (X86zexts2vec R32:$src)),
          (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
// 8/16-bit scalars are first zero-extended to 32 bits with movzx.
def : Pat<(v8i16 (X86zexts2vec R16:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86zexts2vec R8:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
// Splat v2f64 / v2i64: any 2-element splat is a low unpack with itself,
// so the mask operand $sm is matched but not needed in the result.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v2f64 (UNPCKLPDrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v2i64 (PUNPCKLQDQrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
// v4f32 splat: shufps with itself, reusing the matched splat mask.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
// Shuffle v4i32 if others do not match: fall back to SHUFPS.
// NOTE(review): the source pattern binds $sm via SHUFP_shuffle_mask but
// the result references it as SHUFP_v4i32_shuffle_mask:$sm -- confirm the
// mismatched fragment names are intentional.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
           SHUFP_shuffle_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
           SHUFP_shuffle_mask:$sm),
          (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;