1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE specific DAG nodes.
// X86loadp is a packed load feeding a scalar use; like 'load' it carries a
// chain (operand list restored -- it was truncated).
def X86loadp  : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
                       [SDNPHasChain]>;
def X86fand   : SDNode<"X86ISD::FAND", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86fxor   : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector moves; result/operand types are left unconstrained here.
def X86s2vec  : SDNode<"X86ISD::S2VEC",
                       SDTypeProfile<1, 1, []>, []>;
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                          SDTypeProfile<1, 1, []>, []>;

// One result, two operands, all three of the same vector type.
def SDTUnpckl : SDTypeProfile<1, 2,
                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Packed loads producing a single scalar element.
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;

// Typed 128-bit vector loads.
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

// An f32 immediate that is exactly +0.0.  (Closing of the code block
// restored -- it was truncated.)
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// Splat mask selectable with SHUFPS; the mask doubles as the immediate.
def SHUFP_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;

// Splat mask selectable with MOVLHPS (no immediate operand).
def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}]>;

def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVLHPSMask(N);
}]>;

def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVHLPSMask(N);
}]>;

def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKLMask(N);
}]>;

def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKHMask(N);
}]>;

// Masks that take an 8-bit immediate, produced by SHUFFLE_get_shuf_imm.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;

def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
90 //===----------------------------------------------------------------------===//
91 // SSE scalar FP Instructions
92 //===----------------------------------------------------------------------===//
// Instruction templates
// SSI - SSE1 instructions with XS prefix.
// SDI - SSE2 instructions with XD prefix.
// PSI - SSE1 instructions with TB prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// The Imm8 templates derive from X86Inst directly, so the pattern must be
// assigned explicitly.  (Closing braces restored -- they were truncated.)
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
  let Pattern = pattern;
}
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
  : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
  let Pattern = pattern;
}
// Some 'special' instructions
// FR32 only requires SSE1; requiring SSE2 here was a bug (the FR64 variant
// below is the one that needs SSE2).
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
// scheduler into a branch sequence.
// (Closing brace of the let block restored -- it was truncated.)
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
def CMOV_FR32 : I<0, Pseudo,
                  (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
                  "#CMOV_FR32 PSEUDO!",
                  [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
def CMOV_FR64 : I<0, Pseudo,
                  (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
                  "#CMOV_FR64 PSEUDO!",
                  [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
}
// Scalar FP moves.  The rr forms carry no selection pattern; the rm/mr forms
// lower plain f32/f64 loads and stores.
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}", []>;
def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

// Store forms (opcode 0x11, ModRM destination is memory).
def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
// FR32 / FR64 to 128-bit vector conversion.
// (The '[(set VR128:$dst,' pattern lines were truncated; restored to match
// the surviving '(scalar_to_vector ...)' continuations.)
def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
// Arithmetic instructions.  All forms are two-address ($src1 is tied to
// $dst); only the reg-reg add/mul forms are commutable.
// (Closing braces of the isCommutable and isTwoAddress blocks restored --
// they were truncated.)
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
}

def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;

def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;

def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;

def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt FR32:$src))]>;
def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt FR64:$src))]>;
def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;

// No selection patterns here; these are reached via the Int_* intrinsic
// aliases below.
def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;
def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;

def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
}
// Aliases to match intrinsics which expect XMM operand(s).
// (Truncated operand-continuation lines and pattern closings restored by
// analogy with the complete single-line defs below, e.g. Int_DIVSSrr.)
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def Int_ADDSSrr : SSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_ADDSDrr : SDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MULSSrr : SSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MULSDrr : SDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
                                         VR128:$src2))]>;
}

def Int_ADDSSrm : SSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "addss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_ADDSDrm : SDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "addsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MULSSrm : SSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "mulss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_mul_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MULSDrm : SDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "mulsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_mul_sd VR128:$src1,
                                         (load addr:$src2)))]>;
// Intrinsic div/sub aliases.  (Truncated pattern closings and the brace
// closing the enclosing isTwoAddress block restored.)
def Int_DIVSSrr : SSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_DIVSSrm : SSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "divss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_div_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_DIVSDrr : SDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_DIVSDrm : SDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "divsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_div_sd VR128:$src1,
                                         (load addr:$src2)))]>;

def Int_SUBSSrr : SSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_SUBSSrm : SSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
                      "subss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_sub_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_SUBSDrr : SDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_SUBSDrm : SDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                      "subsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_sub_sd VR128:$src1,
                                         (load addr:$src2)))]>;
}
// Unary intrinsic aliases operating on the whole XMM register.  These are
// not two-address: sqrt/rsqrt/rcp read only $src.
def Int_SQRTSSrr : SSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss VR128:$src))]>;
def Int_SQRTSSrm : SSI<0x51, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                       "sqrtss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_sqrt_ss
                                          (load addr:$src)))]>;
def Int_SQRTSDrr : SDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd VR128:$src))]>;
def Int_SQRTSDrm : SDI<0x51, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                       "sqrtsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_sqrt_sd
                                          (load addr:$src)))]>;

// rsqrt/rcp exist only in SSE1 single-precision form.
def Int_RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss VR128:$src))]>;
def Int_RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                        "rsqrtss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_rsqrt_ss
                                           (load addr:$src)))]>;
def Int_RCPSSrr : SSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss VR128:$src))]>;
def Int_RCPSSrm : SSI<0x53, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                      "rcpss {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse_rcp_ss
                                         (load addr:$src)))]>;
// Intrinsic max/min aliases (two-address).  (Truncated operand lines,
// pattern closings, and the closing brace restored.)
let isTwoAddress = 1 in {
def Int_MAXSSrr : SSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MAXSSrm : SSI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "maxss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_max_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MAXSDrr : SDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MAXSDrm : SDI<0x5F, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "maxsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_max_sd VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MINSSrr : SSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
                                         VR128:$src2))]>;
def Int_MINSSrm : SSI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f32mem:$src2),
                      "minss {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse_min_ss VR128:$src1,
                                         (load addr:$src2)))]>;
def Int_MINSDrr : SDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                        VR128:$src2),
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
                                         VR128:$src2))]>;
def Int_MINSDrm : SDI<0x5D, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                        f64mem:$src2),
                      "minsd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_min_sd VR128:$src1,
                                         (load addr:$src2)))]>;
}
// Conversion instructions
// Only the truncating (cvtt*) FP->int forms carry patterns, since LLVM's
// fp_to_sint truncates.
def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;
def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;

def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 narrowing.
def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// int -> FP.
def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
// SSE2 instructions with XS prefix.  Cannot use the SSI template (that
// implies SSE1), so predicate explicitly.  (The truncated
// 'Requires<[HasSSE2]>;' terminators restored, matching CVTDQ2PD below.)
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                Requires<[HasSSE2]>;
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
                Requires<[HasSSE2]>;
// Comparison instructions.  $cc selects the predicate encoded in the
// immediate; no selection patterns here.  (Closing brace restored.)
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
}
// Unordered compares.  Note there is no register result in the pattern:
// the X86cmp node only produces flags.
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
// Aliases of packed instructions for scalar use.  These all have names that
// start with 'Fs'.

// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): fp64imm0 is referenced below but is not defined among the
// pattern fragments visible in this file -- confirm a definition parallel
// to fp32imm0 exists.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                     "movapd {$src, $dst|$dst, $src}", []>;

// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
// NOTE(review): these perform full 128-bit accesses (f128mem / X86loadp) --
// presumably 16-byte aligned as movaps requires; confirm callers guarantee
// alignment.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// (Closing braces of the isCommutable and isTwoAddress blocks restored --
// they were truncated.)
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
}

def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;
def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;

// andn (not-and) has no DAG node here, so no patterns.
def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
}
//===----------------------------------------------------------------------===//
// SSE packed FP Instructions
//===----------------------------------------------------------------------===//

// Some 'special' instructions
// (Truncated 'Requires' terminator restored; VR128 is available with SSE1.)
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;
// Aligned 128-bit moves.  rr forms have no patterns; rm/mr forms lower
// whole-vector loads and stores.
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}", []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;

// Unaligned moves: no selection patterns here.
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
// Low/high 64-bit element moves (two-address: the other half of $dst comes
// from $src1).  (MOVHPDrm's truncated '[(set VR128:$dst,' line and the
// let-block's closing brace restored.)
let isTwoAddress = 1 in {
def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlps {$src2, $dst|$dst, $src2}", []>;
def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movlpd {$src2, $dst|$dst, $src2}", []>;
def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhps {$src2, $dst|$dst, $src2}", []>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2f64 (vector_shuffle VR128:$src1,
                             (scalar_to_vector (loadf64 addr:$src2)),
                             UNPCKL_shuffle_mask)))]>;
}

def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;

def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;
// Register-to-register half moves, selected from vector_shuffle via the
// MOVLHPS/MOVHLPS mask predicates.  (Truncated '[(set VR128:$dst,' lines
// and closing brace restored.)
let isTwoAddress = 1 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movlhps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVLHPS_shuffle_mask)))]>;

def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVHLPS_shuffle_mask)))]>;
}
// Move FP sign bits to an integer register.
// MOVMSKPD is an SSE2 instruction with the 66 (OpSize) prefix -- its own
// pattern even uses the sse2 intrinsic -- so it must use the PDI template,
// not PSI (which would encode it as SSE1 movmskps).
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
// Conversion instructions
// NOTE(review): sources use VR64 / i64mem -- presumably the MMX register
// class and 64-bit packed-int memory; confirm VR64's definition.
def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix -- cannot use PDI (which adds
// OpSize), so predicate explicitly.  (Truncated 'Requires<[HasSSE2]>;'
// terminators on the CVTDQ2PS pair restored, matching the CVTDQ2PD pair.)
def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;

// SSE2 instructions with XS prefix
def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                 XS, Requires<[HasSSE2]>;
def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                 XS, Requires<[HasSSE2]>;
// Packed FP -> packed int conversions.  No selection patterns here.
def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
// NOTE(review): source is f64mem (only the low 64 bits are read) -- confirm
// this operand class is intended rather than f128mem.
def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;

def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
// SSE2 packed instructions with XD prefix
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix
// Bug fix: the memory forms (CVTPS2PDrm, CVTPD2PSrm) were declared with
// Format MRMSrcReg; a memory source must be MRMSrcMem or the ModRM byte is
// emitted as a register operand.  Also restored the truncated
// 'Requires<[HasSSE2]>;' terminators on the CVTPS2PD pair.
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP arithmetic (two-address).  (Restored the truncated brace
// closing the isCommutable sub-block; the isTwoAddress block itself
// continues past this section.)
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
}

def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;

def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
771 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
772 "divpd {$src2, $dst|$dst, $src2}",
773 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
774 (load addr:$src2))))]>;
776 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
777 "subps {$src2, $dst|$dst, $src2}",
778 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
779 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
780 "subps {$src2, $dst|$dst, $src2}",
781 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
782 (load addr:$src2))))]>;
783 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
784 "subpd {$src2, $dst|$dst, $src2}",
785 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
786 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
787 "subpd {$src2, $dst|$dst, $src2}",
788 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
789 (load addr:$src2))))]>;
// Packed square root: per-element fsqrt on four floats (sqrtps) or two
// doubles (sqrtpd), with register and 128-bit memory source forms.
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
               "sqrtps {$src, $dst|$dst, $src}",
               [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
               "sqrtps {$src, $dst|$dst, $src}",
               [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
               "sqrtpd {$src, $dst|$dst, $src}",
               [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
               "sqrtpd {$src, $dst|$dst, $src}",
               [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Reciprocal square root / reciprocal approximations (SSE1).  Empty
// pattern lists: no dag selection yet, assembler/emission only.
def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                "rsqrtps {$src, $dst|$dst, $src}", []>;
def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                "rsqrtps {$src, $dst|$dst, $src}", []>;
def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
              "rcpps {$src, $dst|$dst, $src}", []>;
def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
              "rcpps {$src, $dst|$dst, $src}", []>;

// Packed min/max.
// NOTE(review): maxps/maxpd/minps/minpd are destructive two-operand
// instructions, yet these records list only a single source operand and
// no $src1/$src2 two-address pair like the arithmetic ops above --
// presumably they should be restructured before selection patterns are
// added; confirm intended modeling.
def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
              "maxps {$src, $dst|$dst, $src}", []>;
def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
              "maxps {$src, $dst|$dst, $src}", []>;
def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
              "maxpd {$src, $dst|$dst, $src}", []>;
def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
              "maxpd {$src, $dst|$dst, $src}", []>;
def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
              "minps {$src, $dst|$dst, $src}", []>;
def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
              "minps {$src, $dst|$dst, $src}", []>;
def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
              "minpd {$src, $dst|$dst, $src}", []>;
def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
              "minpd {$src, $dst|$dst, $src}", []>;
832 let isTwoAddress = 1 in {
833 let isCommutable = 1 in {
834 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
835 "andps {$src2, $dst|$dst, $src2}",
836 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
837 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
838 "andpd {$src2, $dst|$dst, $src2}",
839 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
840 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
841 "orps {$src2, $dst|$dst, $src2}",
842 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
843 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
844 "orpd {$src2, $dst|$dst, $src2}",
845 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
846 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
847 "xorps {$src2, $dst|$dst, $src2}",
848 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
849 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
850 "xorpd {$src2, $dst|$dst, $src2}",
851 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
853 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
854 "andps {$src2, $dst|$dst, $src2}",
855 [(set VR128:$dst, (v4i32 (and VR128:$src1,
856 (load addr:$src2))))]>;
857 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
858 "andpd {$src2, $dst|$dst, $src2}",
859 [(set VR128:$dst, (v2i64 (and VR128:$src1,
860 (load addr:$src2))))]>;
861 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
862 "orps {$src2, $dst|$dst, $src2}",
863 [(set VR128:$dst, (v4i32 (or VR128:$src1,
864 (load addr:$src2))))]>;
865 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
866 "orpd {$src2, $dst|$dst, $src2}",
867 [(set VR128:$dst, (v2i64 (or VR128:$src1,
868 (load addr:$src2))))]>;
869 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
870 "xorps {$src2, $dst|$dst, $src2}",
871 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
872 (load addr:$src2))))]>;
873 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
874 "xorpd {$src2, $dst|$dst, $src2}",
875 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
876 (load addr:$src2))))]>;
877 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
878 "andnps {$src2, $dst|$dst, $src2}",
879 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
881 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
882 "andnps {$src2, $dst|$dst, $src2}",
883 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
884 (load addr:$src2))))]>;
885 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
886 "andnpd {$src2, $dst|$dst, $src2}",
887 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// ANDNPD computes (~src1) & src2: the pattern must invert src1, exactly
// as the ANDNPSrm pattern above does.  The original matched a plain
// 'and', which would select andnpd for a dag it does not implement.
def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andnpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
                                          (load addr:$src2))))]>;
896 let isTwoAddress = 1 in {
// Packed FP compares.  $cc selects the comparison predicate; it is
// printed as part of the mnemonic via ${cc} and ultimately encoded in
// the instruction's trailing immediate.  No selection patterns yet.
def CMPPSrr : PSI<0xC2, MRMSrcReg,
              (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
              "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
              (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
              "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
              (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
              "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
              (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
              "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
911 // Shuffle and unpack instructions
// PSHUFW (MMX): shuffle the four words of $src1 by the immediate.  The
// register form reads its source from the ModR/M r/m field and writes
// $dst via the reg field, so the format is MRMSrcReg -- the original
// MRMDestReg would swap the operand encoding.
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFD: shuffle the four dwords of $src1 by the immediate.  Like the
// memory form below (and PSHUFWrm above), the source sits in ModR/M r/m,
// so the register form must be MRMSrcReg, not MRMDestReg.
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
924 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
925 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
926 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
927 [(set VR128:$dst, (v4i32 (vector_shuffle
928 (load addr:$src1), (undef),
929 PSHUFD_shuffle_mask:$src2)))]>;
931 let isTwoAddress = 1 in {
932 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
933 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
934 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
935 [(set VR128:$dst, (v4f32 (vector_shuffle
936 VR128:$src1, VR128:$src2,
937 SHUFP_shuffle_mask:$src3)))]>;
938 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
939 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
940 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
941 [(set VR128:$dst, (v4f32 (vector_shuffle
942 VR128:$src1, (load addr:$src2),
943 SHUFP_shuffle_mask:$src3)))]>;
944 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
945 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
946 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
947 [(set VR128:$dst, (v2f64 (vector_shuffle
948 VR128:$src1, VR128:$src2,
949 SHUFP_shuffle_mask:$src3)))]>;
950 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
951 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
952 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
953 [(set VR128:$dst, (v2f64 (vector_shuffle
954 VR128:$src1, (load addr:$src2),
955 SHUFP_shuffle_mask:$src3)))]>;
957 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
958 (ops VR128:$dst, VR128:$src1, VR128:$src2),
959 "unpckhps {$src2, $dst|$dst, $src2}",
960 [(set VR128:$dst, (v4f32 (vector_shuffle
961 VR128:$src1, VR128:$src2,
962 UNPCKH_shuffle_mask)))]>;
963 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
964 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
965 "unpckhps {$src2, $dst|$dst, $src2}",
966 [(set VR128:$dst, (v4f32 (vector_shuffle
967 VR128:$src1, (load addr:$src2),
968 UNPCKH_shuffle_mask)))]>;
969 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
970 (ops VR128:$dst, VR128:$src1, VR128:$src2),
971 "unpckhpd {$src2, $dst|$dst, $src2}",
972 [(set VR128:$dst, (v2f64 (vector_shuffle
973 VR128:$src1, VR128:$src2,
974 UNPCKH_shuffle_mask)))]>;
975 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
976 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
977 "unpckhpd {$src2, $dst|$dst, $src2}",
978 [(set VR128:$dst, (v2f64 (vector_shuffle
979 VR128:$src1, (load addr:$src2),
980 UNPCKH_shuffle_mask)))]>;
982 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
983 (ops VR128:$dst, VR128:$src1, VR128:$src2),
984 "unpcklps {$src2, $dst|$dst, $src2}",
985 [(set VR128:$dst, (v4f32 (vector_shuffle
986 VR128:$src1, VR128:$src2,
987 UNPCKL_shuffle_mask)))]>;
988 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
989 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
990 "unpcklps {$src2, $dst|$dst, $src2}",
991 [(set VR128:$dst, (v4f32 (vector_shuffle
992 VR128:$src1, (load addr:$src2),
993 UNPCKL_shuffle_mask)))]>;
994 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
995 (ops VR128:$dst, VR128:$src1, VR128:$src2),
996 "unpcklpd {$src2, $dst|$dst, $src2}",
997 [(set VR128:$dst, (v2f64 (vector_shuffle
998 VR128:$src1, VR128:$src2,
999 UNPCKL_shuffle_mask)))]>;
1000 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
1001 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1002 "unpcklpd {$src2, $dst|$dst, $src2}",
1003 [(set VR128:$dst, (v2f64 (vector_shuffle
1004 VR128:$src1, (load addr:$src2),
1005 UNPCKL_shuffle_mask)))]>;
1008 //===----------------------------------------------------------------------===//
1009 // SSE integer instructions
1010 //===----------------------------------------------------------------------===//
1012 // Move Instructions
1013 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1014 "movd {$src, $dst|$dst, $src}",
1016 (v4i32 (scalar_to_vector R32:$src)))]>;
1017 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1018 "movd {$src, $dst|$dst, $src}",
1020 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1022 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1023 "movd {$src, $dst|$dst, $src}", []>;
1025 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
1026 "movdqa {$src, $dst|$dst, $src}", []>;
1027 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
1028 "movdqa {$src, $dst|$dst, $src}",
1029 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
1030 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1031 "movdqa {$src, $dst|$dst, $src}",
1032 [(store (v4i32 VR128:$src), addr:$dst)]>;
1034 // SSE2 instructions with XS prefix
1035 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
1036 "movq {$src, $dst|$dst, $src}",
1038 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
1039 Requires<[HasSSE2]>;
1040 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
1041 "movq {$src, $dst|$dst, $src}", []>, XS,
1042 Requires<[HasSSE2]>;
// Store the low quadword of $src to memory (movq, 66 0F D6).  The
// destination is the memory operand, so the format must be MRMDestMem;
// the original MRMSrcMem would encode this store's operands backwards.
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                    "movq {$src, $dst|$dst, $src}", []>;
1046 // 128-bit Integer Arithmetic
1047 let isTwoAddress = 1 in {
1048 let isCommutable = 1 in {
1049 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1050 "paddb {$src2, $dst|$dst, $src2}",
1051 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
1052 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1053 "paddw {$src2, $dst|$dst, $src2}",
1054 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
1055 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1056 "paddd {$src2, $dst|$dst, $src2}",
1057 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
1059 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1060 "paddb {$src2, $dst|$dst, $src2}",
1061 [(set VR128:$dst, (v16i8 (add VR128:$src1,
1062 (load addr:$src2))))]>;
1063 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1064 "paddw {$src2, $dst|$dst, $src2}",
1065 [(set VR128:$dst, (v8i16 (add VR128:$src1,
1066 (load addr:$src2))))]>;
1067 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1068 "paddd {$src2, $dst|$dst, $src2}",
1069 [(set VR128:$dst, (v4i32 (add VR128:$src1,
1070 (load addr:$src2))))]>;
1072 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1073 "psubb {$src2, $dst|$dst, $src2}",
1074 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
1075 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1076 "psubw {$src2, $dst|$dst, $src2}",
1077 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
1078 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1079 "psubd {$src2, $dst|$dst, $src2}",
1080 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
1082 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1083 "psubb {$src2, $dst|$dst, $src2}",
1084 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
1085 (load addr:$src2))))]>;
1086 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1087 "psubw {$src2, $dst|$dst, $src2}",
1088 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
1089 (load addr:$src2))))]>;
1090 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1091 "psubd {$src2, $dst|$dst, $src2}",
1092 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
1093 (load addr:$src2))))]>;
1095 // Unpack and interleave
1096 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
1097 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1098 "punpcklbw {$src2, $dst|$dst, $src2}",
1100 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1101 UNPCKL_shuffle_mask)))]>;
1102 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
1103 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1104 "punpcklbw {$src2, $dst|$dst, $src2}",
1106 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1107 UNPCKL_shuffle_mask)))]>;
1108 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
1109 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1110 "punpcklwd {$src2, $dst|$dst, $src2}",
1112 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1113 UNPCKL_shuffle_mask)))]>;
1114 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
1115 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1116 "punpcklwd {$src2, $dst|$dst, $src2}",
1118 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1119 UNPCKL_shuffle_mask)))]>;
1120 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
1121 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1122 "punpckldq {$src2, $dst|$dst, $src2}",
1124 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1125 UNPCKL_shuffle_mask)))]>;
1126 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
1127 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1128 "punpckldq {$src2, $dst|$dst, $src2}",
1130 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1131 UNPCKL_shuffle_mask)))]>;
1132 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
1133 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1134 "punpcklqdq {$src2, $dst|$dst, $src2}",
1136 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1137 UNPCKL_shuffle_mask)))]>;
1138 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
1139 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1140 "punpcklqdq {$src2, $dst|$dst, $src2}",
1142 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1143 UNPCKL_shuffle_mask)))]>;
1145 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
1146 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1147 "punpckhbw {$src2, $dst|$dst, $src2}",
1149 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1150 UNPCKH_shuffle_mask)))]>;
1151 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
1152 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1153 "punpckhbw {$src2, $dst|$dst, $src2}",
1155 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1156 UNPCKH_shuffle_mask)))]>;
1157 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
1158 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1159 "punpckhwd {$src2, $dst|$dst, $src2}",
1161 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1162 UNPCKH_shuffle_mask)))]>;
1163 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1164 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1165 "punpckhwd {$src2, $dst|$dst, $src2}",
1167 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1168 UNPCKH_shuffle_mask)))]>;
1169 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1170 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1171 "punpckhdq {$src2, $dst|$dst, $src2}",
1173 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1174 UNPCKH_shuffle_mask)))]>;
1175 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1176 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1177 "punpckhdq {$src2, $dst|$dst, $src2}",
1179 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1180 UNPCKH_shuffle_mask)))]>;
// Interleave high quadwords.  The mnemonic is punpckhqdq, not punpckhdq
// (the memory form below already spells it correctly; punpckhdq is the
// distinct 0x6A dword instruction above).
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                 UNPCKH_shuffle_mask)))]>;
1187 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1188 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1189 "punpckhqdq {$src2, $dst|$dst, $src2}",
1191 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1192 UNPCKH_shuffle_mask)))]>;
1195 //===----------------------------------------------------------------------===//
1196 // Miscellaneous Instructions
1197 //===----------------------------------------------------------------------===//
1199 // Prefetching loads
1200 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1201 "prefetcht0 $src", []>, TB,
1202 Requires<[HasSSE1]>;
// Prefetch into L2 and up (T1 hint, ModR/M /2).  Asm string was a
// copy-paste of prefetcht0; the mnemonic must match the record.
def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
                   "prefetcht1 $src", []>, TB,
                   Requires<[HasSSE1]>;
// Prefetch into L3 and up (T2 hint, ModR/M /3).  Asm string was a
// copy-paste of prefetcht0; the mnemonic must match the record.
def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
                   "prefetcht2 $src", []>, TB,
                   Requires<[HasSSE1]>;
// Non-temporal prefetch (NTA hint, ModR/M /0).  The mnemonic is
// "prefetchnta" (no second 't'); asm string was a copy-paste of
// prefetcht0.
def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
                   "prefetchnta $src", []>, TB,
                   Requires<[HasSSE1]>;
// Non-temporal stores: bypass the cache hierarchy on the way to memory.
def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
               "movntq {$src, $dst|$dst, $src}", []>, TB,
               Requires<[HasSSE1]>;
def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
               "movntps {$src, $dst|$dst, $src}", []>, TB,
               Requires<[HasSSE1]>;
// NOTE(review): maskmovq's store address is implicitly [EDI]; modeling it
// with an explicit i64mem destination may not match the real operand
// encoding -- confirm before emitting this.
def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
               "maskmovq {$src, $dst|$dst, $src}", []>, TB,
               Requires<[HasSSE1]>;

// Store fence: orders preceding stores before subsequent ones.
def SFENCE : I<0xAE, MRM7m, (ops),
               "sfence", []>, TB, Requires<[HasSSE1]>;

// Load MXCSR register (SSE control/status) from memory.
def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
                "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1232 //===----------------------------------------------------------------------===//
1233 // Alias Instructions
1234 //===----------------------------------------------------------------------===//
1236 // Alias instructions that map zero vector to pxor / xorp* for sse.
1237 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
1238 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1240 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1241 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1243 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1244 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1246 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
1248 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1249 "pcmpeqd $dst, $dst",
1250 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1252 // Scalar to 128-bit vector with zero extension.
1253 // Three operand (but two address) aliases.
1254 let isTwoAddress = 1 in {
1255 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1256 "movss {$src2, $dst|$dst, $src2}", []>;
1257 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1258 "movsd {$src2, $dst|$dst, $src2}", []>;
1259 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1260 "movd {$src2, $dst|$dst, $src2}", []>;
1261 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
1262 "movq {$src2, $dst|$dst, $src2}", []>;
1265 // Loading from memory automatically zeroing upper bits.
1266 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1267 "movss {$src, $dst|$dst, $src}",
1269 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1270 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1271 "movsd {$src, $dst|$dst, $src}",
1273 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1274 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1275 "movd {$src, $dst|$dst, $src}",
1277 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1279 //===----------------------------------------------------------------------===//
1280 // Non-Instruction Patterns
1281 //===----------------------------------------------------------------------===//
// 128-bit vector undef's: any vector-typed undef maps to an implicit def
// of a VR128 register.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;

// 128-bit vector all zero's: reuse the v2i64 pxor alias for every
// integer vector type (same bit pattern regardless of element type).
def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;

// 128-bit vector all one's via pcmpeqd reg,reg.
// NOTE(review): the v4f32 pattern is guarded by HasSSE1 only, but
// V_SETALLONES is a PDI (pcmpeqd, an SSE2 instruction) -- confirm the
// predicate should not be HasSSE2.
def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
// Load 128-bit integer vector values.  All four integer vector types
// share the movdqa load: the 128-bit pattern is element-type agnostic.
def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;

// Store 128-bit integer vector values, likewise via movdqa.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1322 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
1324 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1325 Requires<[HasSSE2]>;
1326 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1327 Requires<[HasSSE2]>;
1330 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1331 Requires<[HasSSE2]>;
1332 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1333 Requires<[HasSSE2]>;
1335 // Zeroing a VR128 then do a MOVS* to the lower bits.
1336 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1337 (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
1338 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1339 (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1340 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
1341 (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
1342 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1343 (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
1344 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1345 (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1346 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1347 (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
// Splat v4f32 / v4i32: shufps with the same register for both inputs
// (the integer form reuses SHUFPSrr, hence the HasSSE2 guard covers only
// the v4i32 type, not a different instruction).
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE2]>;

// Splat v2f64 / v2i64: movlhps of a register with itself; the mask
// immediate is absorbed by MOVLHPS_splat_mask, no operand needed.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;