1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// Custom X86 DAG nodes used by SSE instruction selection.
// NOTE(review): the X86loadp def is truncated in this listing (its flag list
// and closing are missing) — left untouched.
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// FP bitwise AND / XOR nodes. Marked commutative and associative so the
// DAG combiner is free to reassociate operands.
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector nodes (1 result, 1 operand, no type constraints).
// X86zexts2vec presumably zero-extends into the upper vector elements, per
// the node name — TODO confirm against the X86 ISel lowering code.
26 def X86s2vec : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
31 //===----------------------------------------------------------------------===//
32 // SSE pattern fragments
33 //===----------------------------------------------------------------------===//
// Scalar loads performed through the packed-load node (used by the
// FsMOVAPS/FsMOVAPD aliases below, which load 128 bits but only the low
// scalar is used).
35 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
36 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// 128-bit vector loads of each SSE element type.
38 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
39 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
40 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
41 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
42 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
43 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// Matches the f32 immediate +0.0 (so FLD0 can be selected as pxor).
// NOTE(review): the closing "}]>;" lines of these two PatLeafs are missing
// from this listing — code left untouched.
45 def fp32imm0 : PatLeaf<(f32 fpimm), [{
46 return N->isExactlyValue(+0.0);
// Matches a build_vector of all zeros.
49 def vecimm0 : PatLeaf<(build_vector), [{
50 return X86::isZeroVector(N);
53 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
55 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
56 return getI8Imm(X86::getShuffleSHUFImmediate(N));
59 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
60 return X86::isSplatMask(N);
61 }], SHUFFLE_get_shuf_imm>;
63 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
64 return X86::isSplatMask(N);
67 def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
68 return X86::isMOVLHPSorUNPCKLPDMask(N);
69 }], SHUFFLE_get_shuf_imm>;
71 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
72 return X86::isMOVHLPSMask(N);
73 }], SHUFFLE_get_shuf_imm>;
75 def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
76 return X86::isUNPCKHPDMask(N);
77 }], SHUFFLE_get_shuf_imm>;
79 // Only use PSHUF if it is not a splat.
80 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
81 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
82 }], SHUFFLE_get_shuf_imm>;
84 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
85 return X86::isSHUFPMask(N);
86 }], SHUFFLE_get_shuf_imm>;
88 //===----------------------------------------------------------------------===//
89 // SSE scalar FP Instructions
90 //===----------------------------------------------------------------------===//
92 // Instruction templates
93 // SSI - SSE1 instructions with XS prefix.
94 // SDI - SSE2 instructions with XD prefix.
95 // PSI - SSE1 instructions with TB prefix.
96 // PDI - SSE2 instructions with TB and OpSize prefixes.
97 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
98 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Instruction format shorthands: each class pins the mandatory prefix byte
// and the subtarget predicate for one SSE encoding family (see the comment
// block above for the naming scheme).
99 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
100 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
101 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
102 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
103 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
104 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
105 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
106 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// Imm8 variants derive from X86Inst directly and set Pattern explicitly.
// NOTE(review): the closing "}" of each of these two class bodies is
// missing from this listing — code left untouched.
107 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
108 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
109 let Pattern = pattern;
111 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
112 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
113 let Pattern = pattern;
116 // Some 'special' instructions
// Pseudo defs giving IMPLICIT_DEF a register-class-typed result.
// NOTE(review): the FR32 variant requires HasSSE2 even though FR32 only
// needs SSE1 — possibly intentional (FR32 codegen path), confirm upstream.
117 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
118 "#IMPLICIT_DEF $dst",
119 [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
120 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
121 "#IMPLICIT_DEF $dst",
122 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
124 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
125 // scheduler into a branch sequence.
// NOTE(review): the closing "}" of this let block is missing from this
// listing — code left untouched.
126 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
127 def CMOV_FR32 : I<0, Pseudo,
128 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
129 "#CMOV_FR32 PSEUDO!",
130 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
131 def CMOV_FR64 : I<0, Pseudo,
132 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
133 "#CMOV_FR64 PSEUDO!",
134 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves. The rr forms carry no pattern: they are
// emitted only by the register allocator / copy lowering, not isel.
138 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
139 "movss {$src, $dst|$dst, $src}", []>;
// Load forms select plain f32/f64 loads.
140 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
141 "movss {$src, $dst|$dst, $src}",
142 [(set FR32:$dst, (loadf32 addr:$src))]>;
143 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
144 "movsd {$src, $dst|$dst, $src}", []>;
145 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
146 "movsd {$src, $dst|$dst, $src}",
147 [(set FR64:$dst, (loadf64 addr:$src))]>;
// Store forms (opcode 0x11, memory destination).
149 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
150 "movss {$src, $dst|$dst, $src}",
151 [(store FR32:$src, addr:$dst)]>;
152 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
153 "movsd {$src, $dst|$dst, $src}",
154 [(store FR64:$src, addr:$dst)]>;
156 // FR32 / FR64 to 128-bit vector conversion.
157 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
158 "movss {$src, $dst|$dst, $src}",
160 (v4f32 (scalar_to_vector FR32:$src)))]>;
161 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
162 "movss {$src, $dst|$dst, $src}",
164 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
165 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
166 "movsd {$src, $dst|$dst, $src}",
168 (v2f64 (scalar_to_vector FR64:$src)))]>;
169 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
170 "movsd {$src, $dst|$dst, $src}",
172 (v4f32 (scalar_to_vector (loadf64 addr:$src))))]>;
175 // Conversion instructions
// Scalar FP <-> integer conversions.
// cvtss2si (round per MXCSR) has no pattern — only the truncating cvtt*
// forms are selected for fp_to_sint.
176 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
177 "cvtss2si {$src, $dst|$dst, $src}", []>;
178 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
179 "cvtss2si {$src, $dst|$dst, $src}", []>;
// Truncating conversions: match fp_to_sint directly.
181 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
182 "cvttss2si {$src, $dst|$dst, $src}",
183 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
184 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
185 "cvttss2si {$src, $dst|$dst, $src}",
186 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
187 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
188 "cvttsd2si {$src, $dst|$dst, $src}",
189 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
190 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
191 "cvttsd2si {$src, $dst|$dst, $src}",
192 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 rounding.
193 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
194 "cvtsd2ss {$src, $dst|$dst, $src}",
195 [(set FR32:$dst, (fround FR64:$src))]>;
196 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
197 "cvtsd2ss {$src, $dst|$dst, $src}",
198 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// i32 -> f32/f64.
199 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
200 "cvtsi2ss {$src, $dst|$dst, $src}",
201 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
202 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
203 "cvtsi2ss {$src, $dst|$dst, $src}",
204 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
205 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
206 "cvtsi2sd {$src, $dst|$dst, $src}",
207 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
208 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
209 "cvtsi2sd {$src, $dst|$dst, $src}",
210 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
211 // SSE2 instructions with XS prefix
212 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
213 "cvtss2sd {$src, $dst|$dst, $src}",
214 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
216 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
217 "cvtss2sd {$src, $dst|$dst, $src}",
218 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
221 // Arithmetic instructions
222 let isTwoAddress = 1 in {
223 let isCommutable = 1 in {
224 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
225 "addss {$src2, $dst|$dst, $src2}",
226 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
227 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
228 "addsd {$src2, $dst|$dst, $src2}",
229 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
230 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
231 "mulss {$src2, $dst|$dst, $src2}",
232 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
233 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
234 "mulsd {$src2, $dst|$dst, $src2}",
235 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
238 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
239 "addss {$src2, $dst|$dst, $src2}",
240 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
241 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
242 "addsd {$src2, $dst|$dst, $src2}",
243 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
244 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
245 "mulss {$src2, $dst|$dst, $src2}",
246 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
247 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
248 "mulsd {$src2, $dst|$dst, $src2}",
249 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
251 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
252 "divss {$src2, $dst|$dst, $src2}",
253 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
254 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
255 "divss {$src2, $dst|$dst, $src2}",
256 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
257 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
258 "divsd {$src2, $dst|$dst, $src2}",
259 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
260 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
261 "divsd {$src2, $dst|$dst, $src2}",
262 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
264 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
265 "subss {$src2, $dst|$dst, $src2}",
266 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
267 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
268 "subss {$src2, $dst|$dst, $src2}",
269 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
270 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
271 "subsd {$src2, $dst|$dst, $src2}",
272 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
273 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
274 "subsd {$src2, $dst|$dst, $src2}",
275 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
278 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
279 "sqrtss {$src, $dst|$dst, $src}",
280 [(set FR32:$dst, (fsqrt FR32:$src))]>;
281 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
282 "sqrtss {$src, $dst|$dst, $src}",
283 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
284 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
285 "sqrtsd {$src, $dst|$dst, $src}",
286 [(set FR64:$dst, (fsqrt FR64:$src))]>;
287 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
288 "sqrtsd {$src, $dst|$dst, $src}",
289 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
291 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
292 "rsqrtss {$src, $dst|$dst, $src}", []>;
293 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
294 "rsqrtss {$src, $dst|$dst, $src}", []>;
295 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
296 "rcpss {$src, $dst|$dst, $src}", []>;
297 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
298 "rcpss {$src, $dst|$dst, $src}", []>;
300 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
301 "maxss {$src, $dst|$dst, $src}", []>;
302 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
303 "maxss {$src, $dst|$dst, $src}", []>;
304 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
305 "maxsd {$src, $dst|$dst, $src}", []>;
306 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
307 "maxsd {$src, $dst|$dst, $src}", []>;
308 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
309 "minss {$src, $dst|$dst, $src}", []>;
310 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
311 "minss {$src, $dst|$dst, $src}", []>;
312 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
313 "minsd {$src, $dst|$dst, $src}", []>;
314 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
315 "minsd {$src, $dst|$dst, $src}", []>;
317 // Comparison instructions
// Scalar compares. cmp${cc}ss/sd take the SSE condition code as an operand
// substituted into the mnemonic; no pattern — selected by custom code.
// NOTE(review): the closing "}" of this let block is missing from this
// listing — code left untouched.
318 let isTwoAddress = 1 in {
319 def CMPSSrr : SSI<0xC2, MRMSrcReg,
320 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
321 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
322 def CMPSSrm : SSI<0xC2, MRMSrcMem,
323 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
324 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
325 def CMPSDrr : SDI<0xC2, MRMSrcReg,
326 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
327 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
328 def CMPSDrm : SDI<0xC2, MRMSrcMem,
329 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
330 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// Unordered compares setting EFLAGS; matched from the X86cmp node. These
// have no result operand, only two sources.
333 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
334 "ucomiss {$src2, $src1|$src1, $src2}",
335 [(X86cmp FR32:$src1, FR32:$src2)]>;
336 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
337 "ucomiss {$src2, $src1|$src1, $src2}",
338 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
339 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
340 "ucomisd {$src2, $src1|$src1, $src2}",
341 [(X86cmp FR64:$src1, FR64:$src2)]>;
342 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
343 "ucomisd {$src2, $src1|$src1, $src2}",
344 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
346 // Aliases of packed instructions for scalar use. These all have names that
349 // Alias instructions that map fld0 to pxor for sse.
350 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Materialize +0.0 with "pxor reg, reg" (MRMInitReg: same register encoded
// as both operands). fp32imm0/fp64imm0 match the exact +0.0 immediate.
351 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
352 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
353 Requires<[HasSSE1]>, TB, OpSize;
354 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
355 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
356 Requires<[HasSSE2]>, TB, OpSize;
358 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
359 // Upper bits are disregarded.
360 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
361 "movaps {$src, $dst|$dst, $src}", []>;
362 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
363 "movapd {$src, $dst|$dst, $src}", []>;
365 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
366 // Upper bits are disregarded.
367 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
368 "movaps {$src, $dst|$dst, $src}",
369 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
370 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
371 "movapd {$src, $dst|$dst, $src}",
372 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
374 // Alias bitwise logical operations using SSE logical ops on packed FP values.
375 let isTwoAddress = 1 in {
376 let isCommutable = 1 in {
377 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
378 "andps {$src2, $dst|$dst, $src2}",
379 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
380 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
381 "andpd {$src2, $dst|$dst, $src2}",
382 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
383 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
384 "orps {$src2, $dst|$dst, $src2}", []>;
385 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
386 "orpd {$src2, $dst|$dst, $src2}", []>;
387 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
388 "xorps {$src2, $dst|$dst, $src2}",
389 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
390 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
391 "xorpd {$src2, $dst|$dst, $src2}",
392 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
394 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
395 "andps {$src2, $dst|$dst, $src2}",
396 [(set FR32:$dst, (X86fand FR32:$src1,
397 (X86loadpf32 addr:$src2)))]>;
398 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
399 "andpd {$src2, $dst|$dst, $src2}",
400 [(set FR64:$dst, (X86fand FR64:$src1,
401 (X86loadpf64 addr:$src2)))]>;
402 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
403 "orps {$src2, $dst|$dst, $src2}", []>;
404 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
405 "orpd {$src2, $dst|$dst, $src2}", []>;
406 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
407 "xorps {$src2, $dst|$dst, $src2}",
408 [(set FR32:$dst, (X86fxor FR32:$src1,
409 (X86loadpf32 addr:$src2)))]>;
410 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
411 "xorpd {$src2, $dst|$dst, $src2}",
412 [(set FR64:$dst, (X86fxor FR64:$src1,
413 (X86loadpf64 addr:$src2)))]>;
415 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
416 "andnps {$src2, $dst|$dst, $src2}", []>;
417 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
418 "andnps {$src2, $dst|$dst, $src2}", []>;
419 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
420 "andnpd {$src2, $dst|$dst, $src2}", []>;
421 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
422 "andnpd {$src2, $dst|$dst, $src2}", []>;
425 //===----------------------------------------------------------------------===//
426 // SSE packed FP Instructions
427 //===----------------------------------------------------------------------===//
429 // Some 'special' instructions
430 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
431 "#IMPLICIT_DEF $dst",
432 [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned packed moves. The rr forms carry no pattern (used for copies);
// load/store forms select aligned 128-bit vector loads and stores.
436 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
437 "movaps {$src, $dst|$dst, $src}", []>;
438 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
439 "movaps {$src, $dst|$dst, $src}",
440 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
441 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
442 "movapd {$src, $dst|$dst, $src}", []>;
443 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
444 "movapd {$src, $dst|$dst, $src}",
445 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
// Store forms (opcode 0x29, memory destination).
447 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
448 "movaps {$src, $dst|$dst, $src}",
449 [(store (v4f32 VR128:$src), addr:$dst)]>;
450 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
451 "movapd {$src, $dst|$dst, $src}",
452 [(store (v2f64 VR128:$src), addr:$dst)]>;
454 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
455 "movups {$src, $dst|$dst, $src}", []>;
456 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
457 "movups {$src, $dst|$dst, $src}", []>;
458 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
459 "movups {$src, $dst|$dst, $src}", []>;
460 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
461 "movupd {$src, $dst|$dst, $src}", []>;
462 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
463 "movupd {$src, $dst|$dst, $src}", []>;
464 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
465 "movupd {$src, $dst|$dst, $src}", []>;
467 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
468 "movlps {$src, $dst|$dst, $src}", []>;
469 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
470 "movlps {$src, $dst|$dst, $src}", []>;
471 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
472 "movlpd {$src, $dst|$dst, $src}", []>;
473 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
474 "movlpd {$src, $dst|$dst, $src}", []>;
476 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
477 "movhps {$src, $dst|$dst, $src}", []>;
478 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
479 "movhps {$src, $dst|$dst, $src}", []>;
480 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
481 "movhpd {$src, $dst|$dst, $src}", []>;
482 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
483 "movhpd {$src, $dst|$dst, $src}", []>;
485 let isTwoAddress = 1 in {
486 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
487 "movlhps {$src2, $dst|$dst, $src2}", []>;
// MOVHLPS: move the high pair of floats of $src2 into the low half of $dst.
// Fix: the asm string previously said "movlhps" (copy/paste from MOVLHPSrr
// above), but opcode 0x0F 0x12 with a register source is movhlps.
489 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
490 "movhlps {$src2, $dst|$dst, $src2}", []>;
493 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
494 "movmskps {$src, $dst|$dst, $src}",
495 [(set R32:$dst, (int_x86_sse_movmskps VR128:$src))]>;
// MOVMSKPD: extract the two sign bits of a v2f64 into a GPR.
// Fix: this is an SSE2 instruction (66 0F 50) and its pattern uses the
// sse2 intrinsic, so it must use PDI (TB + OpSize, Requires<[HasSSE2]>),
// not PSI as before.
496 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
497 "movmskpd {$src, $dst|$dst, $src}",
498 [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
500 // Conversion instructions
501 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
502 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
503 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
504 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
505 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
506 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
507 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
508 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
510 // SSE2 instructions without OpSize prefix
511 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
512 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
514 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
515 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
518 // SSE2 instructions with XS prefix
519 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
520 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
521 XS, Requires<[HasSSE2]>;
522 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
523 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
524 XS, Requires<[HasSSE2]>;
526 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
527 "cvtps2pi {$src, $dst|$dst, $src}", []>;
528 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
529 "cvtps2pi {$src, $dst|$dst, $src}", []>;
530 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
531 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
532 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
533 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
535 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
536 "cvtps2dq {$src, $dst|$dst, $src}", []>;
537 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
538 "cvtps2dq {$src, $dst|$dst, $src}", []>;
539 // SSE2 packed instructions with XD prefix
540 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
541 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
542 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
543 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
545 // SSE2 instructions without OpSize prefix
546 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
547 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
549 def CVTPS2PDrm : I<0x5A, MRMSrcReg, (ops VR128:$dst, f64mem:$src),
550 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
553 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
554 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// cvtpd2ps, memory source form.
// Fix: format was MRMSrcReg even though the source operand is f128mem;
// a memory operand must be encoded with MRMSrcMem.
555 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
556 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
559 let isTwoAddress = 1 in {
560 let isCommutable = 1 in {
561 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
562 "addps {$src2, $dst|$dst, $src2}",
563 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
564 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
565 "addpd {$src2, $dst|$dst, $src2}",
566 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
567 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
568 "mulps {$src2, $dst|$dst, $src2}",
569 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
570 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
571 "mulpd {$src2, $dst|$dst, $src2}",
572 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
575 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
576 "addps {$src2, $dst|$dst, $src2}",
577 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
578 (load addr:$src2))))]>;
579 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
580 "addpd {$src2, $dst|$dst, $src2}",
581 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
582 (load addr:$src2))))]>;
583 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
584 "mulps {$src2, $dst|$dst, $src2}",
585 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
586 (load addr:$src2))))]>;
587 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
588 "mulpd {$src2, $dst|$dst, $src2}",
589 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
590 (load addr:$src2))))]>;
592 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
593 "divps {$src2, $dst|$dst, $src2}",
594 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
595 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
596 "divps {$src2, $dst|$dst, $src2}",
597 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
598 (load addr:$src2))))]>;
599 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
600 "divpd {$src2, $dst|$dst, $src2}",
601 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
602 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
603 "divpd {$src2, $dst|$dst, $src2}",
604 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
605 (load addr:$src2))))]>;
607 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
608 "subps {$src2, $dst|$dst, $src2}",
609 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
610 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
611 "subps {$src2, $dst|$dst, $src2}",
612 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
613 (load addr:$src2))))]>;
614 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
615 "subpd {$src2, $dst|$dst, $src2}",
616 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
617 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
618 "subpd {$src2, $dst|$dst, $src2}",
619 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
620 (load addr:$src2))))]>;
623 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
624 "sqrtps {$src, $dst|$dst, $src}",
625 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
626 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
627 "sqrtps {$src, $dst|$dst, $src}",
628 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
629 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
630 "sqrtpd {$src, $dst|$dst, $src}",
631 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
632 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
633 "sqrtpd {$src, $dst|$dst, $src}",
634 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
636 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
637 "rsqrtps {$src, $dst|$dst, $src}", []>;
638 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
639 "rsqrtps {$src, $dst|$dst, $src}", []>;
640 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
641 "rcpps {$src, $dst|$dst, $src}", []>;
642 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
643 "rcpps {$src, $dst|$dst, $src}", []>;
645 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
646 "maxps {$src, $dst|$dst, $src}", []>;
647 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
648 "maxps {$src, $dst|$dst, $src}", []>;
649 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
650 "maxpd {$src, $dst|$dst, $src}", []>;
651 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
652 "maxpd {$src, $dst|$dst, $src}", []>;
653 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
654 "minps {$src, $dst|$dst, $src}", []>;
655 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
656 "minps {$src, $dst|$dst, $src}", []>;
657 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
658 "minpd {$src, $dst|$dst, $src}", []>;
659 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
660 "minpd {$src, $dst|$dst, $src}", []>;
663 let isTwoAddress = 1 in {
664 let isCommutable = 1 in {
665 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
666 "andps {$src2, $dst|$dst, $src2}",
667 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
668 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
669 "andpd {$src2, $dst|$dst, $src2}",
670 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
671 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
672 "orps {$src2, $dst|$dst, $src2}",
673 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
674 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
675 "orpd {$src2, $dst|$dst, $src2}",
676 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
677 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
678 "xorps {$src2, $dst|$dst, $src2}",
679 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
680 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
681 "xorpd {$src2, $dst|$dst, $src2}",
682 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
// Packed bitwise logic, register-memory forms (not commutable: the memory
// operand must stay in the r/m slot). Same v4i32/v2i64 typing as the
// register forms above.
684 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
685 "andps {$src2, $dst|$dst, $src2}",
686 [(set VR128:$dst, (v4i32 (and VR128:$src1,
687 (load addr:$src2))))]>;
688 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
689 "andpd {$src2, $dst|$dst, $src2}",
690 [(set VR128:$dst, (v2i64 (and VR128:$src1,
691 (load addr:$src2))))]>;
692 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
693 "orps {$src2, $dst|$dst, $src2}",
694 [(set VR128:$dst, (v4i32 (or VR128:$src1,
695 (load addr:$src2))))]>;
696 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
697 "orpd {$src2, $dst|$dst, $src2}",
698 [(set VR128:$dst, (v2i64 (or VR128:$src1,
699 (load addr:$src2))))]>;
700 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
701 "xorps {$src2, $dst|$dst, $src2}",
702 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
703 (load addr:$src2))))]>;
704 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
705 "xorpd {$src2, $dst|$dst, $src2}",
706 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
707 (load addr:$src2))))]>;
// andnps/andnpd: dst = ~src1 & src2 — note it is the FIRST (two-address)
// operand that is inverted, hence the (not VR128:$src1) in the patterns.
// NOTE(review): some continuation lines of ANDNPSrr and ANDNPDrr are
// elided in this listing — verify each pattern is closed analogously to
// ANDNPSrm below.
708 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
709 "andnps {$src2, $dst|$dst, $src2}",
710 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
712 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
713 "andnps {$src2, $dst|$dst, $src2}",
714 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
715 (load addr:$src2))))]>;
716 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
717 "andnpd {$src2, $dst|$dst, $src2}",
718 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// andnpd mem form: dst = ~src1 & mem128. The pattern must invert $src1 to
// match ANDN semantics (and the ANDNPSrm pattern above); the original
// pattern omitted the `not`, which would wrongly select andnpd for a
// plain v2i64 `and` with a loaded operand.
721 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
722 "andnpd {$src2, $dst|$dst, $src2}",
723 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
724 (load addr:$src2))))]>;
// Packed FP compares: cmp{eq,lt,le,...}ps/pd. $cc is an SSECC operand
// that becomes the condition immediate and is printed inside the
// mnemonic via ${cc}. No ISel patterns attached here.
727 let isTwoAddress = 1 in {
728 def CMPPSrr : PSI<0xC2, MRMSrcReg,
729 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
730 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
731 def CMPPSrm : PSI<0xC2, MRMSrcMem,
732 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
733 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
734 def CMPPDrr : PDI<0xC2, MRMSrcReg,
735 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
736 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
737 def CMPPDrm : PDI<0xC2, MRMSrcMem,
738 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
739 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
742 // Shuffle and unpack instructions
// pshufw mm, mm, imm8 — register source form. The source register lives
// in the ModRM r/m field, so this must be MRMSrcReg (the original said
// MRMDestReg, which swaps the ModRM reg/rm roles and mis-encodes the
// instruction; every other reg-reg def in this file uses MRMSrcReg).
743 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
744 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
745 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// pshufw mm, m64, imm8 — memory source form; no ISel pattern attached.
746 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
747 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
748 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// pshufd xmm, xmm, imm8 — register source form. The source register is
// encoded in the ModRM r/m field, so the format must be MRMSrcReg (the
// original MRMDestReg swaps the ModRM operand roles and mis-encodes;
// compare MOVDQArr and the other rr defs in this file).
749 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
750 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
751 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// pshufd xmm, m128, imm8 — memory source form; no ISel pattern attached.
752 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
753 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
754 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// shufps/shufpd: two-address shuffle selecting elements from $src1/$src2
// under the imm8 mask. The rr forms match vector_shuffle nodes whose mask
// fits SHUFP_shuffle_mask; the rm forms carry no pattern (encoding only).
756 let isTwoAddress = 1 in {
757 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
758 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
759 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
760 [(set VR128:$dst, (vector_shuffle
761 (v4f32 VR128:$src1), (v4f32 VR128:$src2),
762 SHUFP_shuffle_mask:$src3))]>;
763 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
764 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
765 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
766 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
767 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
768 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
769 [(set VR128:$dst, (vector_shuffle
770 (v2f64 VR128:$src1), (v2f64 VR128:$src2),
771 SHUFP_shuffle_mask:$src3))]>;
772 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
773 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
774 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// unpckhps/unpckhpd (0x15) interleave the high halves of the operands;
// unpcklps/unpcklpd (0x14) interleave the low halves. No ISel patterns
// attached — selection happens via the Pat<> rules later in this file
// (see the UNPCKLPDrm / UNPCKHPDrr shuffle patterns).
776 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
777 (ops VR128:$dst, VR128:$src1, VR128:$src2),
778 "unpckhps {$src2, $dst|$dst, $src2}", []>;
779 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
780 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
781 "unpckhps {$src2, $dst|$dst, $src2}", []>;
782 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
783 (ops VR128:$dst, VR128:$src1, VR128:$src2),
784 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
785 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
786 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
787 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
788 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
789 (ops VR128:$dst, VR128:$src1, VR128:$src2),
790 "unpcklps {$src2, $dst|$dst, $src2}", []>;
791 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
792 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
793 "unpcklps {$src2, $dst|$dst, $src2}", []>;
794 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
795 (ops VR128:$dst, VR128:$src1, VR128:$src2),
796 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
797 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
798 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
799 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
802 //===----------------------------------------------------------------------===//
803 // SSE integer instructions
804 //===----------------------------------------------------------------------===//
// movd: GPR32 <-> low dword of an XMM register. rr/rm forms build a
// v4i32 via scalar_to_vector; mr stores the low dword (no pattern).
// NOTE(review): the "[(set VR128:$dst," continuation lines of the rr/rm
// patterns are elided in this listing — verify against the original.
807 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
808 "movd {$src, $dst|$dst, $src}",
810 (v4i32 (scalar_to_vector R32:$src)))]>;
811 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
812 "movd {$src, $dst|$dst, $src}",
814 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
816 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
817 "movd {$src, $dst|$dst, $src}", []>;
// movdqa: 128-bit aligned integer moves. The load/store patterns are
// typed v4i32; the other integer vector types are funneled here via the
// Pat<> rules in the Non-Instruction Patterns section below.
819 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
820 "movdqa {$src, $dst|$dst, $src}", []>;
821 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
822 "movdqa {$src, $dst|$dst, $src}",
823 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
824 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
825 "movdqa {$src, $dst|$dst, $src}",
826 [(store (v4i32 VR128:$src), addr:$dst)]>;
828 // SSE2 instructions with XS prefix
// movq (F3 0F 7E): move 64 bits into the low quadword of an XMM reg,
// zeroing the upper half; hence the XS prefix on plain class I.
// NOTE(review): the "[(set VR128:$dst," and trailing Requires<...>
// continuation lines are elided in this listing — verify both defs are
// properly closed in the original.
829 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
830 "movq {$src, $dst|$dst, $src}",
832 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
834 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
835 "movq {$src, $dst|$dst, $src}", []>, XS,
// movq m64, xmm (66 0F D6) stores the low quadword to memory: the memory
// operand is the *destination*, so the format must be MRMDestMem (the
// original said MRMSrcMem; compare MOVD128mr and MOVDQAmr above, which
// correctly use MRMDestMem for their store forms).
837 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
838 "movq {$src, $dst|$dst, $src}", []>;
840 // 128-bit Integer Arithmetic
// paddb/paddw/paddd: element-wise integer adds (byte/word/dword).
// Two-address; register forms are commutable, memory forms are not.
// NOTE(review): the memory forms use f128mem for integer operands where
// i128mem (as used by MOVDQA) looks intended — confirm before changing,
// since it only affects operand classification, not encoding.
841 let isTwoAddress = 1 in {
842 let isCommutable = 1 in {
843 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
844 "paddb {$src2, $dst|$dst, $src2}",
845 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
846 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
847 "paddw {$src2, $dst|$dst, $src2}",
848 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
849 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
850 "paddd {$src2, $dst|$dst, $src2}",
851 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
853 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
854 "paddb {$src2, $dst|$dst, $src2}",
855 [(set VR128:$dst, (v16i8 (add VR128:$src1,
856 (load addr:$src2))))]>;
857 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
858 "paddw {$src2, $dst|$dst, $src2}",
859 [(set VR128:$dst, (v8i16 (add VR128:$src1,
860 (load addr:$src2))))]>;
861 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
862 "paddd {$src2, $dst|$dst, $src2}",
863 [(set VR128:$dst, (v4i32 (add VR128:$src1,
864 (load addr:$src2))))]>;
867 //===----------------------------------------------------------------------===//
868 // Miscellaneous Instructions
869 //===----------------------------------------------------------------------===//
// ldmxcsr m32: load the MXCSR control/status register from memory.
// Encoded as 0F AE /2, hence TB + MRM2m; requires SSE1.
871 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
872 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
874 //===----------------------------------------------------------------------===//
875 // Alias Instructions
876 //===----------------------------------------------------------------------===//
878 // Alias instructions that map zero vector to xorp* for sse.
879 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Each VZERO* is a pseudo (MRMInitReg) that materializes an all-zero
// vector of the given type: pxor for the integer types (SSE2, TB+OpSize
// gives the 66 0F EF encoding), xorps/xorpd for the FP types.
880 def VZEROv16i8 : I<0xEF, MRMInitReg, (ops VR128:$dst),
881 "pxor $dst, $dst", [(set VR128:$dst, (v16i8 vecimm0))]>,
882 Requires<[HasSSE2]>, TB, OpSize;
883 def VZEROv8i16 : I<0xEF, MRMInitReg, (ops VR128:$dst),
884 "pxor $dst, $dst", [(set VR128:$dst, (v8i16 vecimm0))]>,
885 Requires<[HasSSE2]>, TB, OpSize;
886 def VZEROv4i32 : I<0xEF, MRMInitReg, (ops VR128:$dst),
887 "pxor $dst, $dst", [(set VR128:$dst, (v4i32 vecimm0))]>,
888 Requires<[HasSSE2]>, TB, OpSize;
889 def VZEROv2i64 : I<0xEF, MRMInitReg, (ops VR128:$dst),
890 "pxor $dst, $dst", [(set VR128:$dst, (v2i64 vecimm0))]>,
891 Requires<[HasSSE2]>, TB, OpSize;
892 def VZEROv4f32 : PSI<0x57, MRMInitReg, (ops VR128:$dst),
893 "xorps $dst, $dst", [(set VR128:$dst, (v4f32 vecimm0))]>;
894 def VZEROv2f64 : PDI<0x57, MRMInitReg, (ops VR128:$dst),
895 "xorpd $dst, $dst", [(set VR128:$dst, (v2f64 vecimm0))]>;
897 // Scalar to 128-bit vector with zero extension.
898 // Three operand (but two address) aliases.
// These pattern-less two-address aliases insert a scalar into the low
// element of $src1; they are only selected via the X86zexts2vec Pat<>
// rules below, which pair them with a VZERO* of the matching type.
899 let isTwoAddress = 1 in {
900 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
901 "movss {$src2, $dst|$dst, $src2}", []>;
902 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
903 "movsd {$src2, $dst|$dst, $src2}", []>;
904 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
905 "movd {$src2, $dst|$dst, $src2}", []>;
906 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
907 "movq {$src2, $dst|$dst, $src2}", []>;
910 // Loading from memory automatically zeroing upper bits.
// movss/movsd/movd loads clear the untouched upper lanes, so they match
// X86zexts2vec of a scalar load directly (no VZERO needed).
// NOTE(review): the "[(set VR128:$dst," continuation lines are elided in
// this listing — verify the three patterns are complete in the original.
911 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
912 "movss {$src, $dst|$dst, $src}",
914 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
915 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
916 "movsd {$src, $dst|$dst, $src}",
918 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
919 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
920 "movd {$src, $dst|$dst, $src}",
922 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
924 //===----------------------------------------------------------------------===//
925 // Non-Instruction Patterns
926 //===----------------------------------------------------------------------===//
928 // 128-bit vector undef's.
// All five 128-bit vector undef values lower to a single IMPLICIT_DEF of
// a VR128 register.
929 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
930 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
931 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
932 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
933 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
935 // Load 128-bit integer vector values.
// All integer vector loads funnel into MOVDQArm (typed v4i32 on the def).
// NOTE(review): each Pat ends in a comma here — the trailing
// Requires<...>; continuation lines are elided in this listing. Since
// movdqa is SSE2, each guard should be Requires<[HasSSE2]> — confirm.
936 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
938 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
940 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
942 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
945 // Store 128-bit integer vector values.
// All integer vector stores funnel into MOVDQAmr. movdqa is an SSE2
// instruction, so every pattern must be guarded on SSE2; the original
// guarded the v16i8/v8i16/v4i32 cases with HasSSE1 only, which could
// select movdqa on an SSE1-only subtarget.
946 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
947 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
948 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
949 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
950 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
951 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
952 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
953 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
955 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits are meaningful; both lower to a plain movd (MOVD128rr).
// NOTE(review): each Pat ends in a comma — the trailing Requires<...>;
// continuation lines are elided in this listing; confirm the guards.
957 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
959 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
// v4i32 <-> v4f32 bitcasts are no-ops on VR128: re-type the register
// without emitting any instruction.
963 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
964 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
966 // Zeroing a VR128 then do a MOVS* to the lower bits.
// X86zexts2vec of a register scalar is lowered as: materialize an
// all-zero vector (VZERO*), then insert the scalar into the low element
// with the matching MOVZ* two-address alias. The 16- and 8-bit cases
// first widen the scalar to 32 bits with movzx so movd can be used.
967 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
968 (MOVZSD128rr (VZEROv2f64), FR64:$src)>;
969 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
970 (MOVZSS128rr (VZEROv4f32), FR32:$src)>;
971 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
972 (MOVZQ128rr (VZEROv2i64), VR64:$src)>, Requires<[HasSSE2]>;
973 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
974 (MOVZD128rr (VZEROv4i32), R32:$src)>;
975 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
976 (MOVZD128rr (VZEROv8i16), (MOVZX32rr16 R16:$src))>;
977 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
978 (MOVZD128rr (VZEROv16i8), (MOVZX32rr8 R8:$src))>;
980 // Splat v4f32 / v4i32
// A splat-shaped shuffle with an undef second operand becomes a shufps
// of the source with itself.
// NOTE(review): the Requires<...> continuation lines for the v4f32/v4i32
// cases are elided in this listing — confirm the guards in the original.
981 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
982 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
984 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
985 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
988 // Splat v2f64 / v2i64
// Two-element splats use movlhps of the source with itself; the shuffle
// mask operand $sm is only used for matching and is dropped on output.
989 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
990 (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE1]>;
991 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
992 (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE1]>;
994 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
// Single-source 4-element shuffles (second operand undef) lower to pshufd.
// NOTE(review): the Requires<...> continuation for the v4f32 case is
// elided in this listing — confirm it matches the v4i32 case (HasSSE2).
995 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
996 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
998 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
999 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1000 Requires<[HasSSE2]>;
1002 // Shuffle v2f64 / v2i64
// Two-element, two-source shuffles are classified by their mask:
//   low/low   -> movlhps (or unpcklpd when the second source is a load)
//   high/low  -> movhlps
//   high/high -> unpckhpd
// The v2f64 and v2i64 groups are structurally identical.
// NOTE(review): the movlhps/movhlps patterns are guarded HasSSE1 while
// operating on v2f64/v2i64; those types are only legal with SSE2, so the
// looser guard appears harmless but is worth confirming.
1003 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
1004 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1005 (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1006 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
1007 MOVHLPS_shuffle_mask:$sm),
1008 (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1009 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
1010 UNPCKHPD_shuffle_mask:$sm),
1011 (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1012 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
1013 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1014 (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
1016 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1017 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1018 (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1019 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1020 MOVHLPS_shuffle_mask:$sm),
1021 (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1022 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1023 UNPCKHPD_shuffle_mask:$sm),
1024 (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1025 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
1026 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1027 (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;