1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// Custom X86 DAG nodes used by SSE instruction selection.
// NOTE(review): the X86loadp def below is truncated in this view — its flag
// list / closing of the SDNode<...> is not visible; confirm against the
// full file before editing.
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise FP and/xor on scalars kept in XMM registers; both are
// commutative and associative, which the combiner may exploit.
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector moves: S2VEC leaves upper elements undefined,
// ZEXT_S2VEC zeroes them.
26 def X86s2vec : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
31 //===----------------------------------------------------------------------===//
32 // SSE pattern fragments
33 //===----------------------------------------------------------------------===//
// Scalar loads that go through the packed-load node (used by the Fs* alias
// instructions that load a scalar via movaps/movapd).
35 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
36 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// Typed 128-bit vector loads, one per supported element type.
38 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
39 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
40 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
41 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
42 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
43 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// +0.0f immediate (matched so FsFLD0SS can materialize it with pxor).
// NOTE(review): the closing "}]>;" of both PatLeafs is not visible in this
// view — the defs appear truncated here.
45 def fp32imm0 : PatLeaf<(f32 fpimm), [{
46 return N->isExactlyValue(+0.0);
// All-zero build_vector.
49 def vecimm0 : PatLeaf<(build_vector), [{
50 return X86::isZeroVector(N);
53 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// Transform that encodes a shuffle mask build_vector into the 8-bit
// immediate operand used by PSHUFD / SHUFPS / SHUFPD.
// NOTE(review): the closing "}]>;" of this SDNodeXForm is not visible here.
55 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
56 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Shuffle-mask predicates.  Each recognizes one class of mask and, where an
// immediate is needed, converts it via SHUFFLE_get_shuf_imm.
59 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
60 return X86::isSplatMask(N);
61 }], SHUFFLE_get_shuf_imm>;
// NOTE(review): MOVLHPS_splat_mask's closing "}]...>;" line is missing in
// this view.
63 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
64 return X86::isSplatMask(N);
67 def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
68 return X86::isMOVLHPSorUNPCKLPDMask(N);
69 }], SHUFFLE_get_shuf_imm>;
71 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
72 return X86::isMOVHLPSMask(N);
73 }], SHUFFLE_get_shuf_imm>;
75 def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
76 return X86::isUNPCKHPDMask(N);
77 }], SHUFFLE_get_shuf_imm>;
79 // Only use PSHUF if it is not a splat.
80 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
81 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
82 }], SHUFFLE_get_shuf_imm>;
84 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
85 return X86::isSHUFPMask(N);
86 }], SHUFFLE_get_shuf_imm>;
88 //===----------------------------------------------------------------------===//
89 // SSE scalar FP Instructions
90 //===----------------------------------------------------------------------===//
92 // Instruction templates
93 // SSI - SSE1 instructions with XS prefix.
94 // SDI - SSE2 instructions with XD prefix.
95 // PSI - SSE1 instructions with TB prefix.
96 // PDI - SSE2 instructions with TB and OpSize prefixes.
97 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
98 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
99 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
100 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
101 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
102 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
103 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
104 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
105 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
106 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// The i8-immediate variants derive from X86Inst directly so they can set
// ImmT = Imm8; the selection pattern is assigned via a let.
// NOTE(review): the closing "}" of both class bodies is not visible in this
// view.
107 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
108 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
109 let Pattern = pattern;
111 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
112 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
113 let Pattern = pattern;
116 // Some 'special' instructions
// Pseudo that defines an FR32 register with an undefined value.
// FIX(review): this previously required HasSSE2, but FR32 / scalar-single
// operations only need SSE1 (the FR64 twin below correctly requires SSE2).
117 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
118 "#IMPLICIT_DEF $dst",
119 [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
// Pseudo that defines an FR64 register with an undefined value; FR64
// requires SSE2.
120 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
121 "#IMPLICIT_DEF $dst",
122 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
124 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
125 // scheduler into a branch sequence.
// NOTE(review): the closing "}" of this let block is not visible in this
// view.
126 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
127 def CMOV_FR32 : I<0, Pseudo,
128 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
129 "#CMOV_FR32 PSEUDO!",
130 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
131 def CMOV_FR64 : I<0, Pseudo,
132 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
133 "#CMOV_FR64 PSEUDO!",
134 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves: register-to-register forms have no pattern
// (they are selected as copies); memory forms load/store f32/f64.
138 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
139 "movss {$src, $dst|$dst, $src}", []>;
140 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
141 "movss {$src, $dst|$dst, $src}",
142 [(set FR32:$dst, (loadf32 addr:$src))]>;
143 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
144 "movsd {$src, $dst|$dst, $src}", []>;
145 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
146 "movsd {$src, $dst|$dst, $src}",
147 [(set FR64:$dst, (loadf64 addr:$src))]>;
// Store forms (opcode 0x11).
149 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
150 "movss {$src, $dst|$dst, $src}",
151 [(store FR32:$src, addr:$dst)]>;
152 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
153 "movsd {$src, $dst|$dst, $src}",
154 [(store FR64:$src, addr:$dst)]>;
156 // FR32 / FR64 to 128-bit vector conversion.
// These select scalar_to_vector into movss/movsd writing a VR128.
// NOTE(review): each "[(set VR128:$dst," continuation line is missing from
// this view — the patterns below are truncated.
157 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
158 "movss {$src, $dst|$dst, $src}",
160 (v4f32 (scalar_to_vector FR32:$src)))]>;
161 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
162 "movss {$src, $dst|$dst, $src}",
164 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
165 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
166 "movsd {$src, $dst|$dst, $src}",
168 (v2f64 (scalar_to_vector FR64:$src)))]>;
// Load f64 from memory into the low element of a v2f64 vector.
// FIX(review): the result type was written as v4f32, but a
// scalar_to_vector of a 64-bit load produces a v2f64 (see MOVSD128rr
// above); also restores the "[(set VR128:$dst," line that this extract
// had dropped.
169 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
170 "movsd {$src, $dst|$dst, $src}",
[(set VR128:$dst,
172 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
175 // Conversion instructions
// cvttss2si / cvttsd2si: FP -> int32 with truncation.
176 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
177 "cvttss2si {$src, $dst|$dst, $src}",
178 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
179 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
180 "cvttss2si {$src, $dst|$dst, $src}",
181 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
182 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
183 "cvttsd2si {$src, $dst|$dst, $src}",
184 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
185 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
186 "cvttsd2si {$src, $dst|$dst, $src}",
187 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// cvtsd2ss: double -> single rounding.
188 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
189 "cvtsd2ss {$src, $dst|$dst, $src}",
190 [(set FR32:$dst, (fround FR64:$src))]>;
191 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
192 "cvtsd2ss {$src, $dst|$dst, $src}",
193 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// cvtsi2ss / cvtsi2sd: int32 -> FP.
194 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
195 "cvtsi2ss {$src, $dst|$dst, $src}",
196 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
197 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
198 "cvtsi2ss {$src, $dst|$dst, $src}",
199 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
200 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
201 "cvtsi2sd {$src, $dst|$dst, $src}",
202 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
203 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
204 "cvtsi2sd {$src, $dst|$dst, $src}",
205 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
206 // SSE2 instructions with XS prefix
// cvtss2sd is an SSE2 instruction that uses the XS (0xF3) prefix, so it
// derives from I directly instead of SDI.
// NOTE(review): the "Requires<[HasSSE2]>;" continuation lines for both defs
// are not visible in this view.
207 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
208 "cvtss2sd {$src, $dst|$dst, $src}",
209 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
211 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
212 "cvtss2sd {$src, $dst|$dst, $src}",
213 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
216 // Arithmetic instructions
// Two-address scalar FP arithmetic; add/mul are commutable so the register
// allocator may swap operands.
// NOTE(review): the "}" lines closing the isCommutable and isTwoAddress
// let blocks are not visible in this view.
217 let isTwoAddress = 1 in {
218 let isCommutable = 1 in {
219 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
220 "addss {$src2, $dst|$dst, $src2}",
221 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
222 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
223 "addsd {$src2, $dst|$dst, $src2}",
224 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
225 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
226 "mulss {$src2, $dst|$dst, $src2}",
227 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
228 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
229 "mulsd {$src2, $dst|$dst, $src2}",
230 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
// Memory-operand forms fold the load into the arithmetic.
233 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
234 "addss {$src2, $dst|$dst, $src2}",
235 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
236 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
237 "addsd {$src2, $dst|$dst, $src2}",
238 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
239 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
240 "mulss {$src2, $dst|$dst, $src2}",
241 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
242 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
243 "mulsd {$src2, $dst|$dst, $src2}",
244 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
// div/sub are not commutable.
246 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
247 "divss {$src2, $dst|$dst, $src2}",
248 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
249 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
250 "divss {$src2, $dst|$dst, $src2}",
251 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
252 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
253 "divsd {$src2, $dst|$dst, $src2}",
254 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
255 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
256 "divsd {$src2, $dst|$dst, $src2}",
257 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
259 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
260 "subss {$src2, $dst|$dst, $src2}",
261 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
262 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
263 "subss {$src2, $dst|$dst, $src2}",
264 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
265 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
266 "subsd {$src2, $dst|$dst, $src2}",
267 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
268 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
269 "subsd {$src2, $dst|$dst, $src2}",
270 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
// Scalar square root (single-operand, not two-address).
273 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
274 "sqrtss {$src, $dst|$dst, $src}",
275 [(set FR32:$dst, (fsqrt FR32:$src))]>;
276 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
277 "sqrtss {$src, $dst|$dst, $src}",
278 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
279 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
280 "sqrtsd {$src, $dst|$dst, $src}",
281 [(set FR64:$dst, (fsqrt FR64:$src))]>;
282 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
283 "sqrtsd {$src, $dst|$dst, $src}",
284 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
// Reciprocal-estimate and min/max scalar ops; no selection patterns yet
// (assembler/disassembler only at this point).
// NOTE(review): max/min are architecturally two-operand read-modify-write
// instructions, but these defs take only (dst, src) and are outside the
// isTwoAddress block — confirm against the full file.
286 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
287 "rsqrtss {$src, $dst|$dst, $src}", []>;
288 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
289 "rsqrtss {$src, $dst|$dst, $src}", []>;
290 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
291 "rcpss {$src, $dst|$dst, $src}", []>;
292 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
293 "rcpss {$src, $dst|$dst, $src}", []>;
295 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
296 "maxss {$src, $dst|$dst, $src}", []>;
297 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
298 "maxss {$src, $dst|$dst, $src}", []>;
299 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
300 "maxsd {$src, $dst|$dst, $src}", []>;
301 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
302 "maxsd {$src, $dst|$dst, $src}", []>;
303 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
304 "minss {$src, $dst|$dst, $src}", []>;
305 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
306 "minss {$src, $dst|$dst, $src}", []>;
307 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
308 "minsd {$src, $dst|$dst, $src}", []>;
309 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
310 "minsd {$src, $dst|$dst, $src}", []>;
312 // Comparison instructions
// cmp{cc}ss / cmp{cc}sd take an SSECC condition operand that is printed as
// part of the mnemonic.
// NOTE(review): the "}" closing this isTwoAddress let block is not visible
// in this view.
313 let isTwoAddress = 1 in {
314 def CMPSSrr : SSI<0xC2, MRMSrcReg,
315 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
316 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
317 def CMPSSrm : SSI<0xC2, MRMSrcMem,
318 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
319 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
320 def CMPSDrr : SDI<0xC2, MRMSrcReg,
321 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
322 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
323 def CMPSDrm : SDI<0xC2, MRMSrcMem,
324 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
325 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// Unordered compares that set EFLAGS (selected from X86cmp).
328 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
329 "ucomiss {$src2, $src1|$src1, $src2}",
330 [(X86cmp FR32:$src1, FR32:$src2)]>;
331 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
332 "ucomiss {$src2, $src1|$src1, $src2}",
333 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
334 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
335 "ucomisd {$src2, $src1|$src1, $src2}",
336 [(X86cmp FR64:$src1, FR64:$src2)]>;
337 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
338 "ucomisd {$src2, $src1|$src1, $src2}",
339 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
341 // Aliases of packed instructions for scalar use. These all have names that
344 // Alias instructions that map fld0 to pxor for sse.
345 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Materialize +0.0 by pxor-ing the register with itself.
// NOTE(review): fp64imm0 is not defined in this view — presumably a PatLeaf
// like fp32imm0 defined elsewhere in the file; confirm.
346 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
347 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
348 Requires<[HasSSE1]>, TB, OpSize;
349 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
350 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
351 Requires<[HasSSE2]>, TB, OpSize;
353 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
354 // Upper bits are disregarded.
355 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
356 "movaps {$src, $dst|$dst, $src}", []>;
357 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
358 "movapd {$src, $dst|$dst, $src}", []>;
360 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
361 // Upper bits are disregarded.
// Selected from the X86loadp (packed load) fragments.
362 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
363 "movaps {$src, $dst|$dst, $src}",
364 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
365 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
366 "movapd {$src, $dst|$dst, $src}",
367 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
369 // Alias bitwise logical operations using SSE logical ops on packed FP values.
// Scalar and/or/xor on FP bits, implemented with the packed instructions.
// and/xor have patterns (X86fand/X86fxor); or and andn do not yet.
// NOTE(review): the "}" lines closing the isCommutable and isTwoAddress let
// blocks are not visible in this view.
370 let isTwoAddress = 1 in {
371 let isCommutable = 1 in {
372 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
373 "andps {$src2, $dst|$dst, $src2}",
374 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
375 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
376 "andpd {$src2, $dst|$dst, $src2}",
377 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
378 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
379 "orps {$src2, $dst|$dst, $src2}", []>;
380 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
381 "orpd {$src2, $dst|$dst, $src2}", []>;
382 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
383 "xorps {$src2, $dst|$dst, $src2}",
384 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
385 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
386 "xorpd {$src2, $dst|$dst, $src2}",
387 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
// Memory forms fold a 128-bit packed load.
389 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
390 "andps {$src2, $dst|$dst, $src2}",
391 [(set FR32:$dst, (X86fand FR32:$src1,
392 (X86loadpf32 addr:$src2)))]>;
393 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
394 "andpd {$src2, $dst|$dst, $src2}",
395 [(set FR64:$dst, (X86fand FR64:$src1,
396 (X86loadpf64 addr:$src2)))]>;
397 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
398 "orps {$src2, $dst|$dst, $src2}", []>;
399 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
400 "orpd {$src2, $dst|$dst, $src2}", []>;
401 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
402 "xorps {$src2, $dst|$dst, $src2}",
403 [(set FR32:$dst, (X86fxor FR32:$src1,
404 (X86loadpf32 addr:$src2)))]>;
405 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
406 "xorpd {$src2, $dst|$dst, $src2}",
407 [(set FR64:$dst, (X86fxor FR64:$src1,
408 (X86loadpf64 addr:$src2)))]>;
// andn variants (dst = ~src1 & src2); no patterns yet.
410 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
411 "andnps {$src2, $dst|$dst, $src2}", []>;
412 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
413 "andnps {$src2, $dst|$dst, $src2}", []>;
414 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
415 "andnpd {$src2, $dst|$dst, $src2}", []>;
416 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
417 "andnpd {$src2, $dst|$dst, $src2}", []>;
420 //===----------------------------------------------------------------------===//
421 // SSE packed FP Instructions
422 //===----------------------------------------------------------------------===//
424 // Some 'special' instructions
// Undefined 128-bit vector pseudo.
// NOTE(review): the continuation line with its Requires<...> clause is not
// visible in this view — the def is truncated here.
425 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
426 "#IMPLICIT_DEF $dst",
427 [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned 128-bit loads/stores; register forms are plain copies.
431 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
432 "movaps {$src, $dst|$dst, $src}", []>;
433 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
434 "movaps {$src, $dst|$dst, $src}",
435 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
436 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
437 "movapd {$src, $dst|$dst, $src}", []>;
438 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
439 "movapd {$src, $dst|$dst, $src}",
440 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
442 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
443 "movaps {$src, $dst|$dst, $src}",
444 [(store (v4f32 VR128:$src), addr:$dst)]>;
445 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
446 "movapd {$src, $dst|$dst, $src}",
447 [(store (v2f64 VR128:$src), addr:$dst)]>;
// Unaligned 128-bit moves and low/high 64-bit moves; no selection patterns
// yet for these forms.
449 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
450 "movups {$src, $dst|$dst, $src}", []>;
451 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
452 "movups {$src, $dst|$dst, $src}", []>;
453 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
454 "movups {$src, $dst|$dst, $src}", []>;
455 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
456 "movupd {$src, $dst|$dst, $src}", []>;
457 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
458 "movupd {$src, $dst|$dst, $src}", []>;
459 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
460 "movupd {$src, $dst|$dst, $src}", []>;
// movlps/movlpd move the low 64 bits to/from memory.
462 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
463 "movlps {$src, $dst|$dst, $src}", []>;
464 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
465 "movlps {$src, $dst|$dst, $src}", []>;
466 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
467 "movlpd {$src, $dst|$dst, $src}", []>;
468 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
469 "movlpd {$src, $dst|$dst, $src}", []>;
// movhps/movhpd move the high 64 bits to/from memory.
471 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
472 "movhps {$src, $dst|$dst, $src}", []>;
473 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
474 "movhps {$src, $dst|$dst, $src}", []>;
475 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
476 "movhpd {$src, $dst|$dst, $src}", []>;
477 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
478 "movhpd {$src, $dst|$dst, $src}", []>;
// movlhps: copy low quadword of $src2 into high quadword of $dst.
// NOTE(review): the "}" closing this isTwoAddress let block is not visible
// in this view.
480 let isTwoAddress = 1 in {
481 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
482 "movlhps {$src2, $dst|$dst, $src2}", []>;
// movhlps: copy high quadword of $src2 into low quadword of $dst.
// FIX(review): the asm string said "movlhps" but this def is MOVHLPS
// (opcode 0x12); the mnemonic must match the instruction.
484 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
485 "movhlps {$src2, $dst|$dst, $src2}", []>;
// movmskps: extract the sign bits of the four packed singles into a GPR.
488 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
489 "movmskps {$src, $dst|$dst, $src}",
490 [(set R32:$dst, (int_x86_sse_movmskps VR128:$src))]>;
// movmskpd: extract the sign bits of the two packed doubles into a GPR.
// FIX(review): this was declared PSI (SSE1, no OpSize prefix), but movmskpd
// is an SSE2 instruction that needs the 0x66 operand-size prefix — and the
// pattern already uses the sse2 intrinsic — so it must be PDI.
491 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
492 "movmskpd {$src, $dst|$dst, $src}",
493 [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
495 // Conversion instructions
// MMX <-> packed FP conversions (VR64 is the MMX register class).
496 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
497 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
498 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
499 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
500 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
501 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
502 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
503 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
505 // SSE2 instructions without OpSize prefix
// cvtdq2ps takes only TB, so it derives from I directly.
// NOTE(review): the "Requires<[HasSSE2]>;" continuation lines for the next
// four defs are not visible in this view — the defs are truncated here.
506 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
507 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
509 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
510 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
513 // SSE2 instructions with XS prefix
514 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
515 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
516 XS, Requires<[HasSSE2]>;
517 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
518 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
519 XS, Requires<[HasSSE2]>;
// Packed FP -> MMX conversions.
521 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
522 "cvtps2pi {$src, $dst|$dst, $src}", []>;
523 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
524 "cvtps2pi {$src, $dst|$dst, $src}", []>;
525 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
526 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
527 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
528 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
// Packed FP <-> doubleword-integer conversions.
530 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
531 "cvtps2dq {$src, $dst|$dst, $src}", []>;
532 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
533 "cvtps2dq {$src, $dst|$dst, $src}", []>;
534 // SSE2 packed instructions with XD prefix
535 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
536 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
537 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
538 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
540 // SSE2 instructions without OpSize prefix
// cvtps2pd: widen two singles to two doubles.
// NOTE(review): the "Requires<[HasSSE2]>;" continuation line is not visible
// in this view — the def is truncated here.
541 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
542 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// cvtps2pd memory form: widens two singles loaded from a 64-bit memory
// operand.
// FIX(review): the format was MRMSrcReg although the source operand is
// memory — it must be MRMSrcMem for the ModRM byte to be encoded/decoded
// correctly; also restores the Requires<[HasSSE2]> continuation that this
// extract had dropped (matching CVTDQ2PDrm above).
544 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
545 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE2]>;
// cvtpd2ps: narrow two doubles to two singles.
548 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
549 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// cvtpd2ps memory form.
// FIX(review): the format was MRMSrcReg although the source operand is a
// 128-bit memory location — it must be MRMSrcMem.
550 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
551 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP arithmetic; add/mul are commutable.
// NOTE(review): the "}" lines closing the isCommutable and isTwoAddress let
// blocks are not visible in this view.
554 let isTwoAddress = 1 in {
555 let isCommutable = 1 in {
556 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
557 "addps {$src2, $dst|$dst, $src2}",
558 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
559 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
560 "addpd {$src2, $dst|$dst, $src2}",
561 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
562 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
563 "mulps {$src2, $dst|$dst, $src2}",
564 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
565 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
566 "mulpd {$src2, $dst|$dst, $src2}",
567 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
// Memory forms fold a 128-bit load into the arithmetic.
570 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
571 "addps {$src2, $dst|$dst, $src2}",
572 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
573 (load addr:$src2))))]>;
574 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
575 "addpd {$src2, $dst|$dst, $src2}",
576 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
577 (load addr:$src2))))]>;
578 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
579 "mulps {$src2, $dst|$dst, $src2}",
580 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
581 (load addr:$src2))))]>;
582 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
583 "mulpd {$src2, $dst|$dst, $src2}",
584 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
585 (load addr:$src2))))]>;
// div/sub are not commutable.
587 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
588 "divps {$src2, $dst|$dst, $src2}",
589 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
590 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
591 "divps {$src2, $dst|$dst, $src2}",
592 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
593 (load addr:$src2))))]>;
594 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
595 "divpd {$src2, $dst|$dst, $src2}",
596 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
597 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
598 "divpd {$src2, $dst|$dst, $src2}",
599 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
600 (load addr:$src2))))]>;
602 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
603 "subps {$src2, $dst|$dst, $src2}",
604 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
605 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
606 "subps {$src2, $dst|$dst, $src2}",
607 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
608 (load addr:$src2))))]>;
609 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
610 "subpd {$src2, $dst|$dst, $src2}",
611 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
612 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
613 "subpd {$src2, $dst|$dst, $src2}",
614 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
615 (load addr:$src2))))]>;
// Packed square root.
618 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
619 "sqrtps {$src, $dst|$dst, $src}",
620 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
621 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
622 "sqrtps {$src, $dst|$dst, $src}",
623 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
624 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
625 "sqrtpd {$src, $dst|$dst, $src}",
626 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
627 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
628 "sqrtpd {$src, $dst|$dst, $src}",
629 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Estimate and min/max packed forms; no patterns yet.
631 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
632 "rsqrtps {$src, $dst|$dst, $src}", []>;
633 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
634 "rsqrtps {$src, $dst|$dst, $src}", []>;
635 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
636 "rcpps {$src, $dst|$dst, $src}", []>;
637 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
638 "rcpps {$src, $dst|$dst, $src}", []>;
640 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
641 "maxps {$src, $dst|$dst, $src}", []>;
642 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
643 "maxps {$src, $dst|$dst, $src}", []>;
644 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
645 "maxpd {$src, $dst|$dst, $src}", []>;
646 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
647 "maxpd {$src, $dst|$dst, $src}", []>;
648 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
649 "minps {$src, $dst|$dst, $src}", []>;
650 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
651 "minps {$src, $dst|$dst, $src}", []>;
652 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
653 "minpd {$src, $dst|$dst, $src}", []>;
654 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
655 "minpd {$src, $dst|$dst, $src}", []>;
658 let isTwoAddress = 1 in {
659 let isCommutable = 1 in {
660 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
661 "andps {$src2, $dst|$dst, $src2}",
662 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
663 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
664 "andpd {$src2, $dst|$dst, $src2}",
665 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
666 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
667 "orps {$src2, $dst|$dst, $src2}",
668 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
669 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
670 "orpd {$src2, $dst|$dst, $src2}",
671 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
672 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
673 "xorps {$src2, $dst|$dst, $src2}",
674 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
675 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
676 "xorpd {$src2, $dst|$dst, $src2}",
677 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
// Memory forms of the packed logical ops: the 128-bit load of the second
// operand is folded into the instruction ((load addr:$src2) in the pattern).
679 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
680 "andps {$src2, $dst|$dst, $src2}",
681 [(set VR128:$dst, (v4i32 (and VR128:$src1,
682 (load addr:$src2))))]>;
683 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
684 "andpd {$src2, $dst|$dst, $src2}",
685 [(set VR128:$dst, (v2i64 (and VR128:$src1,
686 (load addr:$src2))))]>;
687 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
688 "orps {$src2, $dst|$dst, $src2}",
689 [(set VR128:$dst, (v4i32 (or VR128:$src1,
690 (load addr:$src2))))]>;
691 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
692 "orpd {$src2, $dst|$dst, $src2}",
693 [(set VR128:$dst, (v2i64 (or VR128:$src1,
694 (load addr:$src2))))]>;
695 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
696 "xorps {$src2, $dst|$dst, $src2}",
697 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
698 (load addr:$src2))))]>;
699 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
700 "xorpd {$src2, $dst|$dst, $src2}",
701 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
702 (load addr:$src2))))]>;
703 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
704 "andnps {$src2, $dst|$dst, $src2}",
705 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
// ANDNPS, memory form: dst = ~src1 & mem128 (note the (not ...) on $src1 —
// ANDN complements the FIRST operand, per x86 andnps semantics).
707 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
708 "andnps {$src2, $dst|$dst, $src2}",
709 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
710 (load addr:$src2))))]>;
711 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
712 "andnpd {$src2, $dst|$dst, $src2}",
713 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// ANDNPD, memory form: dst = ~src1 & mem128.
// BUGFIX: the pattern previously matched plain (and VR128:$src1, ...),
// which is ordinary ANDPD semantics — ANDN complements the first operand,
// exactly as the sibling ANDNPSrm pattern above does with (not VR128:$src1).
716 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
717 "andnpd {$src2, $dst|$dst, $src2}",
718 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
719 (load addr:$src2))))]>;
// Packed FP compares (CMPPS/CMPPD). The SSECC:$cc operand selects the
// comparison predicate and is spliced into the mnemonic via ${cc}
// (e.g. cmpeqps, cmpltpd). No selection patterns yet.
722 let isTwoAddress = 1 in {
723 def CMPPSrr : PSI<0xC2, MRMSrcReg,
724 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
725 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
726 def CMPPSrm : PSI<0xC2, MRMSrcMem,
727 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
728 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
729 def CMPPDrr : PDI<0xC2, MRMSrcReg,
730 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
731 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
732 def CMPPDrm : PDI<0xC2, MRMSrcMem,
733 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
734 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
737 // Shuffle and unpack instructions
// BUGFIX: the reg-reg forms of PSHUFW/PSHUFD were declared MRMDestReg.
// pshufw/pshufd read their vector source from the ModRM r/m field and write
// the ModRM reg field, so the rr forms are MRMSrcReg — matching the
// MRMSrcMem memory forms defined alongside them.
738 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
739 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
740 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
741 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
742 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
743 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
744 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
745 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
746 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
747 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
748 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
749 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// SHUFPS/SHUFPD: two-operand shuffle with an 8-bit immediate mask. The rr
// forms select vector_shuffle nodes whose mask matches SHUFP_shuffle_mask;
// the rm forms have no patterns yet.
751 let isTwoAddress = 1 in {
752 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
753 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
754 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
755 [(set VR128:$dst, (vector_shuffle
756 (v4f32 VR128:$src1), (v4f32 VR128:$src2),
757 SHUFP_shuffle_mask:$src3))]>;
758 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
759 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
760 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
761 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
762 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
763 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
764 [(set VR128:$dst, (vector_shuffle
765 (v2f64 VR128:$src1), (v2f64 VR128:$src2),
766 SHUFP_shuffle_mask:$src3))]>;
767 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
768 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
769 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// UNPCKH/UNPCKL (high/low interleave) for PS and PD, reg and mem forms.
// All pattern lists are empty; selection is done via the Pat<> rules later
// in the file.
771 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
772 (ops VR128:$dst, VR128:$src1, VR128:$src2),
773 "unpckhps {$src2, $dst|$dst, $src2}", []>;
774 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
775 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
776 "unpckhps {$src2, $dst|$dst, $src2}", []>;
777 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
778 (ops VR128:$dst, VR128:$src1, VR128:$src2),
779 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
780 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
781 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
782 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
783 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
784 (ops VR128:$dst, VR128:$src1, VR128:$src2),
785 "unpcklps {$src2, $dst|$dst, $src2}", []>;
786 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
787 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
788 "unpcklps {$src2, $dst|$dst, $src2}", []>;
789 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
790 (ops VR128:$dst, VR128:$src1, VR128:$src2),
791 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
792 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
793 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
794 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
797 //===----------------------------------------------------------------------===//
798 // SSE integer instructions
799 //===----------------------------------------------------------------------===//
802 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
803 "movd {$src, $dst|$dst, $src}",
805 (v4i32 (scalar_to_vector R32:$src)))]>;
806 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
807 "movd {$src, $dst|$dst, $src}",
809 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
// MOVD store (xmm low 32 bits -> m32) and MOVDQA aligned 128-bit moves.
// MOVDQArm/MOVDQAmr select v4i32 loads/stores directly in their patterns;
// other integer vector types are mapped onto them via Pat<> rules below.
811 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
812 "movd {$src, $dst|$dst, $src}", []>;
814 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
815 "movdqa {$src, $dst|$dst, $src}", []>;
816 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
817 "movdqa {$src, $dst|$dst, $src}",
818 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
819 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
820 "movdqa {$src, $dst|$dst, $src}",
821 [(store (v4i32 VR128:$src), addr:$dst)]>;
823 // SSE2 instructions with XS prefix
824 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
825 "movq {$src, $dst|$dst, $src}",
827 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
829 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
830 "movq {$src, $dst|$dst, $src}", []>, XS,
// MOVQ store: low 64 bits of an XMM register to m64 (66 0F D6).
// BUGFIX: this def had MRMSrcMem, but the memory operand ($dst) is the
// DESTINATION here — the store form must be MRMDestMem, consistent with
// MOVD128mr and MOVDQAmr above.
832 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
833 "movq {$src, $dst|$dst, $src}", []>;
835 // 128-bit Integer Arithmetic
// PADDB/PADDW/PADDD: packed integer adds. Two-address; the rr forms are
// commutative. The rm forms fold the 128-bit load of the second operand.
// NOTE(review): the rm forms take f128mem rather than i128mem — presumably
// both describe the same 128-bit memory operand; verify this is intentional.
836 let isTwoAddress = 1 in {
837 let isCommutable = 1 in {
838 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
839 "paddb {$src2, $dst|$dst, $src2}",
840 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
841 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
842 "paddw {$src2, $dst|$dst, $src2}",
843 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
844 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
845 "paddd {$src2, $dst|$dst, $src2}",
846 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
848 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
849 "paddb {$src2, $dst|$dst, $src2}",
850 [(set VR128:$dst, (v16i8 (add VR128:$src1,
851 (load addr:$src2))))]>;
852 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
853 "paddw {$src2, $dst|$dst, $src2}",
854 [(set VR128:$dst, (v8i16 (add VR128:$src1,
855 (load addr:$src2))))]>;
856 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
857 "paddd {$src2, $dst|$dst, $src2}",
858 [(set VR128:$dst, (v4i32 (add VR128:$src1,
859 (load addr:$src2))))]>;
862 //===----------------------------------------------------------------------===//
863 // Miscellaneous Instructions
864 //===----------------------------------------------------------------------===//
// Load the MXCSR control/status register from m32 (0F AE /2). SSE1 only.
866 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
867 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
869 //===----------------------------------------------------------------------===//
870 // Alias Instructions
871 //===----------------------------------------------------------------------===//
873 // Alias instructions that map zero vector to xorp* for sse.
874 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// One VZERO alias per vector type so a zero vector of any element type can
// be materialized with pxor/xorps/xorpd (reg XOR itself). The integer
// variants require SSE2 (pxor on XMM is an SSE2 instruction).
875 def VZEROv16i8 : I<0xEF, MRMInitReg, (ops VR128:$dst),
876 "pxor $dst, $dst", [(set VR128:$dst, (v16i8 vecimm0))]>,
877 Requires<[HasSSE2]>, TB, OpSize;
878 def VZEROv8i16 : I<0xEF, MRMInitReg, (ops VR128:$dst),
879 "pxor $dst, $dst", [(set VR128:$dst, (v8i16 vecimm0))]>,
880 Requires<[HasSSE2]>, TB, OpSize;
881 def VZEROv4i32 : I<0xEF, MRMInitReg, (ops VR128:$dst),
882 "pxor $dst, $dst", [(set VR128:$dst, (v4i32 vecimm0))]>,
883 Requires<[HasSSE2]>, TB, OpSize;
884 def VZEROv2i64 : I<0xEF, MRMInitReg, (ops VR128:$dst),
885 "pxor $dst, $dst", [(set VR128:$dst, (v2i64 vecimm0))]>,
886 Requires<[HasSSE2]>, TB, OpSize;
887 def VZEROv4f32 : PSI<0x57, MRMInitReg, (ops VR128:$dst),
888 "xorps $dst, $dst", [(set VR128:$dst, (v4f32 vecimm0))]>;
889 def VZEROv2f64 : PDI<0x57, MRMInitReg, (ops VR128:$dst),
890 "xorpd $dst, $dst", [(set VR128:$dst, (v2f64 vecimm0))]>;
892 // Scalar to 128-bit vector with zero extension.
893 // Three operand (but two address) aliases.
// These move a scalar into the low element of $src1 (tied to $dst); the
// zero-extension effect comes from pairing them with a VZERO* in the
// X86zexts2vec Pat<> rules below. No standalone selection patterns.
894 let isTwoAddress = 1 in {
895 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
896 "movss {$src2, $dst|$dst, $src2}", []>;
897 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
898 "movsd {$src2, $dst|$dst, $src2}", []>;
899 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
900 "movd {$src2, $dst|$dst, $src2}", []>;
901 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
902 "movq {$src2, $dst|$dst, $src2}", []>;
905 // Loading from memory automatically zeroing upper bits.
906 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
907 "movss {$src, $dst|$dst, $src}",
909 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
910 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
911 "movsd {$src, $dst|$dst, $src}",
913 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
914 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
915 "movd {$src, $dst|$dst, $src}",
917 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
919 //===----------------------------------------------------------------------===//
920 // Non-Instruction Patterns
921 //===----------------------------------------------------------------------===//
923 // 128-bit vector undef's.
// Map undef of every 128-bit vector type to an IMPLICIT_DEF of VR128.
// NOTE(review): no v4f32 undef pattern is visible in this chunk — confirm
// it is defined elsewhere (or gated on HasSSE1).
924 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
925 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
926 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
927 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
928 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
930 // Load 128-bit integer vector values.
931 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
933 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
935 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
937 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
940 // Store 128-bit integer vector values.
// BUGFIX: all four patterns select MOVDQAmr, which is an SSE2 instruction
// (PDI class, movdqa), so every one must be gated on HasSSE2. The first
// three were Requires<[HasSSE1]>, which would let an SSE2-only instruction
// be emitted on an SSE1 target; the v2i64 pattern was already correct.
941 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
942 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
943 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
944 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
945 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
946 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
947 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
948 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
950 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
952 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
954 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
// Bitconvert between v4f32 and v4i32 is a no-op on VR128: reuse the same
// register with the new type, emitting no instruction.
958 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
959 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
961 // Zeroing a VR128 then do a MOVS* to the lower bits.
// X86zexts2vec (scalar -> vector, upper elements zero) is lowered as a
// VZERO* of the right type followed by a move into the low element. The
// 16-bit and 8-bit scalars are first zero-extended to 32 bits in a GPR
// (MOVZX32rr16/MOVZX32rr8) so MOVZD128rr can be used.
962 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
963 (MOVZSD128rr (VZEROv2f64), FR64:$src)>;
964 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
965 (MOVZSS128rr (VZEROv4f32), FR32:$src)>;
966 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
967 (MOVZQ128rr (VZEROv2i64), VR64:$src)>, Requires<[HasSSE2]>;
968 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
969 (MOVZD128rr (VZEROv4i32), R32:$src)>;
970 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
971 (MOVZD128rr (VZEROv8i16), (MOVZX32rr16 R16:$src))>;
972 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
973 (MOVZD128rr (VZEROv16i8), (MOVZX32rr8 R8:$src))>;
975 // Splat v4f32 / v4i32
976 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
977 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
979 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
980 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
983 // Splat v2f64 / v2i64
// A 2-element splat is a movlhps of the source with itself (low qword
// duplicated into the high qword).
// NOTE(review): v2f64/v2i64 are SSE2 types but these are gated on
// Requires<[HasSSE1]> — confirm whether this should be HasSSE2.
984 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
985 (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE1]>;
986 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
987 (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE1]>;
989 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
990 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
991 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
993 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
994 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
997 // Shuffle v2f64 / v2i64
// Two-input 2-element shuffles, classified by their mask fragment:
//   MOVLHPSorUNPCKLPD -> movlhps (reg) / unpcklpd (one operand in memory)
//   MOVHLPS           -> movhlps
//   UNPCKHPD          -> unpckhpd
// The v2i64 rules mirror the v2f64 ones element-for-element.
// NOTE(review): the MOVLHPS/MOVHLPS rules are gated on HasSSE1 even though
// the types are SSE2 — confirm whether these should require HasSSE2.
998 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
999 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1000 (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1001 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
1002 MOVHLPS_shuffle_mask:$sm),
1003 (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1004 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
1005 UNPCKHPD_shuffle_mask:$sm),
1006 (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1007 def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
1008 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1009 (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
1011 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1012 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1013 (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1014 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1015 MOVHLPS_shuffle_mask:$sm),
1016 (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
1017 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1018 UNPCKHPD_shuffle_mask:$sm),
1019 (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1020 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
1021 MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
1022 (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;