1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE-specific SelectionDAG node definitions.
// Type profile for the unpack-low node: one result, two operands, with the
// result type constrained to equal both operand types.
20 def SDTX86Unpcklp : SDTypeProfile<1, 2,
21 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
// Packed load used by the scalar "load low element of a 128-bit slot" aliases
// below (definition continues on a line not visible in this view).
23 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise FP AND/XOR nodes; both are commutative and associative so the
// DAG combiner may reassociate them.
25 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
26 [SDNPCommutative, SDNPAssociative]>;
27 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
28 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector insertion: one result, one operand, no type constraints.
29 def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
30 SDTypeProfile<1, 1, []>, []>;
// Unpack-low node using SDTX86Unpcklp (definition continues off-view).
31 def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar views of a packed load: the f32/f64 low element of an X86loadp.
38 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// Typed 128-bit vector loads, one fragment per legal vector type.
41 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
43 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
44 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
45 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
46 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// Immediate leaf matching the f32 constant +0.0 (used to fold fld0 to pxor).
48 def fp32imm0 : PatLeaf<(f32 fpimm), [{
49 return N->isExactlyValue(+0.0);
// Leaf matching an all-zeros build_vector.
52 def vecimm0 : PatLeaf<(build_vector), [{
53 return X86::isZeroVector(N);
56 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// Computes the 8-bit shuffle immediate encoding of a shuffle-mask
// build_vector, for use as the imm8 operand of PSHUF*/SHUFP*.
58 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
59 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Splat masks that can be lowered with SHUFPS; the xform above supplies imm8.
62 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
63 return X86::isSplatMask(N);
64 }], SHUFFLE_get_shuf_imm>;
// Splat masks that can be lowered with MOVLHPS.
66 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
67 return X86::isSplatMask(N);
// Masks lowerable to either MOVLHPS or UNPCKLPD depending on element type.
70 def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
71 return X86::isMOVLHPSorUNPCKLPDMask(N);
72 }], SHUFFLE_get_shuf_imm>;
// Masks lowerable to MOVHLPS.
74 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
75 return X86::isMOVHLPSMask(N);
76 }], SHUFFLE_get_shuf_imm>;
// Masks lowerable to UNPCKHPD.
78 def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
79 return X86::isUNPCKHPDMask(N);
80 }], SHUFFLE_get_shuf_imm>;
82 // Only use PSHUF if it is not a splat.
83 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
84 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
85 }], SHUFFLE_get_shuf_imm>;
// General SHUFP-compatible masks.
87 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
88 return X86::isSHUFPMask(N);
89 }], SHUFFLE_get_shuf_imm>;
91 //===----------------------------------------------------------------------===//
92 // SSE scalar FP Instructions
93 //===----------------------------------------------------------------------===//
95 // Instruction templates
96 // SSI - SSE1 instructions with XS prefix.
97 // SDI - SSE2 instructions with XD prefix.
98 // PSI - SSE1 instructions with TB prefix.
99 // PDI - SSE2 instructions with TB and OpSize prefixes.
100 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
101 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Base template classes; each fixes the opcode-prefix bytes and the CPU
// feature predicate for one SSE instruction family (see the comment block
// above for the naming scheme).
102 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
103   : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
104 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
105   : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
106 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
107   : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
108 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
109   : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// The *Ii8 variants derive from X86Inst directly so they can set ImmT = Imm8
// (an 8-bit immediate operand) and assign the pattern explicitly.
110 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
111   : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
112   let Pattern = pattern;
114 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
115   : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
116   let Pattern = pattern;
119 // Some 'special' instructions
// Pseudo-instructions materializing an undefined value in a scalar SSE
// register. FR32 is the SSE1 scalar-single class, so it must only require
// SSE1 (requiring SSE2 here would break SSE1-only scalar-float codegen);
// FR64 genuinely needs SSE2.
120 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
121                           "#IMPLICIT_DEF $dst",
122                           [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
123 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
124                           "#IMPLICIT_DEF $dst",
125                           [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
127 // CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
128 // scheduler into a branch sequence.
129 let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
// Select between $t and $f according to condition code $cond; lowered to a
// compare-and-branch diamond since SSE has no register conditional move.
130   def CMOV_FR32 : I<0, Pseudo,
131                     (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
132                     "#CMOV_FR32 PSEUDO!",
133                     [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
134   def CMOV_FR64 : I<0, Pseudo,
135                     (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
136                     "#CMOV_FR64 PSEUDO!",
137                     [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves: reg-reg copies (no pattern; selected as
// copies), loads from memory, and stores to memory.
141 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
142                   "movss {$src, $dst|$dst, $src}", []>;
143 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
144                   "movss {$src, $dst|$dst, $src}",
145                   [(set FR32:$dst, (loadf32 addr:$src))]>;
146 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
147                   "movsd {$src, $dst|$dst, $src}", []>;
148 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
149                   "movsd {$src, $dst|$dst, $src}",
150                   [(set FR64:$dst, (loadf64 addr:$src))]>;
// Store forms (opcode 0x11, ModRM destination is memory).
152 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
153                   "movss {$src, $dst|$dst, $src}",
154                   [(store FR32:$src, addr:$dst)]>;
155 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
156                   "movsd {$src, $dst|$dst, $src}",
157                   [(store FR64:$src, addr:$dst)]>;
159 // Conversion instructions
// cvttss2si / cvttsd2si: FP -> int32 with truncation.
160 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
161                    "cvttss2si {$src, $dst|$dst, $src}",
162                    [(set R32:$dst, (fp_to_sint FR32:$src))]>;
163 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
164                    "cvttss2si {$src, $dst|$dst, $src}",
165                    [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
166 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
167                    "cvttsd2si {$src, $dst|$dst, $src}",
168                    [(set R32:$dst, (fp_to_sint FR64:$src))]>;
169 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
170                    "cvttsd2si {$src, $dst|$dst, $src}",
171                    [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// cvtsd2ss: f64 -> f32 rounding.
172 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
173                    "cvtsd2ss {$src, $dst|$dst, $src}",
174                    [(set FR32:$dst, (fround FR64:$src))]>;
175 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
176                    "cvtsd2ss {$src, $dst|$dst, $src}",
177                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// cvtsi2ss / cvtsi2sd: signed int32 -> FP.
178 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
179                    "cvtsi2ss {$src, $dst|$dst, $src}",
180                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
181 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
182                    "cvtsi2ss {$src, $dst|$dst, $src}",
183                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
184 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
185                    "cvtsi2sd {$src, $dst|$dst, $src}",
186                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
187 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
188                    "cvtsi2sd {$src, $dst|$dst, $src}",
189                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
190 // SSE2 instructions with XS prefix
// cvtss2sd: f32 -> f64 extension; spelled with I + explicit XS because the
// SSI template would wrongly require only SSE1 (these definitions continue
// on lines not visible in this view).
191 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
192                   "cvtss2sd {$src, $dst|$dst, $src}",
193                   [(set FR64:$dst, (fextend FR32:$src))]>, XS,
195 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
196                   "cvtss2sd {$src, $dst|$dst, $src}",
197                   [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
200 // Arithmetic instructions
// Two-address scalar FP arithmetic. $src1 is tied to $dst; add/mul are
// commutable so the register allocator may swap operands.
201 let isTwoAddress = 1 in {
202 let isCommutable = 1 in {
203 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
204                   "addss {$src2, $dst|$dst, $src2}",
205                   [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
206 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
207                   "addsd {$src2, $dst|$dst, $src2}",
208                   [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
209 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
210                   "mulss {$src2, $dst|$dst, $src2}",
211                   [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
212 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
213                   "mulsd {$src2, $dst|$dst, $src2}",
214                   [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
// Memory-operand forms fold a scalar load into the second operand.
217 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
218                   "addss {$src2, $dst|$dst, $src2}",
219                   [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
220 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
221                   "addsd {$src2, $dst|$dst, $src2}",
222                   [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
223 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
224                   "mulss {$src2, $dst|$dst, $src2}",
225                   [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
226 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
227                   "mulsd {$src2, $dst|$dst, $src2}",
228                   [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
// div/sub are not commutable.
230 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
231                   "divss {$src2, $dst|$dst, $src2}",
232                   [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
233 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
234                   "divss {$src2, $dst|$dst, $src2}",
235                   [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
236 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
237                   "divsd {$src2, $dst|$dst, $src2}",
238                   [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
239 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
240                   "divsd {$src2, $dst|$dst, $src2}",
241                   [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
243 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
244                   "subss {$src2, $dst|$dst, $src2}",
245                   [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
246 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
247                   "subss {$src2, $dst|$dst, $src2}",
248                   [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
249 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
250                   "subsd {$src2, $dst|$dst, $src2}",
251                   [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
252 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
253                   "subsd {$src2, $dst|$dst, $src2}",
254                   [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
// Scalar square root; the load forms fold the memory operand.
257 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
258                    "sqrtss {$src, $dst|$dst, $src}",
259                    [(set FR32:$dst, (fsqrt FR32:$src))]>;
260 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
261                    "sqrtss {$src, $dst|$dst, $src}",
262                    [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
263 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
264                    "sqrtsd {$src, $dst|$dst, $src}",
265                    [(set FR64:$dst, (fsqrt FR64:$src))]>;
266 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
267                    "sqrtsd {$src, $dst|$dst, $src}",
268                    [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
// Approximate reciprocal square root / reciprocal; no DAG patterns since
// there is no node for the approximation semantics.
270 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
271                     "rsqrtss {$src, $dst|$dst, $src}", []>;
272 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
273                     "rsqrtss {$src, $dst|$dst, $src}", []>;
274 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
275                   "rcpss {$src, $dst|$dst, $src}", []>;
276 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
277                   "rcpss {$src, $dst|$dst, $src}", []>;
// Scalar max/min; unpatterned for now.
279 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
280                   "maxss {$src, $dst|$dst, $src}", []>;
281 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
282                   "maxss {$src, $dst|$dst, $src}", []>;
283 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
284                   "maxsd {$src, $dst|$dst, $src}", []>;
285 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
286                   "maxsd {$src, $dst|$dst, $src}", []>;
287 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
288                   "minss {$src, $dst|$dst, $src}", []>;
289 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
290                   "minss {$src, $dst|$dst, $src}", []>;
291 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
292                   "minsd {$src, $dst|$dst, $src}", []>;
293 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
294                   "minsd {$src, $dst|$dst, $src}", []>;
296 // Comparison instructions
// cmp{cc}ss/sd: predicate compare writing an all-ones/all-zeros mask into
// $dst; $cc selects the SSE condition-code immediate.
297 let isTwoAddress = 1 in {
298 def CMPSSrr : SSI<0xC2, MRMSrcReg,
299                   (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
300                   "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
301 def CMPSSrm : SSI<0xC2, MRMSrcMem,
302                   (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
303                   "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
304 def CMPSDrr : SDI<0xC2, MRMSrcReg,
305                   (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
306                   "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
307 def CMPSDrm : SDI<0xC2, MRMSrcMem,
308                   (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
309                   "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// ucomiss/ucomisd: unordered compare setting EFLAGS (matched by X86cmp).
312 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
313                    "ucomiss {$src2, $src1|$src1, $src2}",
314                    [(X86cmp FR32:$src1, FR32:$src2)]>;
315 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
316                    "ucomiss {$src2, $src1|$src1, $src2}",
317                    [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
318 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
319                    "ucomisd {$src2, $src1|$src1, $src2}",
320                    [(X86cmp FR64:$src1, FR64:$src2)]>;
321 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
322                    "ucomisd {$src2, $src1|$src1, $src2}",
323                    [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
325 // Aliases of packed instructions for scalar use.  These all have names that
328 // Alias instructions that map fld0 to pxor for sse.
329 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// pxor on an XMM register yields all zero bits, which is +0.0 in either
// FP width; the MRMInitReg format encodes dst as both ModRM operands.
330 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
331                  "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
332                Requires<[HasSSE1]>, TB, OpSize;
333 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
334                  "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
335                Requires<[HasSSE2]>, TB, OpSize;
337 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
338 // Upper bits are disregarded.
339 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
340                      "movaps {$src, $dst|$dst, $src}", []>;
341 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
342                      "movapd {$src, $dst|$dst, $src}", []>;
344 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
345 // Upper bits are disregarded.
346 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
347                      "movaps {$src, $dst|$dst, $src}",
348                      [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
349 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
350                      "movapd {$src, $dst|$dst, $src}",
351                      [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
353 // Alias bitwise logical operations using SSE logical ops on packed FP values.
// Scalar bitwise ops implemented with the packed ANDPS/ORPS/XORPS family;
// only AND and XOR have DAG patterns (via X86fand/X86fxor).
354 let isTwoAddress = 1 in {
355 let isCommutable = 1 in {
356 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
357                     "andps {$src2, $dst|$dst, $src2}",
358                     [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
359 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
360                     "andpd {$src2, $dst|$dst, $src2}",
361                     [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
362 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
363                    "orps {$src2, $dst|$dst, $src2}", []>;
364 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
365                    "orpd {$src2, $dst|$dst, $src2}", []>;
366 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
367                     "xorps {$src2, $dst|$dst, $src2}",
368                     [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
369 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
370                     "xorpd {$src2, $dst|$dst, $src2}",
371                     [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
// Memory forms: the 128-bit aligned load supplies the second operand.
373 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
374                     "andps {$src2, $dst|$dst, $src2}",
375                     [(set FR32:$dst, (X86fand FR32:$src1,
376                                       (X86loadpf32 addr:$src2)))]>;
377 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
378                     "andpd {$src2, $dst|$dst, $src2}",
379                     [(set FR64:$dst, (X86fand FR64:$src1,
380                                       (X86loadpf64 addr:$src2)))]>;
381 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
382                    "orps {$src2, $dst|$dst, $src2}", []>;
383 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
384                    "orpd {$src2, $dst|$dst, $src2}", []>;
385 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
386                     "xorps {$src2, $dst|$dst, $src2}",
387                     [(set FR32:$dst, (X86fxor FR32:$src1,
388                                       (X86loadpf32 addr:$src2)))]>;
389 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
390                     "xorpd {$src2, $dst|$dst, $src2}",
391                     [(set FR64:$dst, (X86fxor FR64:$src1,
392                                       (X86loadpf64 addr:$src2)))]>;
// andn (complement-and-and) forms; unpatterned at scalar level.
394 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
395                      "andnps {$src2, $dst|$dst, $src2}", []>;
396 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
397                      "andnps {$src2, $dst|$dst, $src2}", []>;
398 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
399                      "andnpd {$src2, $dst|$dst, $src2}", []>;
400 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
401                      "andnpd {$src2, $dst|$dst, $src2}", []>;
404 //===----------------------------------------------------------------------===//
405 // SSE packed FP Instructions
406 //===----------------------------------------------------------------------===//
408 // Some 'special' instructions
// Undefined 128-bit vector value pseudo (definition continues on a line not
// visible in this view).
409 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
410                            "#IMPLICIT_DEF $dst",
411                            [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned packed moves (movaps/movapd): reg copies, 128-bit loads, and
// 128-bit stores.
415 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
416                    "movaps {$src, $dst|$dst, $src}", []>;
417 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
418                    "movaps {$src, $dst|$dst, $src}",
419                    [(set VR128:$dst, (loadv4f32 addr:$src))]>;
420 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
421                    "movapd {$src, $dst|$dst, $src}", []>;
422 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
423                    "movapd {$src, $dst|$dst, $src}",
424                    [(set VR128:$dst, (loadv2f64 addr:$src))]>;
426 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
427                    "movaps {$src, $dst|$dst, $src}",
428                    [(store (v4f32 VR128:$src), addr:$dst)]>;
429 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
430                    "movapd {$src, $dst|$dst, $src}",
431                    [(store (v2f64 VR128:$src), addr:$dst)]>;
// Unaligned packed moves; no patterns (selection prefers aligned forms).
433 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
434                    "movups {$src, $dst|$dst, $src}", []>;
435 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
436                    "movups {$src, $dst|$dst, $src}", []>;
437 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
438                    "movups {$src, $dst|$dst, $src}", []>;
439 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
440                    "movupd {$src, $dst|$dst, $src}", []>;
441 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
442                    "movupd {$src, $dst|$dst, $src}", []>;
443 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
444                    "movupd {$src, $dst|$dst, $src}", []>;
// 64-bit half-register moves: low half (movlps/movlpd)...
446 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
447                    "movlps {$src, $dst|$dst, $src}", []>;
448 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
449                    "movlps {$src, $dst|$dst, $src}", []>;
450 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
451                    "movlpd {$src, $dst|$dst, $src}", []>;
452 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
453                    "movlpd {$src, $dst|$dst, $src}", []>;
// ...and high half (movhps/movhpd).
455 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
456                    "movhps {$src, $dst|$dst, $src}", []>;
457 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
458                    "movhps {$src, $dst|$dst, $src}", []>;
459 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
460                    "movhpd {$src, $dst|$dst, $src}", []>;
461 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
462                    "movhpd {$src, $dst|$dst, $src}", []>;
// Two-address register half-moves.
464 let isTwoAddress = 1 in {
465 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
466                     "movlhps {$src2, $dst|$dst, $src2}", []>;
// MOVHLPS (opcode 0x12): move the high 64 bits of $src2 into the low 64
// bits of $dst.  The mnemonic must be "movhlps" — the previous string
// printed "movlhps", which is the distinct 0x16 instruction above.
468 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
469                     "movhlps {$src2, $dst|$dst, $src2}", []>;
// movmskps: extract the sign bits of the four f32 lanes into a GPR.
472 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
473                      "movmskps {$src, $dst|$dst, $src}",
474                      [(set R32:$dst, (int_x86_sse_movmskps VR128:$src))]>;
// movmskpd: extract the sign bits of the two f64 lanes into a GPR.  This is
// an SSE2 instruction encoded 66 0F 50, so it must use the PDI template
// (TB + OpSize, Requires<[HasSSE2]>); the previous PSI made it assemble as
// movmskps and only require SSE1.
475 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
476                      "movmskpd {$src, $dst|$dst, $src}",
477                      [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
479 // Conversion instructions
// MMX <-> packed-FP conversions (VR64 operands).
480 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
481                      "cvtpi2ps {$src, $dst|$dst, $src}", []>;
482 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
483                      "cvtpi2ps {$src, $dst|$dst, $src}", []>;
484 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
485                      "cvtpi2pd {$src, $dst|$dst, $src}", []>;
486 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
487                      "cvtpi2pd {$src, $dst|$dst, $src}", []>;
489 // SSE2 instructions without OpSize prefix
// cvtdq2ps is SSE2 yet uses a bare 0F prefix, so it cannot use PDI
// (these definitions continue on lines not visible in this view).
490 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
491                    "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
493 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
494                    "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
497 // SSE2 instructions with XS prefix
498 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
499                    "cvtdq2pd {$src, $dst|$dst, $src}", []>,
500                  XS, Requires<[HasSSE2]>;
501 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
502                    "cvtdq2pd {$src, $dst|$dst, $src}", []>,
503                  XS, Requires<[HasSSE2]>;
// Packed-FP -> MMX conversions.
505 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
506                      "cvtps2pi {$src, $dst|$dst, $src}", []>;
507 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
508                      "cvtps2pi {$src, $dst|$dst, $src}", []>;
509 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
510                      "cvtpd2pi {$src, $dst|$dst, $src}", []>;
511 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
512                      "cvtpd2pi {$src, $dst|$dst, $src}", []>;
// cvtps2dq (66 0F 5B) and cvtpd2dq (F2 0F E6): packed FP -> packed int32.
514 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
515                      "cvtps2dq {$src, $dst|$dst, $src}", []>;
516 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
517                      "cvtps2dq {$src, $dst|$dst, $src}", []>;
518 // SSE2 packed instructions with XD prefix
519 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
520                      "cvtpd2dq {$src, $dst|$dst, $src}", []>;
521 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
522                      "cvtpd2dq {$src, $dst|$dst, $src}", []>;
524 // SSE2 instructions without OpSize prefix
// cvtps2pd is SSE2 with a bare 0F prefix (definition continues off-view).
525 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
526                    "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// cvtps2pd memory form: the source is an f64mem operand, so the ModRM
// format must be MRMSrcMem — the previous MRMSrcReg encoded the memory
// operand as a register (definition continues on a line not in this view).
528 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
529                    "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// cvtpd2ps: two f64 lanes narrowed to f32, register form.
532 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
533                      "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// cvtpd2ps memory form: the source is an f128mem operand, so the ModRM
// format must be MRMSrcMem — the previous MRMSrcReg mis-encoded the memory
// operand as a register.
534 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
535                      "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Two-address packed FP arithmetic; add/mul commutable, div/sub not.
538 let isTwoAddress = 1 in {
539 let isCommutable = 1 in {
540 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
541                   "addps {$src2, $dst|$dst, $src2}",
542                   [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
543 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
544                   "addpd {$src2, $dst|$dst, $src2}",
545                   [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
546 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
547                   "mulps {$src2, $dst|$dst, $src2}",
548                   [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
549 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
550                   "mulpd {$src2, $dst|$dst, $src2}",
551                   [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
// Memory forms fold a 128-bit load into the second operand.
554 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
555                   "addps {$src2, $dst|$dst, $src2}",
556                   [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
557                                             (load addr:$src2))))]>;
558 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
559                   "addpd {$src2, $dst|$dst, $src2}",
560                   [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
561                                             (load addr:$src2))))]>;
562 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
563                   "mulps {$src2, $dst|$dst, $src2}",
564                   [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
565                                             (load addr:$src2))))]>;
566 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
567                   "mulpd {$src2, $dst|$dst, $src2}",
568                   [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
569                                             (load addr:$src2))))]>;
571 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
572                   "divps {$src2, $dst|$dst, $src2}",
573                   [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
574 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
575                   "divps {$src2, $dst|$dst, $src2}",
576                   [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
577                                             (load addr:$src2))))]>;
578 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
579                   "divpd {$src2, $dst|$dst, $src2}",
580                   [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
581 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
582                   "divpd {$src2, $dst|$dst, $src2}",
583                   [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
584                                             (load addr:$src2))))]>;
586 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
587                   "subps {$src2, $dst|$dst, $src2}",
588                   [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
589 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
590                   "subps {$src2, $dst|$dst, $src2}",
591                   [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
592                                             (load addr:$src2))))]>;
593 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
594                   "subpd {$src2, $dst|$dst, $src2}",
595                   [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
596 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
597                   "subpd {$src2, $dst|$dst, $src2}",
598                   [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
599                                             (load addr:$src2))))]>;
// Packed square root, with folded-load forms.
602 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
603                    "sqrtps {$src, $dst|$dst, $src}",
604                    [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
605 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
606                    "sqrtps {$src, $dst|$dst, $src}",
607                    [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
608 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
609                    "sqrtpd {$src, $dst|$dst, $src}",
610                    [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
611 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
612                    "sqrtpd {$src, $dst|$dst, $src}",
613                    [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Approximate reciprocal square root / reciprocal; unpatterned.
615 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
616                     "rsqrtps {$src, $dst|$dst, $src}", []>;
617 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
618                     "rsqrtps {$src, $dst|$dst, $src}", []>;
619 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
620                   "rcpps {$src, $dst|$dst, $src}", []>;
621 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
622                   "rcpps {$src, $dst|$dst, $src}", []>;
// Packed max/min; unpatterned for now.
624 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
625                   "maxps {$src, $dst|$dst, $src}", []>;
626 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
627                   "maxps {$src, $dst|$dst, $src}", []>;
628 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
629                   "maxpd {$src, $dst|$dst, $src}", []>;
630 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
631                   "maxpd {$src, $dst|$dst, $src}", []>;
632 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
633                   "minps {$src, $dst|$dst, $src}", []>;
634 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
635                   "minps {$src, $dst|$dst, $src}", []>;
636 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
637                   "minpd {$src, $dst|$dst, $src}", []>;
638 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
639                   "minpd {$src, $dst|$dst, $src}", []>;
642 let isTwoAddress = 1 in {
643 let isCommutable = 1 in {
644 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
645 "andps {$src2, $dst|$dst, $src2}",
646 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
647 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
648 "andpd {$src2, $dst|$dst, $src2}",
649 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
650 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
651 "orps {$src2, $dst|$dst, $src2}",
652 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
653 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
654 "orpd {$src2, $dst|$dst, $src2}",
655 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
656 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
657 "xorps {$src2, $dst|$dst, $src2}",
658 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
659 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
660 "xorpd {$src2, $dst|$dst, $src2}",
661 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
// Packed FP bitwise logical ops, register-memory forms. Same modeling as the
// rr forms above (v4i32 for PS, v2i64 for PD) with the RHS loaded from memory.
def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andps {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (and VR128:$src1,
                                          (load addr:$src2))))]>;
def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (and VR128:$src1,
                                          (load addr:$src2))))]>;
def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "orps {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (or VR128:$src1,
                                         (load addr:$src2))))]>;
def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "orpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                         (load addr:$src2))))]>;
def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "xorps {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (xor VR128:$src1,
                                          (load addr:$src2))))]>;
def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "xorpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (xor VR128:$src1,
                                          (load addr:$src2))))]>;
687 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
688 "andnps {$src2, $dst|$dst, $src2}",
689 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
// andnps computes (~src1) & src2 (src2 loaded from memory here); the pattern
// reflects that with the explicit (not ...) on the register operand.
def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andnps {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
                                          (load addr:$src2))))]>;
695 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
696 "andnpd {$src2, $dst|$dst, $src2}",
697 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// andnpd computes (~src1) & src2. The previous pattern was missing the
// (not ...) on $src1, which would have selected ANDNPD for a plain
// and-with-load and produced the wrong result. Matches ANDNPSrm above.
def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andnpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
                                          (load addr:$src2))))]>;
let isTwoAddress = 1 in {
// Packed FP compares. $cc selects the comparison predicate substituted into
// the cmp${cc}ps / cmp${cc}pd mnemonic; no selection patterns are attached
// yet ([]).
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
721 // Shuffle and unpack instructions
// pshufw/pshufd encode the destination in the ModRM reg field and the source
// in r/m, i.e. the reg-reg forms are MRMSrcReg. The previous MRMDestReg
// would have emitted the two register operands swapped.
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
let isTwoAddress = 1 in {
// SHUFPS/SHUFPD: $src3 is the shuffle-control immediate; the rr forms select
// vector_shuffle nodes whose mask matches SHUFP_shuffle_mask. The rm forms
// have no patterns yet ([]).
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// Unpack high/low packed FP values (unpckhps/unpckhpd/unpcklps/unpcklpd).
// No selection patterns are attached yet ([]); these are selected via the
// non-instruction patterns further below.
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpcklpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpcklpd {$src2, $dst|$dst, $src2}", []>;
781 //===----------------------------------------------------------------------===//
782 // SSE integer instructions
783 //===----------------------------------------------------------------------===//
786 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
787 "movd {$src, $dst|$dst, $src}",
789 (v4i32 (scalar_to_vector R32:$src)))]>;
// movd load/store forms (32-bit GPR memory <-> low dword of an XMM register).
// No patterns attached yet ([]).
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                  "movd {$src, $dst|$dst, $src}", []>;
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                  "movd {$src, $dst|$dst, $src}", []>;
// movdqa: aligned 128-bit integer moves. The load/store forms select plain
// v4i32 vector load/store; the other integer vector types are mapped onto
// these via the patterns in the Non-Instruction Patterns section.
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v4i32 VR128:$src), addr:$dst)]>;
804 // SSE2 instructions with XS prefix
805 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
806 "movq {$src, $dst|$dst, $src}",
808 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
// movq load form (XS-prefixed SSE2 encoding); no pattern attached yet ([]).
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                  "movq {$src, $dst|$dst, $src}", []>, XS;
// movq store form: the memory operand ($dst) is the destination, so the
// encoding must be MRMDestMem. The previous MRMSrcMem would have encoded
// the operands backwards for this store.
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                  "movq {$src, $dst|$dst, $src}", []>;
// 128-bit Integer Arithmetic
let isTwoAddress = 1 in {
let isCommutable = 1 in {
// Packed integer adds, register-register forms (paddb/paddw/paddd).
def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
// Packed integer adds, register-memory forms. These operate on 128-bit
// integer memory operands, so use i128mem (as movdqa/pshufd do) rather than
// the FP-domain f128mem the earlier revision used.
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddb {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v16i8 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddw {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v8i16 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "paddd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                            (load addr:$src2))))]>;
843 //===----------------------------------------------------------------------===//
844 // Miscellaneous Instructions
845 //===----------------------------------------------------------------------===//
// ldmxcsr: load the MXCSR control/status register from memory (SSE1).
def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
                "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
850 //===----------------------------------------------------------------------===//
851 // Alias Instructions
852 //===----------------------------------------------------------------------===//
854 // Alias instructions that map zero vector to xorp* for sse.
855 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Zero-vector pseudo-instructions: one per legal 128-bit vector type, all
// expanding to a self-xor of $dst (pxor for the integer types, xorps/xorpd
// for the FP types) that matches a vecimm0 of that type.
def VZEROv16i8 : I<0xEF, MRMInitReg, (ops VR128:$dst),
                   "pxor $dst, $dst", [(set VR128:$dst, (v16i8 vecimm0))]>,
                 Requires<[HasSSE2]>, TB, OpSize;
def VZEROv8i16 : I<0xEF, MRMInitReg, (ops VR128:$dst),
                   "pxor $dst, $dst", [(set VR128:$dst, (v8i16 vecimm0))]>,
                 Requires<[HasSSE2]>, TB, OpSize;
def VZEROv4i32 : I<0xEF, MRMInitReg, (ops VR128:$dst),
                   "pxor $dst, $dst", [(set VR128:$dst, (v4i32 vecimm0))]>,
                 Requires<[HasSSE2]>, TB, OpSize;
def VZEROv2i64 : I<0xEF, MRMInitReg, (ops VR128:$dst),
                   "pxor $dst, $dst", [(set VR128:$dst, (v2i64 vecimm0))]>,
                 Requires<[HasSSE2]>, TB, OpSize;
def VZEROv4f32 : PSI<0x57, MRMInitReg, (ops VR128:$dst),
                     "xorps $dst, $dst", [(set VR128:$dst, (v4f32 vecimm0))]>;
def VZEROv2f64 : PDI<0x57, MRMInitReg, (ops VR128:$dst),
                     "xorpd $dst, $dst", [(set VR128:$dst, (v2f64 vecimm0))]>;
873 def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src),
874 "movaps {$src, $dst|$dst, $src}",
876 (v4f32 (scalar_to_vector FR32:$src)))]>;
878 def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
879 "movapd {$src, $dst|$dst, $src}",
881 (v2f64 (scalar_to_vector FR64:$src)))]>;
883 //===----------------------------------------------------------------------===//
884 // Non-Instruction Patterns
885 //===----------------------------------------------------------------------===//
887 // 128-bit vector undef's.
// Every 128-bit vector undef maps to a single IMPLICIT_DEF of VR128.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
894 // Load 128-bit integer vector values.
895 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
897 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
899 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
901 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
// Store 128-bit integer vector values. MOVDQA is an SSE2 instruction (and
// the 128-bit integer vector types are only legal with SSE2), so all four
// patterns require SSE2 — the first three previously said HasSSE1, which
// was inconsistent with the v2i64 pattern and with MOVDQAmr itself.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
914 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
916 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
918 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
// Bit-preserving casts between v4f32 and v4i32 are free: the pattern result
// is the source register itself, so no instruction is emitted.
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
925 // Splat v4f32 / v4i32
926 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
927 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
929 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
930 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
// Splat v2f64 / v2i64 via movlhps. The instruction itself is SSE1, but the
// v2f64/v2i64 types are only legal with SSE2, so these must require SSE2
// (the earlier HasSSE1 was inconsistent with the UNPCKHPD patterns below).
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
939 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
940 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
941 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
943 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
944 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
// Shuffle v2f64 / v2i64
// All v2f64 shuffles require SSE2: the type is SSE2-only even when the
// selected instruction (movlhps/movhlps) is SSE1. The earlier HasSSE1 on
// the first two patterns was inconsistent with the UNPCKHPD pattern.
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           MOVHLPS_shuffle_mask:$sm),
          (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           UNPCKHPD_shuffle_mask:$sm),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
// v2i64 shuffles mirror the v2f64 ones above; like them, the v2i64 type is
// SSE2-only, so all patterns require SSE2 (not HasSSE1 as before).
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           MOVHLPS_shuffle_mask:$sm),
          (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           UNPCKHPD_shuffle_mask:$sm),
          (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;