1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE-specific selection-DAG nodes, produced by X86 lowering and matched by
// the instruction patterns later in this file.
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// NOTE(review): X86loadp's continuation line (its SDNP flag list) is missing
// from this copy of the file -- restore it from the original.
// Bitwise AND / XOR on floating-point values (used for sign/abs bit tricks).
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector insertion nodes; the "zext" form presumably zero-fills the
// upper vector elements -- confirm against the X86ISD lowering code.
26 def X86s2vec : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
// unpckl type profile: result and both operands must have the same type.
31 def SDTUnpckl : SDTypeProfile<1, 2,
32 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar loads that go through a 128-bit packed load (X86loadp).
38 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// Typed 128-bit vector loads, used by the memory-operand patterns below.
41 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
43 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
44 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
45 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
46 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// Matches only the f32 immediate +0.0 (so FsFLD0SS can use pxor).
48 def fp32imm0 : PatLeaf<(f32 fpimm), [{
49 return N->isExactlyValue(+0.0);
// NOTE(review): the closing "}]>;" of fp32imm0 -- and the fp64imm0 twin that
// FsFLD0SD below refers to -- are missing from this copy; restore them.
52 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
54 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
55 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// NOTE(review): the closing "}]>;" of SHUFFLE_get_shuf_imm is missing here.
// Shuffle-mask predicates: each matches a build_vector shuffle mask; where an
// xform is attached, the mask is converted to the instruction's 8-bit imm.
// NOTE(review): several PatLeaf bodies below are missing their closing
// "}]>;" (and possibly an attached xform) in this copy of the file.
58 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
59 return X86::isSplatMask(N);
60 }], SHUFFLE_get_shuf_imm>;
62 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
63 return X86::isSplatMask(N);
66 def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
67 return X86::isMOVLHPSMask(N);
70 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
71 return X86::isMOVHLPSMask(N);
74 def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
75 return X86::isUNPCKLMask(N);
78 def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
79 return X86::isUNPCKHMask(N);
82 // Only use PSHUF if it is not a splat.
83 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
84 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
85 }], SHUFFLE_get_shuf_imm>;
87 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
88 return X86::isSHUFPMask(N);
89 }], SHUFFLE_get_shuf_imm>;
91 //===----------------------------------------------------------------------===//
92 // SSE scalar FP Instructions
93 //===----------------------------------------------------------------------===//
95 // Instruction templates
96 // SSI - SSE1 instructions with XS prefix.
97 // SDI - SSE2 instructions with XD prefix.
98 // PSI - SSE1 instructions with TB prefix.
99 // PDI - SSE2 instructions with TB and OpSize prefixes.
100 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
101 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Template classes combining the base X86 instruction class with the
// prefix bytes and subtarget predicate for each SSE instruction family
// (see the legend in the comment block above).
102 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
103 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
104 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
105 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
106 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
107 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
108 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
109 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
110 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
111 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
112 let Pattern = pattern;
// NOTE(review): the closing "}" of PSIi8 and of PDIi8 are missing from this
// copy of the file -- restore them.
114 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
115 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
116 let Pattern = pattern;
119 // Some 'special' instructions
// Pseudo-instructions providing IMPLICIT_DEF for the scalar SSE register
// classes; they emit no machine code.
120 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
121 "#IMPLICIT_DEF $dst",
// NOTE(review): Requires<[HasSSE2]> on the FR32 form looks like a copy-paste
// from the FR64 def below; FR32 is usable with SSE1 alone -- confirm.
122 [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
123 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
124 "#IMPLICIT_DEF $dst",
125 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
127 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
128 // scheduler into a branch sequence.
129 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
130 def CMOV_FR32 : I<0, Pseudo,
131 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
132 "#CMOV_FR32 PSEUDO!",
133 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
134 def CMOV_FR64 : I<0, Pseudo,
135 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
136 "#CMOV_FR64 PSEUDO!",
137 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// NOTE(review): the "}" closing this let block is missing from this copy.
// Scalar single/double FP moves: register-to-register, load, and store.
// The rr forms carry no pattern; they are selected as copies.
141 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
142 "movss {$src, $dst|$dst, $src}", []>;
143 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
144 "movss {$src, $dst|$dst, $src}",
145 [(set FR32:$dst, (loadf32 addr:$src))]>;
146 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
147 "movsd {$src, $dst|$dst, $src}", []>;
148 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
149 "movsd {$src, $dst|$dst, $src}",
150 [(set FR64:$dst, (loadf64 addr:$src))]>;
// Stores (opcode 0x11 is the mr direction of the same mnemonic).
152 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
153 "movss {$src, $dst|$dst, $src}",
154 [(store FR32:$src, addr:$dst)]>;
155 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
156 "movsd {$src, $dst|$dst, $src}",
157 [(store FR64:$src, addr:$dst)]>;
159 // FR32 / FR64 to 128-bit vector conversion.
160 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
161 "movss {$src, $dst|$dst, $src}",
163 (v4f32 (scalar_to_vector FR32:$src)))]>;
164 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
165 "movss {$src, $dst|$dst, $src}",
167 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
168 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
169 "movsd {$src, $dst|$dst, $src}",
171 (v2f64 (scalar_to_vector FR64:$src)))]>;
172 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
173 "movsd {$src, $dst|$dst, $src}",
175 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
178 // Conversion instructions
179 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
180 "cvtss2si {$src, $dst|$dst, $src}", []>;
181 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
182 "cvtss2si {$src, $dst|$dst, $src}", []>;
184 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
185 "cvttss2si {$src, $dst|$dst, $src}",
186 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
187 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
188 "cvttss2si {$src, $dst|$dst, $src}",
189 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
190 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
191 "cvttsd2si {$src, $dst|$dst, $src}",
192 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
193 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
194 "cvttsd2si {$src, $dst|$dst, $src}",
195 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
196 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
197 "cvtsd2ss {$src, $dst|$dst, $src}",
198 [(set FR32:$dst, (fround FR64:$src))]>;
199 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
200 "cvtsd2ss {$src, $dst|$dst, $src}",
201 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
202 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
203 "cvtsi2ss {$src, $dst|$dst, $src}",
204 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
205 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
206 "cvtsi2ss {$src, $dst|$dst, $src}",
207 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
208 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
209 "cvtsi2sd {$src, $dst|$dst, $src}",
210 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
211 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
212 "cvtsi2sd {$src, $dst|$dst, $src}",
213 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
214 // SSE2 instructions with XS prefix
215 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
216 "cvtss2sd {$src, $dst|$dst, $src}",
217 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
219 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
220 "cvtss2sd {$src, $dst|$dst, $src}",
221 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
224 // Arithmetic instructions
225 let isTwoAddress = 1 in {
226 let isCommutable = 1 in {
227 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
228 "addss {$src2, $dst|$dst, $src2}",
229 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
230 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
231 "addsd {$src2, $dst|$dst, $src2}",
232 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
233 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
234 "mulss {$src2, $dst|$dst, $src2}",
235 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
236 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
237 "mulsd {$src2, $dst|$dst, $src2}",
238 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
241 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
242 "addss {$src2, $dst|$dst, $src2}",
243 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
244 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
245 "addsd {$src2, $dst|$dst, $src2}",
246 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
247 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
248 "mulss {$src2, $dst|$dst, $src2}",
249 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
250 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
251 "mulsd {$src2, $dst|$dst, $src2}",
252 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
254 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
255 "divss {$src2, $dst|$dst, $src2}",
256 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
257 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
258 "divss {$src2, $dst|$dst, $src2}",
259 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
260 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
261 "divsd {$src2, $dst|$dst, $src2}",
262 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
263 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
264 "divsd {$src2, $dst|$dst, $src2}",
265 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
267 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
268 "subss {$src2, $dst|$dst, $src2}",
269 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
270 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
271 "subss {$src2, $dst|$dst, $src2}",
272 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
273 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
274 "subsd {$src2, $dst|$dst, $src2}",
275 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
276 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
277 "subsd {$src2, $dst|$dst, $src2}",
278 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
281 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
282 "sqrtss {$src, $dst|$dst, $src}",
283 [(set FR32:$dst, (fsqrt FR32:$src))]>;
284 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
285 "sqrtss {$src, $dst|$dst, $src}",
286 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
287 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
288 "sqrtsd {$src, $dst|$dst, $src}",
289 [(set FR64:$dst, (fsqrt FR64:$src))]>;
290 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
291 "sqrtsd {$src, $dst|$dst, $src}",
292 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
294 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
295 "rsqrtss {$src, $dst|$dst, $src}", []>;
296 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
297 "rsqrtss {$src, $dst|$dst, $src}", []>;
298 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
299 "rcpss {$src, $dst|$dst, $src}", []>;
300 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
301 "rcpss {$src, $dst|$dst, $src}", []>;
303 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
304 "maxss {$src, $dst|$dst, $src}", []>;
305 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
306 "maxss {$src, $dst|$dst, $src}", []>;
307 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
308 "maxsd {$src, $dst|$dst, $src}", []>;
309 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
310 "maxsd {$src, $dst|$dst, $src}", []>;
311 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
312 "minss {$src, $dst|$dst, $src}", []>;
313 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
314 "minss {$src, $dst|$dst, $src}", []>;
315 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
316 "minsd {$src, $dst|$dst, $src}", []>;
317 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
318 "minsd {$src, $dst|$dst, $src}", []>;
320 // Comparison instructions
321 let isTwoAddress = 1 in {
322 def CMPSSrr : SSI<0xC2, MRMSrcReg,
323 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
324 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
325 def CMPSSrm : SSI<0xC2, MRMSrcMem,
326 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
327 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
328 def CMPSDrr : SDI<0xC2, MRMSrcReg,
329 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
330 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
331 def CMPSDrm : SDI<0xC2, MRMSrcMem,
332 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
333 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// ucomiss/ucomisd: unordered scalar FP compare that sets EFLAGS (matched
// from the X86cmp node).  Both operands are inputs; there is no $dst.
336 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
337 "ucomiss {$src2, $src1|$src1, $src2}",
338 [(X86cmp FR32:$src1, FR32:$src2)]>;
339 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
340 "ucomiss {$src2, $src1|$src1, $src2}",
341 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
342 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
343 "ucomisd {$src2, $src1|$src1, $src2}",
344 [(X86cmp FR64:$src1, FR64:$src2)]>;
345 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
346 "ucomisd {$src2, $src1|$src1, $src2}",
347 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
349 // Aliases of packed instructions for scalar use. These all have names that
352 // Alias instructions that map fld0 to pxor for sse.
353 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
354 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
355 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
356 Requires<[HasSSE1]>, TB, OpSize;
357 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
358 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
359 Requires<[HasSSE2]>, TB, OpSize;
361 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
362 // Upper bits are disregarded.
363 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
364 "movaps {$src, $dst|$dst, $src}", []>;
365 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
366 "movapd {$src, $dst|$dst, $src}", []>;
368 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
369 // Upper bits are disregarded.
370 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
371 "movaps {$src, $dst|$dst, $src}",
372 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
373 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
374 "movapd {$src, $dst|$dst, $src}",
375 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
377 // Alias bitwise logical operations using SSE logical ops on packed FP values.
378 let isTwoAddress = 1 in {
379 let isCommutable = 1 in {
380 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
381 "andps {$src2, $dst|$dst, $src2}",
382 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
383 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
384 "andpd {$src2, $dst|$dst, $src2}",
385 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
386 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
387 "orps {$src2, $dst|$dst, $src2}", []>;
388 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
389 "orpd {$src2, $dst|$dst, $src2}", []>;
390 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
391 "xorps {$src2, $dst|$dst, $src2}",
392 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
393 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
394 "xorpd {$src2, $dst|$dst, $src2}",
395 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
397 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
398 "andps {$src2, $dst|$dst, $src2}",
399 [(set FR32:$dst, (X86fand FR32:$src1,
400 (X86loadpf32 addr:$src2)))]>;
401 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
402 "andpd {$src2, $dst|$dst, $src2}",
403 [(set FR64:$dst, (X86fand FR64:$src1,
404 (X86loadpf64 addr:$src2)))]>;
405 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
406 "orps {$src2, $dst|$dst, $src2}", []>;
407 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
408 "orpd {$src2, $dst|$dst, $src2}", []>;
409 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
410 "xorps {$src2, $dst|$dst, $src2}",
411 [(set FR32:$dst, (X86fxor FR32:$src1,
412 (X86loadpf32 addr:$src2)))]>;
413 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
414 "xorpd {$src2, $dst|$dst, $src2}",
415 [(set FR64:$dst, (X86fxor FR64:$src1,
416 (X86loadpf64 addr:$src2)))]>;
418 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
419 "andnps {$src2, $dst|$dst, $src2}", []>;
420 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
421 "andnps {$src2, $dst|$dst, $src2}", []>;
422 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
423 "andnpd {$src2, $dst|$dst, $src2}", []>;
424 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
425 "andnpd {$src2, $dst|$dst, $src2}", []>;
428 //===----------------------------------------------------------------------===//
429 // SSE packed FP Instructions
430 //===----------------------------------------------------------------------===//
432 // Some 'special' instructions
433 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
434 "#IMPLICIT_DEF $dst",
435 [(set VR128:$dst, (v4f32 (undef)))]>,
439 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
440 "movaps {$src, $dst|$dst, $src}", []>;
441 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
442 "movaps {$src, $dst|$dst, $src}",
443 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
444 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
445 "movapd {$src, $dst|$dst, $src}", []>;
446 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
447 "movapd {$src, $dst|$dst, $src}",
448 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
450 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
451 "movaps {$src, $dst|$dst, $src}",
452 [(store (v4f32 VR128:$src), addr:$dst)]>;
453 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
454 "movapd {$src, $dst|$dst, $src}",
455 [(store (v2f64 VR128:$src), addr:$dst)]>;
457 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
458 "movups {$src, $dst|$dst, $src}", []>;
459 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
460 "movups {$src, $dst|$dst, $src}", []>;
461 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
462 "movups {$src, $dst|$dst, $src}", []>;
463 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
464 "movupd {$src, $dst|$dst, $src}", []>;
465 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
466 "movupd {$src, $dst|$dst, $src}", []>;
467 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
468 "movupd {$src, $dst|$dst, $src}", []>;
470 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
471 "movlps {$src, $dst|$dst, $src}", []>;
472 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
473 "movlps {$src, $dst|$dst, $src}", []>;
474 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
475 "movlpd {$src, $dst|$dst, $src}", []>;
476 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
477 "movlpd {$src, $dst|$dst, $src}", []>;
479 let isTwoAddress = 1 in {
480 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
481 "movhps {$src2, $dst|$dst, $src2}", []>;
482 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
483 "movhpd {$src2, $dst|$dst, $src2}",
485 (v2f64 (vector_shuffle VR128:$src1,
486 (scalar_to_vector (loadf64 addr:$src2)),
487 UNPCKL_shuffle_mask)))]>;
490 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
491 "movhps {$src, $dst|$dst, $src}", []>;
492 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
493 "movhpd {$src, $dst|$dst, $src}", []>;
495 let isTwoAddress = 1 in {
496 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
497 "movlhps {$src2, $dst|$dst, $src2}",
499 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
500 MOVLHPS_shuffle_mask)))]>;
// MOVHLPS: move the high two floats of $src2 into the low half of $dst.
// Fix: the asm string wrongly said "movlhps" (copy-paste from MOVLHPSrr
// above); opcode 0F 12 with a register source is movhlps.
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVHLPS_shuffle_mask)))]>;
// movmskps/movmskpd: transfer the sign bits of the packed FP elements into
// the low bits of a 32-bit GPR.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// Fix: movmskpd is an SSE2 instruction (66 0F 50); it was wrongly defined
// with PSI, which omits the OpSize prefix and uses the SSE1 predicate.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
516 // Conversion instructions
517 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
518 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
519 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
520 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
521 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
522 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
523 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
524 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
526 // SSE2 instructions without OpSize prefix
527 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
528 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
530 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
531 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
534 // SSE2 instructions with XS prefix
535 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
536 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
537 XS, Requires<[HasSSE2]>;
538 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
539 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
540 XS, Requires<[HasSSE2]>;
542 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
543 "cvtps2pi {$src, $dst|$dst, $src}", []>;
544 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
545 "cvtps2pi {$src, $dst|$dst, $src}", []>;
546 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
547 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
548 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
549 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
551 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
552 "cvtps2dq {$src, $dst|$dst, $src}", []>;
553 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
554 "cvtps2dq {$src, $dst|$dst, $src}", []>;
555 // SSE2 packed instructions with XD prefix
556 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
557 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
558 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
559 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
561 // SSE2 instructions without OpSize prefix
// cvtps2pd: SSE2 instruction without OpSize prefix (plain TB encoding).
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
// Fix: the memory form was wrongly MRMSrcReg; an f64mem operand needs
// MRMSrcMem.  (Also restores the Requires<> continuation lines that were
// dropped from this copy of the file.)
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Fix: memory form wrongly used MRMSrcReg; f128mem requires MRMSrcMem.
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
575 let isTwoAddress = 1 in {
576 let isCommutable = 1 in {
577 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
578 "addps {$src2, $dst|$dst, $src2}",
579 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
580 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
581 "addpd {$src2, $dst|$dst, $src2}",
582 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
583 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
584 "mulps {$src2, $dst|$dst, $src2}",
585 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
586 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
587 "mulpd {$src2, $dst|$dst, $src2}",
588 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
591 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
592 "addps {$src2, $dst|$dst, $src2}",
593 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
594 (load addr:$src2))))]>;
595 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
596 "addpd {$src2, $dst|$dst, $src2}",
597 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
598 (load addr:$src2))))]>;
599 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
600 "mulps {$src2, $dst|$dst, $src2}",
601 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
602 (load addr:$src2))))]>;
603 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
604 "mulpd {$src2, $dst|$dst, $src2}",
605 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
606 (load addr:$src2))))]>;
608 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
609 "divps {$src2, $dst|$dst, $src2}",
610 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
611 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
612 "divps {$src2, $dst|$dst, $src2}",
613 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
614 (load addr:$src2))))]>;
615 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
616 "divpd {$src2, $dst|$dst, $src2}",
617 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
618 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
619 "divpd {$src2, $dst|$dst, $src2}",
620 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
621 (load addr:$src2))))]>;
623 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
624 "subps {$src2, $dst|$dst, $src2}",
625 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
626 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
627 "subps {$src2, $dst|$dst, $src2}",
628 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
629 (load addr:$src2))))]>;
630 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
631 "subpd {$src2, $dst|$dst, $src2}",
632 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
633 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
634 "subpd {$src2, $dst|$dst, $src2}",
635 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
636 (load addr:$src2))))]>;
639 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
640 "sqrtps {$src, $dst|$dst, $src}",
641 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
642 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
643 "sqrtps {$src, $dst|$dst, $src}",
644 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
645 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
646 "sqrtpd {$src, $dst|$dst, $src}",
647 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
648 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
649 "sqrtpd {$src, $dst|$dst, $src}",
650 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
652 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
653 "rsqrtps {$src, $dst|$dst, $src}", []>;
654 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
655 "rsqrtps {$src, $dst|$dst, $src}", []>;
656 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
657 "rcpps {$src, $dst|$dst, $src}", []>;
658 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
659 "rcpps {$src, $dst|$dst, $src}", []>;
661 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
662 "maxps {$src, $dst|$dst, $src}", []>;
663 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
664 "maxps {$src, $dst|$dst, $src}", []>;
665 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
666 "maxpd {$src, $dst|$dst, $src}", []>;
667 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
668 "maxpd {$src, $dst|$dst, $src}", []>;
669 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
670 "minps {$src, $dst|$dst, $src}", []>;
671 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
672 "minps {$src, $dst|$dst, $src}", []>;
673 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
674 "minpd {$src, $dst|$dst, $src}", []>;
675 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
676 "minpd {$src, $dst|$dst, $src}", []>;
// Packed FP bitwise logical operations (two-address). The register-register
// AND/OR/XOR forms are commutable; patterns operate on the integer vector
// interpretation (v4i32 / v2i64) of the XMM register.
679 let isTwoAddress = 1 in {
680 let isCommutable = 1 in {
681 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
682 "andps {$src2, $dst|$dst, $src2}",
683 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
684 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
685 "andpd {$src2, $dst|$dst, $src2}",
686 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
687 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
688 "orps {$src2, $dst|$dst, $src2}",
689 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
690 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
691 "orpd {$src2, $dst|$dst, $src2}",
692 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
693 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
694 "xorps {$src2, $dst|$dst, $src2}",
695 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
696 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
697 "xorpd {$src2, $dst|$dst, $src2}",
698 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
699 }
700 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
701 "andps {$src2, $dst|$dst, $src2}",
702 [(set VR128:$dst, (v4i32 (and VR128:$src1,
703 (load addr:$src2))))]>;
704 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
705 "andpd {$src2, $dst|$dst, $src2}",
706 [(set VR128:$dst, (v2i64 (and VR128:$src1,
707 (load addr:$src2))))]>;
708 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
709 "orps {$src2, $dst|$dst, $src2}",
710 [(set VR128:$dst, (v4i32 (or VR128:$src1,
711 (load addr:$src2))))]>;
712 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
713 "orpd {$src2, $dst|$dst, $src2}",
714 [(set VR128:$dst, (v2i64 (or VR128:$src1,
715 (load addr:$src2))))]>;
716 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
717 "xorps {$src2, $dst|$dst, $src2}",
718 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
719 (load addr:$src2))))]>;
720 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
721 "xorpd {$src2, $dst|$dst, $src2}",
722 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
723 (load addr:$src2))))]>;
// Packed FP and-not: these compute NOT(src1) AND src2, so src1 must appear
// under (not ...) in every pattern (ANDNPDrm previously matched a plain
// `and`, which mis-selected andnpd for an ordinary AND-with-load).
724 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
725 "andnps {$src2, $dst|$dst, $src2}",
726 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
727 VR128:$src2)))]>;
728 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
729 "andnps {$src2, $dst|$dst, $src2}",
730 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
731 (load addr:$src2))))]>;
732 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
733 "andnpd {$src2, $dst|$dst, $src2}",
734 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
735 VR128:$src2)))]>;
737 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
738 "andnpd {$src2, $dst|$dst, $src2}",
739 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
740 (load addr:$src2))))]>;
741 }
// Packed FP compares with an SSE condition-code operand ($cc selects the
// cmp{eq,lt,le,...} mnemonic via ${cc}). Empty pattern lists: not yet
// selected from DAG nodes.
743 let isTwoAddress = 1 in {
744 def CMPPSrr : PSI<0xC2, MRMSrcReg,
745 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
746 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
747 def CMPPSrm : PSI<0xC2, MRMSrcMem,
748 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
749 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
750 def CMPPDrr : PDI<0xC2, MRMSrcReg,
751 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
752 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
753 def CMPPDrm : PDI<0xC2, MRMSrcMem,
754 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
755 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
756 }
758 // Shuffle and unpack instructions
// For 0F 70 (pshufw/pshufd) the destination is encoded in ModRM.reg and the
// source in ModRM.r/m, so the register-register forms use MRMSrcReg (the
// previous MRMDestReg would swap the operand encoding), matching the
// MRMSrcMem forms below.
759 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
760 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
761 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
762 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
763 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
764 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
765 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
766 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
767 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
768 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
769 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
770 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// SHUFPS/SHUFPD: two-address shuffles selecting elements from $src1/$src2
// under an immediate control mask, matched against vector_shuffle nodes with
// a SHUFP_shuffle_mask.
// NOTE(review): the SHUFPS forms take i32i8imm:$src3 while the SHUFPD forms
// take i8imm:$src3 — confirm whether this asymmetry is intentional.
772 let isTwoAddress = 1 in {
773 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
774 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
775 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
776 [(set VR128:$dst, (vector_shuffle
777 (v4f32 VR128:$src1), (v4f32 VR128:$src2),
778 SHUFP_shuffle_mask:$src3))]>;
779 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
780 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
781 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
782 [(set VR128:$dst, (vector_shuffle
783 (v4f32 VR128:$src1), (load addr:$src2),
784 SHUFP_shuffle_mask:$src3))]>;
785 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
786 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
787 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
788 [(set VR128:$dst, (vector_shuffle
789 (v2f64 VR128:$src1), (v2f64 VR128:$src2),
790 SHUFP_shuffle_mask:$src3))]>;
791 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
792 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
793 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
794 [(set VR128:$dst, (vector_shuffle
795 (v2f64 VR128:$src1), (load addr:$src2),
796 SHUFP_shuffle_mask:$src3))]>;
// FP unpack-and-interleave (high/low halves), matched against vector_shuffle
// nodes with UNPCKH/UNPCKL masks. Each pattern head line `[(set VR128:$dst,`
// is restored here; the trailing `}` closes the enclosing isTwoAddress let.
798 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
799 (ops VR128:$dst, VR128:$src1, VR128:$src2),
800 "unpckhps {$src2, $dst|$dst, $src2}",
801 [(set VR128:$dst,
802 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
803 UNPCKH_shuffle_mask)))]>;
804 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
805 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
806 "unpckhps {$src2, $dst|$dst, $src2}",
807 [(set VR128:$dst,
808 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
809 UNPCKH_shuffle_mask)))]>;
810 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
811 (ops VR128:$dst, VR128:$src1, VR128:$src2),
812 "unpckhpd {$src2, $dst|$dst, $src2}",
813 [(set VR128:$dst,
814 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
815 UNPCKH_shuffle_mask)))]>;
816 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
817 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
818 "unpckhpd {$src2, $dst|$dst, $src2}",
819 [(set VR128:$dst,
820 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
821 UNPCKH_shuffle_mask)))]>;
823 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
824 (ops VR128:$dst, VR128:$src1, VR128:$src2),
825 "unpcklps {$src2, $dst|$dst, $src2}",
826 [(set VR128:$dst,
827 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
828 UNPCKL_shuffle_mask)))]>;
829 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
830 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
831 "unpcklps {$src2, $dst|$dst, $src2}",
832 [(set VR128:$dst,
833 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
834 UNPCKL_shuffle_mask)))]>;
835 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
836 (ops VR128:$dst, VR128:$src1, VR128:$src2),
837 "unpcklpd {$src2, $dst|$dst, $src2}",
838 [(set VR128:$dst,
839 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
840 UNPCKL_shuffle_mask)))]>;
841 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
842 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
843 "unpcklpd {$src2, $dst|$dst, $src2}",
844 [(set VR128:$dst,
845 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
846 UNPCKL_shuffle_mask)))]>;
847 }
849 //===----------------------------------------------------------------------===//
850 // SSE integer instructions
851 //===----------------------------------------------------------------------===//
// Move a 32-bit GPR or dword load into the low element of an XMM register
// (building a v4i32 via scalar_to_vector), plus the aligned 128-bit integer
// moves. Restored the dropped `[(set VR128:$dst,` pattern-head lines.
854 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
855 "movd {$src, $dst|$dst, $src}",
856 [(set VR128:$dst,
857 (v4i32 (scalar_to_vector R32:$src)))]>;
858 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
859 "movd {$src, $dst|$dst, $src}",
860 [(set VR128:$dst,
861 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
863 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
864 "movd {$src, $dst|$dst, $src}", []>;
866 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
867 "movdqa {$src, $dst|$dst, $src}", []>;
868 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
869 "movdqa {$src, $dst|$dst, $src}",
870 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
871 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
872 "movdqa {$src, $dst|$dst, $src}",
873 [(store (v4i32 VR128:$src), addr:$dst)]>;
875 // SSE2 instructions with XS prefix
// MOVQ128mr stores the low quadword of an XMM register to memory: the memory
// operand is the destination, so the form must be MRMDestMem (was
// MRMSrcMem, which would encode the operands backwards).
876 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
877 "movq {$src, $dst|$dst, $src}",
878 [(set VR128:$dst,
879 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
880 Requires<[HasSSE2]>;
881 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
882 "movq {$src, $dst|$dst, $src}", []>, XS,
883 Requires<[HasSSE2]>;
884 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
885 "movq {$src, $dst|$dst, $src}", []>;
887 // 128-bit Integer Arithmetic
// Packed byte/word/dword adds, register forms (commutable). The trailing `}`
// restores the dropped close of the isCommutable let; the isTwoAddress let
// opened here is closed after the unpack instructions below.
888 let isTwoAddress = 1 in {
889 let isCommutable = 1 in {
890 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
891 "paddb {$src2, $dst|$dst, $src2}",
892 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
893 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
894 "paddw {$src2, $dst|$dst, $src2}",
895 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
896 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
897 "paddd {$src2, $dst|$dst, $src2}",
898 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
899 }
// Packed add with memory operand, and packed subtract (register and memory
// forms; subtract is not commutable, so these sit outside the isCommutable
// let).
// NOTE(review): the memory operands use f128mem although these are integer
// ops — i128mem (as used by the PUNPCK* defs below) may be intended; confirm.
900 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
901 "paddb {$src2, $dst|$dst, $src2}",
902 [(set VR128:$dst, (v16i8 (add VR128:$src1,
903 (load addr:$src2))))]>;
904 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
905 "paddw {$src2, $dst|$dst, $src2}",
906 [(set VR128:$dst, (v8i16 (add VR128:$src1,
907 (load addr:$src2))))]>;
908 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
909 "paddd {$src2, $dst|$dst, $src2}",
910 [(set VR128:$dst, (v4i32 (add VR128:$src1,
911 (load addr:$src2))))]>;
913 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
914 "psubb {$src2, $dst|$dst, $src2}",
915 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
916 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
917 "psubw {$src2, $dst|$dst, $src2}",
918 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
919 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
920 "psubd {$src2, $dst|$dst, $src2}",
921 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
923 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
924 "psubb {$src2, $dst|$dst, $src2}",
925 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
926 (load addr:$src2))))]>;
927 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
928 "psubw {$src2, $dst|$dst, $src2}",
929 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
930 (load addr:$src2))))]>;
931 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
932 "psubd {$src2, $dst|$dst, $src2}",
933 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
934 (load addr:$src2))))]>;
936 // Unpack and interleave
// Integer low-half unpacks (byte/word/dword/qword), matched against
// vector_shuffle with UNPCKL masks. Restored the dropped
// `[(set VR128:$dst,` pattern-head lines.
937 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
938 (ops VR128:$dst, VR128:$src1, VR128:$src2),
939 "punpcklbw {$src2, $dst|$dst, $src2}",
940 [(set VR128:$dst,
941 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
942 UNPCKL_shuffle_mask)))]>;
943 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
944 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
945 "punpcklbw {$src2, $dst|$dst, $src2}",
946 [(set VR128:$dst,
947 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
948 UNPCKL_shuffle_mask)))]>;
949 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
950 (ops VR128:$dst, VR128:$src1, VR128:$src2),
951 "punpcklwd {$src2, $dst|$dst, $src2}",
952 [(set VR128:$dst,
953 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
954 UNPCKL_shuffle_mask)))]>;
955 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
956 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
957 "punpcklwd {$src2, $dst|$dst, $src2}",
958 [(set VR128:$dst,
959 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
960 UNPCKL_shuffle_mask)))]>;
961 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
962 (ops VR128:$dst, VR128:$src1, VR128:$src2),
963 "punpckldq {$src2, $dst|$dst, $src2}",
964 [(set VR128:$dst,
965 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
966 UNPCKL_shuffle_mask)))]>;
967 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
968 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
969 "punpckldq {$src2, $dst|$dst, $src2}",
970 [(set VR128:$dst,
971 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
972 UNPCKL_shuffle_mask)))]>;
973 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
974 (ops VR128:$dst, VR128:$src1, VR128:$src2),
975 "punpcklqdq {$src2, $dst|$dst, $src2}",
976 [(set VR128:$dst,
977 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
978 UNPCKL_shuffle_mask)))]>;
979 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
980 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
981 "punpcklqdq {$src2, $dst|$dst, $src2}",
982 [(set VR128:$dst,
983 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
984 UNPCKL_shuffle_mask)))]>;
// Integer high-half unpacks. Fixed PUNPCKHQDQrr's asm string, which wrongly
// said "punpckhdq" (the dword unpack) instead of "punpckhqdq" — its rm twin
// already had the correct mnemonic. Restored the dropped
// `[(set VR128:$dst,` lines; the trailing `}` closes the isTwoAddress let
// opened at the start of the 128-bit integer arithmetic section.
986 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
987 (ops VR128:$dst, VR128:$src1, VR128:$src2),
988 "punpckhbw {$src2, $dst|$dst, $src2}",
989 [(set VR128:$dst,
990 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
991 UNPCKH_shuffle_mask)))]>;
992 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
993 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
994 "punpckhbw {$src2, $dst|$dst, $src2}",
995 [(set VR128:$dst,
996 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
997 UNPCKH_shuffle_mask)))]>;
998 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
999 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1000 "punpckhwd {$src2, $dst|$dst, $src2}",
1001 [(set VR128:$dst,
1002 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1003 UNPCKH_shuffle_mask)))]>;
1004 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1005 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1006 "punpckhwd {$src2, $dst|$dst, $src2}",
1007 [(set VR128:$dst,
1008 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1009 UNPCKH_shuffle_mask)))]>;
1010 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1011 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1012 "punpckhdq {$src2, $dst|$dst, $src2}",
1013 [(set VR128:$dst,
1014 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1015 UNPCKH_shuffle_mask)))]>;
1016 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1017 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1018 "punpckhdq {$src2, $dst|$dst, $src2}",
1019 [(set VR128:$dst,
1020 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1021 UNPCKH_shuffle_mask)))]>;
1022 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
1023 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1024 "punpckhqdq {$src2, $dst|$dst, $src2}",
1025 [(set VR128:$dst,
1026 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1027 UNPCKH_shuffle_mask)))]>;
1028 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1029 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1030 "punpckhqdq {$src2, $dst|$dst, $src2}",
1031 [(set VR128:$dst,
1032 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1033 UNPCKH_shuffle_mask)))]>;
1034 }
1036 //===----------------------------------------------------------------------===//
1037 // Miscellaneous Instructions
1038 //===----------------------------------------------------------------------===//
1040 // Prefetching loads
// Each prefetch hint has its own mnemonic; the T1/T2/NTA defs previously all
// emitted "prefetcht0". The MRM1m/MRM2m/MRM3m/MRM0m ModRM.reg extensions
// select the hint in the 0F 18 encoding.
1041 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1042 "prefetcht0 $src", []>, TB,
1043 Requires<[HasSSE1]>;
1044 def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
1045 "prefetcht1 $src", []>, TB,
1046 Requires<[HasSSE1]>;
1047 def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
1048 "prefetcht2 $src", []>, TB,
1049 Requires<[HasSSE1]>;
1050 def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
1051 "prefetchnta $src", []>, TB,
1052 Requires<[HasSSE1]>;
1054 // Non-temporal stores
1055 def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1056 "movntq {$src, $dst|$dst, $src}", []>, TB,
1057 Requires<[HasSSE1]>;
1058 def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1059 "movntps {$src, $dst|$dst, $src}", []>, TB,
1060 Requires<[HasSSE1]>;
// NOTE(review): maskmovq's actual destination is implicit (DI); the explicit
// i64mem:$dst operand here models it as a plain memory destination — confirm.
1061 def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1062 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
1063 Requires<[HasSSE1]>;
// Store fence
1066 def SFENCE : I<0xAE, MRM7m, (ops),
1067 "sfence", []>, TB, Requires<[HasSSE1]>;
1069 // Load MXCSR register
1070 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
1071 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1073 //===----------------------------------------------------------------------===//
1074 // Alias Instructions
1075 //===----------------------------------------------------------------------===//
1077 // Alias instructions that map zero vector to pxor / xorp* for sse.
1078 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Restored the dropped asm-string lines (per the pxor/xorp* comment above):
// each alias xors/pcmpeqs a register with itself to materialize the constant.
1079 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1080 "pxor $dst, $dst",
1081 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1082 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1083 "xorps $dst, $dst",
1084 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1085 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1086 "xorpd $dst, $dst",
1087 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
1089 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1090 "pcmpeqd $dst, $dst",
1091 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1093 // Scalar to 128-bit vector with zero extension.
1094 // Three operand (but two address) aliases.
1095 let isTwoAddress = 1 in {
1096 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1097 "movss {$src2, $dst|$dst, $src2}", []>;
1098 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1099 "movsd {$src2, $dst|$dst, $src2}", []>;
1100 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1101 "movd {$src2, $dst|$dst, $src2}", []>;
1102 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
1103 "movq {$src2, $dst|$dst, $src2}", []>;
1104 }
1106 // Loading from memory automatically zeroing upper bits.
1107 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1108 "movss {$src, $dst|$dst, $src}",
1109 [(set VR128:$dst,
1110 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1111 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1112 "movsd {$src, $dst|$dst, $src}",
1113 [(set VR128:$dst,
1114 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1115 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1116 "movd {$src, $dst|$dst, $src}",
1117 [(set VR128:$dst,
1118 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1120 //===----------------------------------------------------------------------===//
1121 // Non-Instruction Patterns
1122 //===----------------------------------------------------------------------===//
1124 // 128-bit vector undef's.
1125 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1126 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1127 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1128 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1129 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1131 // 128-bit vector all zero's.
1132 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1133 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1134 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
1136 // 128-bit vector all one's.
1137 def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
1138 def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
1139 def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1140 def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
// NOTE(review): V_SETALLONES expands to pcmpeqd, an SSE2 instruction, but the
// v4f32 pattern below only requires HasSSE1 — confirm this predicate.
1141 def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
1143 // Load 128-bit integer vector values.
1144 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
1145 Requires<[HasSSE2]>;
1146 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
1147 Requires<[HasSSE2]>;
1148 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
1149 Requires<[HasSSE2]>;
1150 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
1151 Requires<[HasSSE2]>;
1153 // Store 128-bit integer vector values.
1154 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1155 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1156 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1157 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1158 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1159 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1160 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1161 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1163 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits of it are used (completes the truncated comment above).
1165 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1166 Requires<[HasSSE2]>;
1167 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1168 Requires<[HasSSE2]>;
// Bit convert: reinterpret the same VR128 register between v4f32 and v4i32
// without emitting any code.
1171 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1172 Requires<[HasSSE2]>;
1173 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1174 Requires<[HasSSE2]>;
1176 // Zeroing a VR128 then do a MOVS* to the lower bits.
// NOTE(review): movss/xorps are SSE1 instructions, yet the v4f32 pattern
// below requires HasSSE2 — confirm whether HasSSE1 was intended.
1177 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1178 (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
1179 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1180 (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1181 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
1182 (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
1183 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1184 (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
1185 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1186 (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1187 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1188 (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
1190 // Splat v4f32 / v4i32
1191 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1192 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1193 Requires<[HasSSE1]>;
1194 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1195 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1196 Requires<[HasSSE2]>;
1198 // Splat v2f64 / v2i64
1199 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1200 (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1201 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1202 (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1204 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
1205 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1206 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1207 Requires<[HasSSE2]>;
1208 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1209 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1210 Requires<[HasSSE2]>;