//===-- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under
// the University of Illinois Open Source License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
// Packed load used to feed a scalar op (upper elements are don't-care).
// Carries a chain operand like any other load.
def X86loadp  : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
                       [SDNPHasChain]>;
// Bitwise logical ops on FP values; both are commutative and associative.
def X86fand   : SDNode<"X86ISD::FAND", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86fxor   : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector insertions; the ZEXT form is presumably the variant that
// zeroes the upper elements -- TODO confirm against X86ISelLowering.
def X86s2vec : SDNode<"X86ISD::S2VEC",
                      SDTypeProfile<1, 1, []>, []>;
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                          SDTypeProfile<1, 1, []>, []>;
// Type profile for unpack nodes: one result, two operands, all the same type.
def SDTUnpckl : SDTypeProfile<1, 2,
                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//

// Scalar f32/f64 loads performed with a packed load (upper bits don't-care).
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;

// 128-bit vector loads for each supported element type.
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

// Matches the f32 immediate +0.0 (materialized via FsFLD0SS below).
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* shuffle immediate.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// Splat mask handled by SHUFPS; carries the shuffle immediate as an xform.
def SHUFP_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;
// Splat mask that can be implemented with MOVLHPS (no immediate needed).
def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}]>;

// Shuffle mask matching the MOVHLPS element pattern.
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isMOVHLPSMask(N);
}]>;

// Shuffle masks matching the low / high interleave (UNPCKL / UNPCKH) patterns.
def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKLMask(N);
}]>;

def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isUNPCKHMask(N);
}]>;
// Only use PSHUF if it is not a splat.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;

// General two-input SHUFP mask; carries the shuffle immediate as an xform.
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//

// Instruction templates
// SSI - SSE1 instructions with XS prefix.
// SDI - SSE2 instructions with XD prefix.
// PSI - SSE1 instructions with TB prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
  let Pattern = pattern;
}
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
  let Pattern = pattern;
}
// Some 'special' instructions
// FR32 is the SSE1 single-precision class, so its implicit-def pseudo only
// needs SSE1 (the original Requires<[HasSSE2]> was overly restrictive).
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
// scheduler into a branch sequence.
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
def CMOV_FR32 : I<0, Pseudo,
                  (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
                  "#CMOV_FR32 PSEUDO!",
                  [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
def CMOV_FR64 : I<0, Pseudo,
                  (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
                  "#CMOV_FR64 PSEUDO!",
                  [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
}
// Move Instructions

// Scalar FP moves.  The reg-reg forms carry no ISel pattern; the memory
// forms load/store a single f32 / f64.
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}", []>;
def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
// FR32 / FR64 to 128-bit vector conversion.
def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
// Conversion instructions

// cvtss2si (round per MXCSR) has no pattern yet; the truncating forms
// (cvttss2si / cvttsd2si) implement fp_to_sint.
def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;
def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                    "cvtss2si {$src, $dst|$dst, $src}", []>;

def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 rounding.
def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// i32 -> f32 / f64.
def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
// SSE2 instructions with XS prefix
// (cvtss2sd uses the XS prefix but is an SSE2 instruction, so it cannot use
// the SSI template; the SSE2 requirement is stated explicitly.)
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                Requires<[HasSSE2]>;
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
                Requires<[HasSSE2]>;
// Arithmetic instructions
let isTwoAddress = 1 in {
// Commutable reg-reg forms of scalar add / mul.
let isCommutable = 1 in {
def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
}
// Reg-mem forms of scalar add / mul (not commutable: memory is always src2).
def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;

// Scalar divide.
def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
// Scalar subtract.
def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
}
// Scalar square root (one input, not two-address).
def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt FR32:$src))]>;
def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt FR64:$src))]>;
def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;

// Approximate reciprocal square root / reciprocal (no patterns yet).
def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;
def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;

// Scalar max / min.  NOTE(review): these are binary ops but are declared
// with a single $src and no two-address constraint -- confirm before adding
// patterns.
def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
// Comparison instructions
// cmp$cc ss/sd: $cc selects the predicate encoded in the trailing imm8.
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
}
// Unordered compare, setting EFLAGS (maps the X86cmp node).
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
// Aliases of packed SSE instructions for scalar use.  These all have names
// that start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): the TB+OpSize encoding (66 0F EF) is the SSE2 pxor-on-xmm
// form, yet FsFLD0SS only requires HasSSE1 -- confirm this is intended.
// NOTE(review): fp64imm0 is not defined in this chunk; verify it exists
// elsewhere in the file.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                     "movapd {$src, $dst|$dst, $src}", []>;

// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
// Alias bitwise logical operations using SSE logical ops on packed FP values.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
// OR forms carry no pattern yet (no X86for node is visible in this file).
def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
}
// Reg-mem forms; the memory operand is a 128-bit packed load.
def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;
def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;
// and-not forms (no patterns yet).
def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
}
//===----------------------------------------------------------------------===//
// SSE packed FP Instructions
//===----------------------------------------------------------------------===//

// Some 'special' instructions
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;
// Move Instructions

// Aligned 128-bit loads/stores/copies.
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}", []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;

// Unaligned 128-bit moves (no patterns yet).
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;

// Moves of the low 64 bits of an XMM register (no patterns yet).
def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;
// Load into the high 64 bits of an XMM register.
let isTwoAddress = 1 in {
def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhps {$src2, $dst|$dst, $src2}", []>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
                   "movhpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2f64 (vector_shuffle VR128:$src1,
                             (scalar_to_vector (loadf64 addr:$src2)),
                             UNPCKL_shuffle_mask)))]>;
}
// Store the high 64 bits of an XMM register (no patterns yet).
def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;
let isTwoAddress = 1 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movlhps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                              UNPCKL_shuffle_mask)))]>;

// Opcode 0x12 is MOVHLPS; the asm string previously said "movlhps"
// (copy-paste error), which assembled the wrong instruction.
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                              MOVHLPS_shuffle_mask)))]>;
}
// Extract the sign bits of the packed elements into a GPR.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is an SSE2 instruction encoded 66 0F 50, so it must use the PDI
// template (it was incorrectly declared PSI, dropping the 0x66 prefix).
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
// Conversion instructions

// MMX <-> packed-single / packed-double conversions (no patterns yet).
def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;

// SSE2 instructions without OpSize prefix
def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
530 // SSE2 instructions with XS prefix
531 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
532 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
533 XS, Requires<[HasSSE2]>;
534 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
535 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
536 XS, Requires<[HasSSE2]>;
538 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
539 "cvtps2pi {$src, $dst|$dst, $src}", []>;
540 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
541 "cvtps2pi {$src, $dst|$dst, $src}", []>;
542 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
543 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
544 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
545 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
547 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
548 "cvtps2dq {$src, $dst|$dst, $src}", []>;
549 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
550 "cvtps2dq {$src, $dst|$dst, $src}", []>;
// SSE2 packed instructions with XD prefix
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;

// SSE2 instructions without OpSize prefix
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
// The rm forms take memory operands and therefore must use MRMSrcMem
// (they were incorrectly declared MRMSrcReg).
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Arithmetic instructions
let isTwoAddress = 1 in {
// Commutable reg-reg packed add / mul.
let isCommutable = 1 in {
def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
}
// Reg-mem packed add / mul.
def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;

// Packed divide.
def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;

// Packed subtract.
def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
}
// Packed square root.
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;

// Approximate reciprocal square root / reciprocal (no patterns yet).
def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;

// Packed max / min.  NOTE(review): binary ops declared with a single $src
// and no two-address constraint -- confirm before adding patterns.
def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
// Logical instructions (operate on the 128-bit value as integers).
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "andps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
}
696 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
697 "andps {$src2, $dst|$dst, $src2}",
698 [(set VR128:$dst, (v4i32 (and VR128:$src1,
699 (load addr:$src2))))]>;
700 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
701 "andpd {$src2, $dst|$dst, $src2}",
702 [(set VR128:$dst, (v2i64 (and VR128:$src1,
703 (load addr:$src2))))]>;
704 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
705 "orps {$src2, $dst|$dst, $src2}",
706 [(set VR128:$dst, (v4i32 (or VR128:$src1,
707 (load addr:$src2))))]>;
708 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
709 "orpd {$src2, $dst|$dst, $src2}",
710 [(set VR128:$dst, (v2i64 (or VR128:$src1,
711 (load addr:$src2))))]>;
712 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
713 "xorps {$src2, $dst|$dst, $src2}",
714 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
715 (load addr:$src2))))]>;
716 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
717 "xorpd {$src2, $dst|$dst, $src2}",
718 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
719 (load addr:$src2))))]>;
// ANDN (and-not) packed FP ops.  Not commutative: the FIRST operand is
// complemented, i.e. dst = ~src1 & src2.
// NOTE(review): the closing lines of the ANDNPSrr and ANDNPDrr patterns
// are not visible in this copy of the file — verify they are intact.
720 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
721 "andnps {$src2, $dst|$dst, $src2}",
722 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
724 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
725 "andnps {$src2, $dst|$dst, $src2}",
726 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
727 (load addr:$src2))))]>;
728 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
729 "andnpd {$src2, $dst|$dst, $src2}",
730 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// ANDNPD mem form: dst = ~src1 & load(src2).
// FIX: the pattern previously matched plain (and VR128:$src1, ...), which
// is ANDPD semantics; andnpd complements the first operand, as in the
// ANDNPSrr/ANDNPSrm/ANDNPDrr patterns above.
733 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
734 "andnpd {$src2, $dst|$dst, $src2}",
735 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
736 (load addr:$src2))))]>;
// Packed FP compares.  $cc selects the comparison predicate, substituted
// into the mnemonic as cmp${cc}ps / cmp${cc}pd.  No isel patterns yet.
// NOTE(review): the brace closing this `isTwoAddress` scope is not
// visible in this copy — confirm braces balance in the original.
739 let isTwoAddress = 1 in {
740 def CMPPSrr : PSI<0xC2, MRMSrcReg,
741 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
742 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
743 def CMPPSrm : PSI<0xC2, MRMSrcMem,
744 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
745 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
746 def CMPPDrr : PDI<0xC2, MRMSrcReg,
747 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
748 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
749 def CMPPDrm : PDI<0xC2, MRMSrcMem,
750 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
751 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
754 // Shuffle and unpack instructions
// PSHUFW (MMX, 0F 70 /r) and PSHUFD (SSE2, 66 0F 70 /r) shuffle with an
// immediate control byte.  FIX: the rr forms previously used MRMDestReg,
// but for opcode 0x70 the ModRM reg field is the destination and r/m the
// source, i.e. MRMSrcReg — matching the MRMSrcMem used by the rm forms.
755 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
756 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
757 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
758 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
759 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
760 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
761 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
762 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
763 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
764 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
765 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
766 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// SHUFPS / SHUFPD: shuffle two vectors under an immediate mask, selected
// via vector_shuffle with the SHUFP_shuffle_mask fragment.
// NOTE(review): SHUFPS takes i32i8imm for $src3 while SHUFPD takes i8imm —
// confirm this inconsistency is intentional.
768 let isTwoAddress = 1 in {
769 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
770 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
771 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
772 [(set VR128:$dst, (vector_shuffle
773 (v4f32 VR128:$src1), (v4f32 VR128:$src2),
774 SHUFP_shuffle_mask:$src3))]>;
775 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
776 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
777 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
778 [(set VR128:$dst, (vector_shuffle
779 (v4f32 VR128:$src1), (load addr:$src2),
780 SHUFP_shuffle_mask:$src3))]>;
781 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
782 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
783 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
784 [(set VR128:$dst, (vector_shuffle
785 (v2f64 VR128:$src1), (v2f64 VR128:$src2),
786 SHUFP_shuffle_mask:$src3))]>;
787 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
788 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
789 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
790 [(set VR128:$dst, (vector_shuffle
791 (v2f64 VR128:$src1), (load addr:$src2),
792 SHUFP_shuffle_mask:$src3))]>;
// Packed FP unpack/interleave of high (unpckh*) and low (unpckl*) halves,
// selected via vector_shuffle with UNPCKH/UNPCKL shuffle mask fragments.
// NOTE(review): the "[(set VR128:$dst," lines of these patterns are not
// visible in this copy of the file — verify the patterns are intact.
794 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
795 (ops VR128:$dst, VR128:$src1, VR128:$src2),
796 "unpckhps {$src2, $dst|$dst, $src2}",
798 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
799 UNPCKH_shuffle_mask)))]>;
800 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
801 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
802 "unpckhps {$src2, $dst|$dst, $src2}",
804 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
805 UNPCKH_shuffle_mask)))]>;
806 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
807 (ops VR128:$dst, VR128:$src1, VR128:$src2),
808 "unpckhpd {$src2, $dst|$dst, $src2}",
810 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
811 UNPCKH_shuffle_mask)))]>;
812 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
813 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
814 "unpckhpd {$src2, $dst|$dst, $src2}",
816 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
817 UNPCKH_shuffle_mask)))]>;
819 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
820 (ops VR128:$dst, VR128:$src1, VR128:$src2),
821 "unpcklps {$src2, $dst|$dst, $src2}",
823 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
824 UNPCKL_shuffle_mask)))]>;
825 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
826 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
827 "unpcklps {$src2, $dst|$dst, $src2}",
829 (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
830 UNPCKL_shuffle_mask)))]>;
831 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
832 (ops VR128:$dst, VR128:$src1, VR128:$src2),
833 "unpcklpd {$src2, $dst|$dst, $src2}",
835 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
836 UNPCKL_shuffle_mask)))]>;
837 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
838 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
839 "unpcklpd {$src2, $dst|$dst, $src2}",
841 (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
842 UNPCKL_shuffle_mask)))]>;
845 //===----------------------------------------------------------------------===//
846 // SSE integer instructions
847 //===----------------------------------------------------------------------===//
// movd: scalar GPR / i32 memory <-> low dword of an XMM register
// (scalar_to_vector on the load/move-in side; the store form has no
// pattern).  movdqa: aligned 128-bit integer vector load/store;
// patterns use v4i32 as the canonical integer vector type.
850 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
851 "movd {$src, $dst|$dst, $src}",
853 (v4i32 (scalar_to_vector R32:$src)))]>;
854 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
855 "movd {$src, $dst|$dst, $src}",
857 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
859 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
860 "movd {$src, $dst|$dst, $src}", []>;
862 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
863 "movdqa {$src, $dst|$dst, $src}", []>;
864 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
865 "movdqa {$src, $dst|$dst, $src}",
866 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
867 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
868 "movdqa {$src, $dst|$dst, $src}",
869 [(store (v4i32 VR128:$src), addr:$dst)]>;
871 // SSE2 instructions with XS prefix
// movq (F3 0F 7E): move quadword into the low half of an XMM register.
// NOTE(review): the trailing Requires<...> lines of these two defs are
// not visible in this copy of the file — verify they are intact.
872 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
873 "movq {$src, $dst|$dst, $src}",
875 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
877 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
878 "movq {$src, $dst|$dst, $src}", []>, XS,
// movq store form (66 0F D6): XMM low quadword -> memory.
// FIX: this is a store ($dst is memory, $src is a register), so the
// ModRM format must be MRMDestMem, not MRMSrcMem — consistent with
// MOVD128mr / MOVDQAmr above.
880 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
881 "movq {$src, $dst|$dst, $src}", []>;
883 // 128-bit Integer Arithmetic
// Packed integer add (commutable) and subtract for byte/word/dword
// elements, two-address, with register and memory forms.
// NOTE(review): the memory forms use f128mem for an integer operand;
// the other integer instructions here use i128mem — confirm intent.
884 let isTwoAddress = 1 in {
885 let isCommutable = 1 in {
886 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
887 "paddb {$src2, $dst|$dst, $src2}",
888 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
889 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
890 "paddw {$src2, $dst|$dst, $src2}",
891 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
892 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
893 "paddd {$src2, $dst|$dst, $src2}",
894 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
896 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
897 "paddb {$src2, $dst|$dst, $src2}",
898 [(set VR128:$dst, (v16i8 (add VR128:$src1,
899 (load addr:$src2))))]>;
900 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
901 "paddw {$src2, $dst|$dst, $src2}",
902 [(set VR128:$dst, (v8i16 (add VR128:$src1,
903 (load addr:$src2))))]>;
904 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
905 "paddd {$src2, $dst|$dst, $src2}",
906 [(set VR128:$dst, (v4i32 (add VR128:$src1,
907 (load addr:$src2))))]>;
// Subtract is not commutable, hence defined outside isCommutable scope.
909 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
910 "psubb {$src2, $dst|$dst, $src2}",
911 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
912 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
913 "psubw {$src2, $dst|$dst, $src2}",
914 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
915 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
916 "psubd {$src2, $dst|$dst, $src2}",
917 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
919 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
920 "psubb {$src2, $dst|$dst, $src2}",
921 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
922 (load addr:$src2))))]>;
923 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
924 "psubw {$src2, $dst|$dst, $src2}",
925 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
926 (load addr:$src2))))]>;
927 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
928 "psubd {$src2, $dst|$dst, $src2}",
929 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
930 (load addr:$src2))))]>;
932 // Unpack and interleave
// Packed integer unpack/interleave of low (punpckl*) and high (punpckh*)
// halves for byte/word/dword/qword elements, selected via vector_shuffle
// with UNPCKL/UNPCKH shuffle mask fragments.
// NOTE(review): the "[(set VR128:$dst," lines of these patterns are not
// visible in this copy of the file — verify the patterns are intact.
933 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
934 (ops VR128:$dst, VR128:$src1, VR128:$src2),
935 "punpcklbw {$src2, $dst|$dst, $src2}",
937 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
938 UNPCKL_shuffle_mask)))]>;
939 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
940 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
941 "punpcklbw {$src2, $dst|$dst, $src2}",
943 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
944 UNPCKL_shuffle_mask)))]>;
945 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
946 (ops VR128:$dst, VR128:$src1, VR128:$src2),
947 "punpcklwd {$src2, $dst|$dst, $src2}",
949 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
950 UNPCKL_shuffle_mask)))]>;
951 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
952 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
953 "punpcklwd {$src2, $dst|$dst, $src2}",
955 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
956 UNPCKL_shuffle_mask)))]>;
957 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
958 (ops VR128:$dst, VR128:$src1, VR128:$src2),
959 "punpckldq {$src2, $dst|$dst, $src2}",
961 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
962 UNPCKL_shuffle_mask)))]>;
963 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
964 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
965 "punpckldq {$src2, $dst|$dst, $src2}",
967 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
968 UNPCKL_shuffle_mask)))]>;
969 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
970 (ops VR128:$dst, VR128:$src1, VR128:$src2),
971 "punpcklqdq {$src2, $dst|$dst, $src2}",
973 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
974 UNPCKL_shuffle_mask)))]>;
975 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
976 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
977 "punpcklqdq {$src2, $dst|$dst, $src2}",
979 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
980 UNPCKL_shuffle_mask)))]>;
982 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
983 (ops VR128:$dst, VR128:$src1, VR128:$src2),
984 "punpckhbw {$src2, $dst|$dst, $src2}",
986 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
987 UNPCKH_shuffle_mask)))]>;
988 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
989 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
990 "punpckhbw {$src2, $dst|$dst, $src2}",
992 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
993 UNPCKH_shuffle_mask)))]>;
994 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
995 (ops VR128:$dst, VR128:$src1, VR128:$src2),
996 "punpckhwd {$src2, $dst|$dst, $src2}",
998 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
999 UNPCKH_shuffle_mask)))]>;
1000 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1001 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1002 "punpckhwd {$src2, $dst|$dst, $src2}",
1004 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1005 UNPCKH_shuffle_mask)))]>;
1006 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1007 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1008 "punpckhdq {$src2, $dst|$dst, $src2}",
1010 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1011 UNPCKH_shuffle_mask)))]>;
1012 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1013 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1014 "punpckhdq {$src2, $dst|$dst, $src2}",
1016 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1017 UNPCKH_shuffle_mask)))]>;
// PUNPCKHQDQ (0x6D): interleave high quadwords.
// FIX: the rr asm string previously said "punpckhdq" (the 0x6A dword
// form) — it must be "punpckhqdq", matching the rm form and the opcode.
1018 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
1019 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1020 "punpckhqdq {$src2, $dst|$dst, $src2}",
1022 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1023 UNPCKH_shuffle_mask)))]>;
1024 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1025 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1026 "punpckhqdq {$src2, $dst|$dst, $src2}",
1028 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1029 UNPCKH_shuffle_mask)))]>;
1032 //===----------------------------------------------------------------------===//
1033 // Miscellaneous Instructions
1034 //===----------------------------------------------------------------------===//
1036 // Prefetching loads
// 0F 18 /hint: nta=/0, t0=/1, t1=/2, t2=/3 (matching the MRM*m codes).
// FIX: all four defs previously emitted the mnemonic "prefetcht0";
// T1/T2/NTA now print prefetcht1 / prefetcht2 / prefetchnta.
1037 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1038 "prefetcht0 $src", []>, TB,
1039 Requires<[HasSSE1]>;
1040 def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
1041 "prefetcht1 $src", []>, TB,
1042 Requires<[HasSSE1]>;
1043 def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
1044 "prefetcht2 $src", []>, TB,
1045 Requires<[HasSSE1]>;
1046 def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
1047 "prefetchnta $src", []>, TB,
1048 Requires<[HasSSE1]>;
1050 // Non-temporal stores
// movntq/movntps bypass the cache; maskmovq stores bytes selected by a
// mask register.  No isel patterns yet — assembler/encoding only.
1051 def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1052 "movntq {$src, $dst|$dst, $src}", []>, TB,
1053 Requires<[HasSSE1]>;
1054 def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1055 "movntps {$src, $dst|$dst, $src}", []>, TB,
1056 Requires<[HasSSE1]>;
1057 def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1058 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
1059 Requires<[HasSSE1]>;
// Store fence: orders preceding stores before subsequent ones.
1062 def SFENCE : I<0xAE, MRM7m, (ops),
1063 "sfence", []>, TB, Requires<[HasSSE1]>;
1065 // Load MXCSR register
1066 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
1067 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1069 //===----------------------------------------------------------------------===//
1070 // Alias Instructions
1071 //===----------------------------------------------------------------------===//
1073 // Alias instructions that map zero vector to pxor / xorp* for sse.
1074 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): the asm-string lines of the three V_SET0_* defs are not
// visible in this copy of the file — verify they are intact.
1075 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1077 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1078 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1080 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1081 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1083 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
// All-ones via pcmpeqd reg,reg.
// NOTE(review): the pattern types it as v2f64 immAllOnesV — an FP type
// for an integer compare result is unusual; confirm v4i32 isn't intended.
1085 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1086 "pcmpeqd $dst, $dst",
1087 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1089 // Scalar to 128-bit vector with zero extension.
1090 // Three operand (but two address) aliases.
// NOTE(review): the brace closing this `isTwoAddress` scope is not
// visible in this copy — confirm braces balance in the original.
1091 let isTwoAddress = 1 in {
1092 def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1093 "movss {$src2, $dst|$dst, $src2}", []>;
1094 def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1095 "movsd {$src2, $dst|$dst, $src2}", []>;
1096 def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1097 "movd {$src2, $dst|$dst, $src2}", []>;
1098 def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
1099 "movq {$src2, $dst|$dst, $src2}", []>;
1102 // Loading from memory automatically zeroing upper bits.
1103 def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1104 "movss {$src, $dst|$dst, $src}",
1106 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1107 def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1108 "movsd {$src, $dst|$dst, $src}",
1110 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1111 def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1112 "movd {$src, $dst|$dst, $src}",
1114 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
1116 //===----------------------------------------------------------------------===//
1117 // Non-Instruction Patterns
1118 //===----------------------------------------------------------------------===//
1120 // 128-bit vector undef's.
// Map vector undef to an implicit-def of a VR128 register.
1121 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1122 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1123 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1124 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1125 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1127 // 128-bit vector all zero's.
// Map integer all-zero vectors onto V_SET0_PI (pxor reg,reg).
1128 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1129 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1130 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
1132 // 128-bit vector all one's.
// Map all-ones vectors onto V_SETALLONES (pcmpeqd reg,reg).
// FIX: the v4f32 pattern was gated on HasSSE1 only, but V_SETALLONES is
// a PDI (pcmpeqd, an SSE2 instruction), so selecting it on an SSE1-only
// target would emit an illegal instruction; require HasSSE2 like the rest.
1133 def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
1134 def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
1135 def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1136 def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
1137 def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1139 // Load 128-bit integer vector values.
// All integer vector loads/stores map to movdqa (MOVDQArm / MOVDQAmr).
1140 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
1141 Requires<[HasSSE2]>;
1142 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
1143 Requires<[HasSSE2]>;
1144 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
1145 Requires<[HasSSE2]>;
1146 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
1147 Requires<[HasSSE2]>;
1149 // Store 128-bit integer vector values.
1150 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1151 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1152 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1153 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1154 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1155 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1156 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1157 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1159 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
1161 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1162 Requires<[HasSSE2]>;
1163 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
1164 Requires<[HasSSE2]>;
// Bitcasts between v4i32 and v4f32 are register-only no-ops.
1167 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1168 Requires<[HasSSE2]>;
1169 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1170 Requires<[HasSSE2]>;
1172 // Zeroing a VR128 then do a MOVS* to the lower bits.
// NOTE(review): the v4f32 case below requires HasSSE2 — confirm it
// shouldn't be HasSSE1 (movss/xorps are SSE1 instructions).
1173 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1174 (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
1175 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1176 (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1177 def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
1178 (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
1179 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1180 (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
// Narrow scalars are zero-extended to 32 bits first, then moved in.
1181 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1182 (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1183 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1184 (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
1186 // Splat v4f32 / v4i32
// Splat via shufps with both operands the same register.
1187 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1188 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1189 Requires<[HasSSE1]>;
1190 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
1191 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
1192 Requires<[HasSSE2]>;
1194 // Splat v2f64 / v2i64
// Splat via movlhps with both operands the same register.
1195 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1196 (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1197 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
1198 (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1200 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
1201 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1202 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1203 Requires<[HasSSE2]>;
1204 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
1205 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
1206 Requires<[HasSSE2]>;
// v2i64 shuffles: reuse the FP movlhps/movhlps/unpck*pd instructions.
1209 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1210 UNPCKL_shuffle_mask:$sm),
1211 (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1212 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
1213 MOVHLPS_shuffle_mask:$sm),
1214 (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
1215 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (load addr:$src2),
1216 UNPCKL_shuffle_mask:$sm),
1217 (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
1218 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (load addr:$src2),
1219 UNPCKH_shuffle_mask:$sm),
1220 (v2i64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;