1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
26 def X86s2vec : SDNode<"X86ISD::S2VEC",
27 SDTypeProfile<1, 1, []>, []>;
28 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
29 SDTypeProfile<1, 1, []>, []>;
31 def SDTUnpckl : SDTypeProfile<1, 2,
32 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
33 def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
36 //===----------------------------------------------------------------------===//
37 // SSE pattern fragments
38 //===----------------------------------------------------------------------===//
40 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
41 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// 128-bit vector load fragments, one per legal SSE vector type. These let
// instruction patterns match a plain (load ptr) whose result has a specific
// vector element type.
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
50 def fp32imm0 : PatLeaf<(f32 fpimm), [{
51 return N->isExactlyValue(+0.0);
54 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
56 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
57 return getI8Imm(X86::getShuffleSHUFImmediate(N));
60 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
61 return X86::isSplatMask(N);
62 }], SHUFFLE_get_shuf_imm>;
64 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
65 return X86::isSplatMask(N);
68 def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
69 return X86::isMOVLHPSorUNPCKLPDMask(N);
70 }], SHUFFLE_get_shuf_imm>;
72 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
73 return X86::isMOVHLPSMask(N);
74 }], SHUFFLE_get_shuf_imm>;
76 def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
77 return X86::isUNPCKHPDMask(N);
78 }], SHUFFLE_get_shuf_imm>;
80 // Only use PSHUF if it is not a splat.
81 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
82 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
83 }], SHUFFLE_get_shuf_imm>;
85 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
86 return X86::isSHUFPMask(N);
87 }], SHUFFLE_get_shuf_imm>;
89 //===----------------------------------------------------------------------===//
90 // SSE scalar FP Instructions
91 //===----------------------------------------------------------------------===//
93 // Instruction templates
94 // SSI - SSE1 instructions with XS prefix.
95 // SDI - SSE2 instructions with XD prefix.
96 // PSI - SSE1 instructions with TB prefix.
97 // PDI - SSE2 instructions with TB and OpSize prefixes.
98 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
99 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
100 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
101 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
102 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
103 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
104 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
105 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
106 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
107 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
108 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
109 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
110 let Pattern = pattern;
112 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
113 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
114 let Pattern = pattern;
117 // Some 'special' instructions
118 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
119 "#IMPLICIT_DEF $dst",
120 [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
121 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
122 "#IMPLICIT_DEF $dst",
123 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
125 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
126 // scheduler into a branch sequence.
127 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
128 def CMOV_FR32 : I<0, Pseudo,
129 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
130 "#CMOV_FR32 PSEUDO!",
131 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
132 def CMOV_FR64 : I<0, Pseudo,
133 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
134 "#CMOV_FR64 PSEUDO!",
135 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
139 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
140 "movss {$src, $dst|$dst, $src}", []>;
141 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
142 "movss {$src, $dst|$dst, $src}",
143 [(set FR32:$dst, (loadf32 addr:$src))]>;
144 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
145 "movsd {$src, $dst|$dst, $src}", []>;
146 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
147 "movsd {$src, $dst|$dst, $src}",
148 [(set FR64:$dst, (loadf64 addr:$src))]>;
150 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
151 "movss {$src, $dst|$dst, $src}",
152 [(store FR32:$src, addr:$dst)]>;
153 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
154 "movsd {$src, $dst|$dst, $src}",
155 [(store FR64:$src, addr:$dst)]>;
157 // FR32 / FR64 to 128-bit vector conversion.
158 def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
159 "movss {$src, $dst|$dst, $src}",
161 (v4f32 (scalar_to_vector FR32:$src)))]>;
162 def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
163 "movss {$src, $dst|$dst, $src}",
165 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
166 def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
167 "movsd {$src, $dst|$dst, $src}",
169 (v2f64 (scalar_to_vector FR64:$src)))]>;
170 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
171 "movsd {$src, $dst|$dst, $src}",
173 (v4f32 (scalar_to_vector (loadf64 addr:$src))))]>;
176 // Conversion instructions
177 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
178 "cvtss2si {$src, $dst|$dst, $src}", []>;
179 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
180 "cvtss2si {$src, $dst|$dst, $src}", []>;
182 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
183 "cvttss2si {$src, $dst|$dst, $src}",
184 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
185 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
186 "cvttss2si {$src, $dst|$dst, $src}",
187 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
188 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
189 "cvttsd2si {$src, $dst|$dst, $src}",
190 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
191 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
192 "cvttsd2si {$src, $dst|$dst, $src}",
193 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
194 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
195 "cvtsd2ss {$src, $dst|$dst, $src}",
196 [(set FR32:$dst, (fround FR64:$src))]>;
197 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
198 "cvtsd2ss {$src, $dst|$dst, $src}",
199 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
200 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
201 "cvtsi2ss {$src, $dst|$dst, $src}",
202 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
203 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
204 "cvtsi2ss {$src, $dst|$dst, $src}",
205 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
206 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
207 "cvtsi2sd {$src, $dst|$dst, $src}",
208 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
209 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
210 "cvtsi2sd {$src, $dst|$dst, $src}",
211 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
212 // SSE2 instructions with XS prefix
213 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
214 "cvtss2sd {$src, $dst|$dst, $src}",
215 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
217 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
218 "cvtss2sd {$src, $dst|$dst, $src}",
219 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
222 // Arithmetic instructions
223 let isTwoAddress = 1 in {
224 let isCommutable = 1 in {
225 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
226 "addss {$src2, $dst|$dst, $src2}",
227 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
228 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
229 "addsd {$src2, $dst|$dst, $src2}",
230 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
231 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
232 "mulss {$src2, $dst|$dst, $src2}",
233 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
234 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
235 "mulsd {$src2, $dst|$dst, $src2}",
236 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
239 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
240 "addss {$src2, $dst|$dst, $src2}",
241 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
242 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
243 "addsd {$src2, $dst|$dst, $src2}",
244 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
245 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
246 "mulss {$src2, $dst|$dst, $src2}",
247 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
248 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
249 "mulsd {$src2, $dst|$dst, $src2}",
250 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
252 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
253 "divss {$src2, $dst|$dst, $src2}",
254 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
255 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
256 "divss {$src2, $dst|$dst, $src2}",
257 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
258 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
259 "divsd {$src2, $dst|$dst, $src2}",
260 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
261 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
262 "divsd {$src2, $dst|$dst, $src2}",
263 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
265 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
266 "subss {$src2, $dst|$dst, $src2}",
267 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
268 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
269 "subss {$src2, $dst|$dst, $src2}",
270 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
271 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
272 "subsd {$src2, $dst|$dst, $src2}",
273 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
274 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
275 "subsd {$src2, $dst|$dst, $src2}",
276 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
279 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
280 "sqrtss {$src, $dst|$dst, $src}",
281 [(set FR32:$dst, (fsqrt FR32:$src))]>;
282 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
283 "sqrtss {$src, $dst|$dst, $src}",
284 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
285 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
286 "sqrtsd {$src, $dst|$dst, $src}",
287 [(set FR64:$dst, (fsqrt FR64:$src))]>;
288 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
289 "sqrtsd {$src, $dst|$dst, $src}",
290 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
292 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
293 "rsqrtss {$src, $dst|$dst, $src}", []>;
294 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
295 "rsqrtss {$src, $dst|$dst, $src}", []>;
296 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
297 "rcpss {$src, $dst|$dst, $src}", []>;
298 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
299 "rcpss {$src, $dst|$dst, $src}", []>;
301 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
302 "maxss {$src, $dst|$dst, $src}", []>;
303 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
304 "maxss {$src, $dst|$dst, $src}", []>;
305 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
306 "maxsd {$src, $dst|$dst, $src}", []>;
307 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
308 "maxsd {$src, $dst|$dst, $src}", []>;
309 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
310 "minss {$src, $dst|$dst, $src}", []>;
311 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
312 "minss {$src, $dst|$dst, $src}", []>;
313 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
314 "minsd {$src, $dst|$dst, $src}", []>;
315 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
316 "minsd {$src, $dst|$dst, $src}", []>;
// Comparison instructions (CMPSS/CMPSD): compare $src1 against $src (register
// or memory) using the immediate condition code $cc, writing the scalar
// all-ones / all-zeros mask into $dst. Two-address: $dst is tied to $src1.
// No selection patterns are attached yet (pattern list is []).
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// UCOMISS/UCOMISD: unordered scalar FP compare of $src1 with $src2 (register
// or memory), selected from the X86cmp DAG node. Note these use AT&T-style
// operand order in the first asm variant: $src2 then $src1.
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
347 // Aliases of packed instructions for scalar use. These all have names that
350 // Alias instructions that map fld0 to pxor for sse.
351 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
352 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
353 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
354 Requires<[HasSSE1]>, TB, OpSize;
355 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
356 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
357 Requires<[HasSSE2]>, TB, OpSize;
359 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
360 // Upper bits are disregarded.
361 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
362 "movaps {$src, $dst|$dst, $src}", []>;
363 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
364 "movapd {$src, $dst|$dst, $src}", []>;
366 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
367 // Upper bits are disregarded.
368 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
369 "movaps {$src, $dst|$dst, $src}",
370 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
371 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
372 "movapd {$src, $dst|$dst, $src}",
373 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
375 // Alias bitwise logical operations using SSE logical ops on packed FP values.
376 let isTwoAddress = 1 in {
377 let isCommutable = 1 in {
378 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
379 "andps {$src2, $dst|$dst, $src2}",
380 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
381 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
382 "andpd {$src2, $dst|$dst, $src2}",
383 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
384 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
385 "orps {$src2, $dst|$dst, $src2}", []>;
386 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
387 "orpd {$src2, $dst|$dst, $src2}", []>;
388 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
389 "xorps {$src2, $dst|$dst, $src2}",
390 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
391 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
392 "xorpd {$src2, $dst|$dst, $src2}",
393 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
395 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
396 "andps {$src2, $dst|$dst, $src2}",
397 [(set FR32:$dst, (X86fand FR32:$src1,
398 (X86loadpf32 addr:$src2)))]>;
399 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
400 "andpd {$src2, $dst|$dst, $src2}",
401 [(set FR64:$dst, (X86fand FR64:$src1,
402 (X86loadpf64 addr:$src2)))]>;
403 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
404 "orps {$src2, $dst|$dst, $src2}", []>;
405 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
406 "orpd {$src2, $dst|$dst, $src2}", []>;
407 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
408 "xorps {$src2, $dst|$dst, $src2}",
409 [(set FR32:$dst, (X86fxor FR32:$src1,
410 (X86loadpf32 addr:$src2)))]>;
411 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
412 "xorpd {$src2, $dst|$dst, $src2}",
413 [(set FR64:$dst, (X86fxor FR64:$src1,
414 (X86loadpf64 addr:$src2)))]>;
416 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
417 "andnps {$src2, $dst|$dst, $src2}", []>;
418 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
419 "andnps {$src2, $dst|$dst, $src2}", []>;
420 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
421 "andnpd {$src2, $dst|$dst, $src2}", []>;
422 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
423 "andnpd {$src2, $dst|$dst, $src2}", []>;
426 //===----------------------------------------------------------------------===//
427 // SSE packed FP Instructions
428 //===----------------------------------------------------------------------===//
430 // Some 'special' instructions
431 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
432 "#IMPLICIT_DEF $dst",
433 [(set VR128:$dst, (v4f32 (undef)))]>,
437 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
438 "movaps {$src, $dst|$dst, $src}", []>;
439 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
440 "movaps {$src, $dst|$dst, $src}",
441 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
442 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
443 "movapd {$src, $dst|$dst, $src}", []>;
444 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
445 "movapd {$src, $dst|$dst, $src}",
446 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
448 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
449 "movaps {$src, $dst|$dst, $src}",
450 [(store (v4f32 VR128:$src), addr:$dst)]>;
451 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
452 "movapd {$src, $dst|$dst, $src}",
453 [(store (v2f64 VR128:$src), addr:$dst)]>;
// Unaligned packed moves (MOVUPS/MOVUPD). No selection patterns are attached
// ([]); the aligned MOVAPS/MOVAPD forms above carry the load/store patterns.
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
468 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
469 "movlps {$src, $dst|$dst, $src}", []>;
470 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
471 "movlps {$src, $dst|$dst, $src}", []>;
472 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
473 "movlpd {$src, $dst|$dst, $src}", []>;
474 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
475 "movlpd {$src, $dst|$dst, $src}", []>;
477 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
478 "movhps {$src, $dst|$dst, $src}", []>;
479 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
480 "movhps {$src, $dst|$dst, $src}", []>;
481 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
482 "movhpd {$src, $dst|$dst, $src}", []>;
483 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
484 "movhpd {$src, $dst|$dst, $src}", []>;
486 let isTwoAddress = 1 in {
487 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
488 "movlhps {$src2, $dst|$dst, $src2}", []>;
// MOVHLPS (0F 12 /r, reg-reg form): moves the high pair of FP values of
// $src2 into the low quadword of $dst. The asm string previously said
// "movlhps", which is the 0x16 opcode defined above — fixed to "movhlps".
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}", []>;
494 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
495 "movmskps {$src, $dst|$dst, $src}",
496 [(set R32:$dst, (int_x86_sse_movmskps VR128:$src))]>;
// MOVMSKPD is an SSE2 instruction (66 0F 50): it must use the PDI class so
// the OpSize prefix is emitted and HasSSE2 is required. It was previously
// declared PSI, which would encode it as MOVMSKPS and only require SSE1,
// even though the pattern uses the sse2 intrinsic.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
501 // Conversion instructions
502 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
503 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
504 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
505 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
506 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
507 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
508 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
509 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
511 // SSE2 instructions without OpSize prefix
512 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
513 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
515 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
516 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
519 // SSE2 instructions with XS prefix
520 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
521 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
522 XS, Requires<[HasSSE2]>;
523 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
524 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
525 XS, Requires<[HasSSE2]>;
527 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
528 "cvtps2pi {$src, $dst|$dst, $src}", []>;
529 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
530 "cvtps2pi {$src, $dst|$dst, $src}", []>;
531 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
532 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
533 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
534 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
536 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
537 "cvtps2dq {$src, $dst|$dst, $src}", []>;
538 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
539 "cvtps2dq {$src, $dst|$dst, $src}", []>;
540 // SSE2 packed instructions with XD prefix
541 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
542 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
543 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
544 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
546 // SSE2 instructions without OpSize prefix
547 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
548 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// Memory form: must use MRMSrcMem (not MRMSrcReg) so the ModRM byte encodes
// the f64mem operand as a memory reference, as in CVTDQ2PSrm above.
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Memory form: must use MRMSrcMem (not MRMSrcReg) so the ModRM byte encodes
// the f128mem operand as a memory reference.
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
560 let isTwoAddress = 1 in {
561 let isCommutable = 1 in {
562 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
563 "addps {$src2, $dst|$dst, $src2}",
564 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
565 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
566 "addpd {$src2, $dst|$dst, $src2}",
567 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
568 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
569 "mulps {$src2, $dst|$dst, $src2}",
570 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
571 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
572 "mulpd {$src2, $dst|$dst, $src2}",
573 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
576 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
577 "addps {$src2, $dst|$dst, $src2}",
578 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
579 (load addr:$src2))))]>;
580 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
581 "addpd {$src2, $dst|$dst, $src2}",
582 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
583 (load addr:$src2))))]>;
584 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
585 "mulps {$src2, $dst|$dst, $src2}",
586 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
587 (load addr:$src2))))]>;
588 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
589 "mulpd {$src2, $dst|$dst, $src2}",
590 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
591 (load addr:$src2))))]>;
593 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
594 "divps {$src2, $dst|$dst, $src2}",
595 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
596 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
597 "divps {$src2, $dst|$dst, $src2}",
598 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
599 (load addr:$src2))))]>;
600 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
601 "divpd {$src2, $dst|$dst, $src2}",
602 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
603 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
604 "divpd {$src2, $dst|$dst, $src2}",
605 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
606 (load addr:$src2))))]>;
608 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
609 "subps {$src2, $dst|$dst, $src2}",
610 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
611 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
612 "subps {$src2, $dst|$dst, $src2}",
613 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
614 (load addr:$src2))))]>;
615 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
616 "subpd {$src2, $dst|$dst, $src2}",
617 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
618 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
619 "subpd {$src2, $dst|$dst, $src2}",
620 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
621 (load addr:$src2))))]>;
624 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
625 "sqrtps {$src, $dst|$dst, $src}",
626 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
627 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
628 "sqrtps {$src, $dst|$dst, $src}",
629 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
630 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
631 "sqrtpd {$src, $dst|$dst, $src}",
632 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
633 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
634 "sqrtpd {$src, $dst|$dst, $src}",
635 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
637 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
638 "rsqrtps {$src, $dst|$dst, $src}", []>;
639 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
640 "rsqrtps {$src, $dst|$dst, $src}", []>;
641 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
642 "rcpps {$src, $dst|$dst, $src}", []>;
643 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
644 "rcpps {$src, $dst|$dst, $src}", []>;
646 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
647 "maxps {$src, $dst|$dst, $src}", []>;
648 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
649 "maxps {$src, $dst|$dst, $src}", []>;
650 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
651 "maxpd {$src, $dst|$dst, $src}", []>;
652 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
653 "maxpd {$src, $dst|$dst, $src}", []>;
654 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
655 "minps {$src, $dst|$dst, $src}", []>;
656 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
657 "minps {$src, $dst|$dst, $src}", []>;
658 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
659 "minpd {$src, $dst|$dst, $src}", []>;
660 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
661 "minpd {$src, $dst|$dst, $src}", []>;
664 let isTwoAddress = 1 in {
665 let isCommutable = 1 in {
666 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
667 "andps {$src2, $dst|$dst, $src2}",
668 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
669 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
670 "andpd {$src2, $dst|$dst, $src2}",
671 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
672 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
673 "orps {$src2, $dst|$dst, $src2}",
674 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
675 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
676 "orpd {$src2, $dst|$dst, $src2}",
677 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
678 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
679 "xorps {$src2, $dst|$dst, $src2}",
680 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
681 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
682 "xorpd {$src2, $dst|$dst, $src2}",
683 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
// Memory forms of the packed FP logical operations: fold a 128-bit load of
// the second operand.
def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (and VR128:$src1,
                                            (load addr:$src2))))]>;
def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and VR128:$src1,
                                            (load addr:$src2))))]>;
def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v4i32 (or VR128:$src1,
                                           (load addr:$src2))))]>;
def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                           (load addr:$src2))))]>;
def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4i32 (xor VR128:$src1,
                                            (load addr:$src2))))]>;
def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (xor VR128:$src1,
                                            (load addr:$src2))))]>;
// ANDNPS / ANDNPD: dst = ~$src1 & $src2 -- note it is the FIRST operand
// that is inverted, matching the hardware semantics.
def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
                                             (load addr:$src2))))]>;
def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// ANDNPD memory form: dst = ~$src1 & [mem].  The pattern previously omitted
// the (not ...) on $src1, which would have selected ANDNPD for a plain AND
// and produced wrong code; it now mirrors ANDNPSrm above.
def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
                                             (load addr:$src2))))]>;
// Packed FP compares.  $cc selects the comparison predicate and is printed
// as part of the mnemonic (e.g. cmpeqps, cmpltpd).  No ISel patterns yet.
let isTwoAddress = 1 in {
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
// Shuffle and unpack instructions
// NOTE: PSHUFW/PSHUFD read a source register/memory operand and write a
// destination register, so the register forms must use MRMSrcReg (reg field
// encodes $dst, r/m field encodes the source); they were incorrectly
// declared MRMDestReg, which would swap the ModRM encoding.
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
let isTwoAddress = 1 in {
// SHUFPS / SHUFPD: the 8-bit immediate selects which input elements land in
// each result slot.  Only the register-register forms have ISel patterns
// (driven by SHUFP_shuffle_mask); the memory forms are assembler-only.
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// FP unpack-and-interleave.  UNPCKH* take the high elements of both inputs,
// UNPCKL* the low elements; only UNPCKLPS has ISel patterns (X86unpckl).
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
                                               (load addr:$src2))))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
//===----------------------------------------------------------------------===//
// SSE integer instructions
//===----------------------------------------------------------------------===//

// MOVD: move a 32-bit GPR (or 32-bit memory) into the low dword of an XMM
// register (scalar_to_vector), and the store form back out to memory.
def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
                    "movd {$src, $dst|$dst, $src}",
                    (v4i32 (scalar_to_vector R32:$src)))]>;
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                    "movd {$src, $dst|$dst, $src}",
                    (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                    "movd {$src, $dst|$dst, $src}", []>;
// MOVDQA: aligned 128-bit integer moves.  The load/store forms carry the
// v4i32 load/store patterns; the other integer vector types are handled by
// the Pat<> aliases near the end of the file.
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v4i32 VR128:$src), addr:$dst)]>;
// SSE2 instructions with XS prefix
// MOVQ: move 64 bits into the low half of an XMM register, zeroing the
// upper half.
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}",
                  (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                  "movq {$src, $dst|$dst, $src}", []>, XS,
// MOVQ store form (0xD6): stores the low 64 bits of $src to memory.  The
// memory operand is the DESTINATION, so this must be MRMDestMem -- it was
// incorrectly declared MRMSrcMem, which would mis-encode the ModRM byte.
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                    "movq {$src, $dst|$dst, $src}", []>;
// 128-bit Integer Arithmetic
let isTwoAddress = 1 in {
let isCommutable = 1 in {
// Packed integer add, register forms (byte / word / dword elements).
def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "paddb {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "paddw {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "paddd {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
// Packed integer add, memory forms.  These operate on integer vectors, so
// the memory operand class is i128mem (was f128mem), consistent with
// MOVDQA / PUNPCK*; this affects only operand classification, not encoding.
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "paddb {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v16i8 (add VR128:$src1,
                                        (load addr:$src2))))]>;
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "paddw {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v8i16 (add VR128:$src1,
                                        (load addr:$src2))))]>;
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "paddd {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                        (load addr:$src2))))]>;
// Packed integer subtract, register forms (not commutative, hence outside
// the isCommutable block).
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "psubb {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "psubw {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
              "psubd {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
// Packed integer subtract, memory forms.  Integer memory operand class
// (i128mem, was f128mem) for consistency with the other 128-bit integer
// instructions; operand classification only, encoding is unchanged.
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "psubb {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v16i8 (sub VR128:$src1,
                                        (load addr:$src2))))]>;
def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "psubw {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v8i16 (sub VR128:$src1,
                                        (load addr:$src2))))]>;
def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
              "psubd {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                        (load addr:$src2))))]>;
// Unpack and interleave
// PUNPCKL*: interleave the low-half elements of the two sources.  The
// register-register forms are selected through the X86unpckl node.
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
                                                (load addr:$src2))))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
                                                (load addr:$src2))))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
                                                (load addr:$src2))))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
// PUNPCKH*: interleave the high-half elements of the two sources.  No ISel
// patterns yet (assembler-only for now).
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
// PUNPCKHQDQ: interleave the high qwords of the two sources.  The rr form's
// asm string previously said "punpckhdq", which is a different instruction
// (0x6A, dword unpack); it must print "punpckhqdq" like the rm form.
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//

// Prefetch: the locality hint is encoded in the ModRM reg field
// (MRM0m = nta, MRM1m = t0, MRM2m = t1, MRM3m = t2).  Each variant must
// print its own mnemonic; T1/T2/NTA previously all printed "prefetcht0".
def PREFETCHT0   : I<0x18, MRM1m, (ops i8mem:$src),
                     "prefetcht0 $src", []>, TB,
def PREFETCHT1   : I<0x18, MRM2m, (ops i8mem:$src),
                     "prefetcht1 $src", []>, TB,
def PREFETCHT2   : I<0x18, MRM3m, (ops i8mem:$src),
                     "prefetcht2 $src", []>, TB,
def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
                     "prefetchnta $src", []>, TB,
// Non-temporal stores
// NOTE(review): MASKMOVQ's actual store address is implicitly (E)DI per the
// ISA; the explicit i64mem operand here looks suspicious -- verify.
def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
               "movntq {$src, $dst|$dst, $src}", []>, TB,
def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                "movntps {$src, $dst|$dst, $src}", []>, TB,
def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
// Store fence: orders all prior stores before subsequent ones.
def SFENCE : I<0xAE, MRM7m, (ops),
               "sfence", []>, TB, Requires<[HasSSE1]>;

// Load MXCSR register
def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
                "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//

// Alias instructions that map zero vector to pxor / xorp* for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
                    [(set VR128:$dst, (v2i64 immAllZerosV))]>;
def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
                    [(set VR128:$dst, (v4f32 immAllZerosV))]>;
def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
                    [(set VR128:$dst, (v2f64 immAllZerosV))]>;
// Scalar to 128-bit vector with zero extension.
// Three operand (but two address) aliases.
// $src1 is expected to be a zeroed vector (see the X86zexts2vec patterns
// below, which feed V_SET0_* as the first operand).
let isTwoAddress = 1 in {
def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
                      "movss {$src2, $dst|$dst, $src2}", []>;
def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                      "movsd {$src2, $dst|$dst, $src2}", []>;
def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
                     "movd {$src2, $dst|$dst, $src2}", []>;
def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
                   "movq {$src2, $dst|$dst, $src2}", []>;
// Loading from memory automatically zeroing upper bits.
def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                      "movss {$src, $dst|$dst, $src}",
                      (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                      "movsd {$src, $dst|$dst, $src}",
                      (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                     "movd {$src, $dst|$dst, $src}",
                     (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// 128-bit vector undef's.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;

// 128-bit vector all zero's.  All integer zero vectors funnel into the
// single V_SET0_PI (pxor) alias.
def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;

// Load 128-bit integer vector values.  All integer vector loads map onto
// MOVDQArm (whose native pattern is v4i32).
def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;

// Store 128-bit integer vector values.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits are meaningful; the scalar is simply moved into the low element.
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;

// Bitconverts between same-size vector types are free (no instruction).
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
      Requires<[HasSSE2]>;

// Zeroing a VR128 then do a MOVS* to the lower bits.
// 8/16-bit scalars are first zero-extended into a GPR32 before the movd.
def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
          (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
          (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
          (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (X86zexts2vec R32:$src)),
          (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (X86zexts2vec R16:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86zexts2vec R8:$src)),
          (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
// Splat v4f32 / v4i32: shuffle the register with itself using the splat mask.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE2]>;

// Splat v2f64 / v2i64: a self-MOVLHPS duplicates the low qword.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;

// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
// Shuffle v2f64 / v2i64: two-element shuffles map onto MOVLHPS / MOVHLPS /
// UNPCKHPD depending on which mask fragment matches; shuffles whose second
// operand is a load fold into UNPCKLPDrm.
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           MOVHLPS_shuffle_mask:$sm),
          (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
           UNPCKHPD_shuffle_mask:$sm),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;

def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           MOVHLPS_shuffle_mask:$sm),
          (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
           UNPCKHPD_shuffle_mask:$sm),
          (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
          (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;