1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
// This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// Type profile for the X86 unpack-low node: one result, two operands,
// all three constrained to the same (vector) type.
def SDTX86Unpcklp : SDTypeProfile<1, 2,
                                  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
// Custom packed-load node. The properties list was truncated in this copy;
// restored here: a load node must carry a chain so it is ordered with respect
// to other memory operations.
def X86loadp  : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
                       [SDNPHasChain]>;
// Bitwise AND / XOR on FP values (used for sign/abs manipulation).
// Both are commutative and associative, which lets the DAG combiner reassociate.
def X86fand   : SDNode<"X86ISD::FAND",      SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86fxor   : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
// Insert a scalar into the low element of a vector. The empty type profile
// places no constraints; types are fixed by the patterns that use this node.
def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
                      SDTypeProfile<1, 1, []>, []>;
// Unpack-low node. This def was truncated in this copy; restored here using
// the SDTX86Unpcklp profile declared above (1 result, 2 same-typed operands).
def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
                        SDTX86Unpcklp, []>;
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar-typed views of the packed load node: used by the Fs* alias
// instructions, which load 128 bits but only care about the low element.
def X86loadpf32  : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
def X86loadpf64  : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// Typed 128-bit load fragments, one per supported vector type.
def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv16i8    : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
def loadv8i16    : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. shuffle immediate.
// Convert a build_vector shuffle mask to a SHUFP*-style 8-bit immediate.
// Both xforms were truncated in this copy; the standard "}]>;" closers
// are restored.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// Convert a build_vector shuffle mask to a PSHUFD-style 8-bit immediate.
def SHUFFLE_get_pshufd_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFDImmediate(N));
}]>;
// Matches a splat shuffle mask; the xform turns it into the SHUFP* immediate.
def SHUFP_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;
// Matches a splat shuffle mask for the MOVLHPS form. This def was truncated
// in this copy; restored with the plain "}]>;" closer (no immediate xform —
// MOVLHPS takes no immediate operand).
def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
  return X86::isSplatMask(N);
}]>;
// Only use PSHUF if it is not a splat.
// Splats are handled by the SHUFP/MOVLHPS masks above, so this leaf
// explicitly excludes them to avoid overlapping patterns.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
}], SHUFFLE_get_pshufd_imm>;
72 //===----------------------------------------------------------------------===//
73 // SSE scalar FP Instructions
74 //===----------------------------------------------------------------------===//
76 // Instruction templates
77 // SSI - SSE1 instructions with XS prefix.
78 // SDI - SSE2 instructions with XD prefix.
79 // PSI - SSE1 instructions with TB prefix.
80 // PDI - SSE2 instructions with TB and OpSize prefixes.
81 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
82 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// SSI - SSE1 scalar-single instruction: XS (F3) prefix, requires SSE1.
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
// SDI - SSE2 scalar-double instruction: XD (F2) prefix, requires SSE2.
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
// PSI - SSE1 packed-single instruction: TB (0F) opcode map, requires SSE1.
class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
// PDI - SSE2 packed-double instruction: TB map plus OpSize (66) prefix.
class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// PSIi8 / PDIi8 - SSE1/SSE2 instructions taking an 8-bit immediate.
// Both class bodies were truncated in this copy; the closing braces are
// restored.
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
  let Pattern = pattern;
}
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
      : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
  let Pattern = pattern;
}
// Some 'special' instructions
// Pseudo-ops that materialize an undefined value in an SSE scalar register.
// NOTE(review): the FR32 form is gated on HasSSE2 even though FR32 ops only
// need SSE1 — looks like it should be Requires<[HasSSE1]>; confirm intent.
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
// scheduler into a branch sequence.
// The closing brace of this 'let' group was truncated in this copy; restored.
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
  def CMOV_FR32 : I<0, Pseudo,
                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
                    "#CMOV_FR32 PSEUDO!",
                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
  def CMOV_FR64 : I<0, Pseudo,
                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
                    "#CMOV_FR64 PSEUDO!",
                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
}
// Scalar move instructions.  The reg-reg forms carry no pattern (selected
// manually); the load/store forms map directly to loadf32/loadf64 and store.
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}", []>;
def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
// Conversion instructions
// cvtt*2si: FP -> int32 with truncation; cvtsd2ss: f64 -> f32 round;
// cvtsi2s*: int32 -> FP.  Each has a register form and a memory-source form.
def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                     "cvttss2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                     "cvttsd2si {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
                    "cvtsd2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                    "cvtsi2ss {$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp R32:$src))]>;
def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                    "cvtsi2sd {$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
// SSE2 instructions with XS prefix
// cvtss2sd (f32 -> f64 extend) uses the XS prefix but requires SSE2, so it
// cannot use the SSI class.  The trailing Requires<[HasSSE2]>; on both defs
// was truncated in this copy and is restored (same pattern as CVTDQ2PD below).
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                Requires<[HasSSE2]>;
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
                  "cvtss2sd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
                Requires<[HasSSE2]>;
// Arithmetic instructions
// Two-address scalar FP add/mul/div/sub.  The closing braces of the
// isCommutable and isTwoAddress groups were truncated in this copy; restored.
let isTwoAddress = 1 in {

let isCommutable = 1 in {
def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
} // isCommutable

def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "addss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "addsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "mulss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "mulsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;

def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "divss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "divsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;

def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "subss {$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "subsd {$src2, $dst|$dst, $src2}",
                  [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
} // isTwoAddress
// Scalar sqrt has patterns; rsqrt/rcp (approximations) and min/max are left
// pattern-less here and selected by other means.
def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt FR32:$src))]>;
def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                   "sqrtss {$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt FR64:$src))]>;
def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                   "sqrtsd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;

def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                    "rsqrtss {$src, $dst|$dst, $src}", []>;
def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;
def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "rcpss {$src, $dst|$dst, $src}", []>;

def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "maxss {$src, $dst|$dst, $src}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "maxsd {$src, $dst|$dst, $src}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "minss {$src, $dst|$dst, $src}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "minsd {$src, $dst|$dst, $src}", []>;
// Comparison instructions
// cmp$cc ss/sd with an SSECC condition-code operand folded into the mnemonic.
// The closing brace of this isTwoAddress group was truncated; restored.
let isTwoAddress = 1 in {
def CMPSSrr : SSI<0xC2, MRMSrcReg,
                  (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSSrm : SSI<0xC2, MRMSrcMem,
                  (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
                  "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
def CMPSDrr : SDI<0xC2, MRMSrcReg,
                  (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
def CMPSDrm : SDI<0xC2, MRMSrcMem,
                  (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
                  "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
}
// Unordered compare, setting EFLAGS (selected from the X86cmp node).
// Note these take two sources and no destination register operand.
def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                   "ucomiss {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                   "ucomisd {$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
// Aliases of packed instructions for scalar use. These all have names that
// start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                     "movapd {$src, $dst|$dst, $src}", []>;

// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// The closing braces of the isCommutable and isTwoAddress groups were
// truncated in this copy; restored.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
def FsORPSrr  : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrr  : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
} // isCommutable

def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fand FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fand FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;
def FsORPSrm  : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "orps {$src2, $dst|$dst, $src2}", []>;
def FsORPDrm  : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "orpd {$src2, $dst|$dst, $src2}", []>;
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set FR32:$dst, (X86fxor FR32:$src1,
                                      (X86loadpf32 addr:$src2)))]>;
def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set FR64:$dst, (X86fxor FR64:$src1,
                                      (X86loadpf64 addr:$src2)))]>;

def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}", []>;
} // isTwoAddress
385 //===----------------------------------------------------------------------===//
386 // SSE packed FP Instructions
387 //===----------------------------------------------------------------------===//
// Some 'special' instructions
// Pseudo-op materializing an undefined 128-bit vector register.  The trailing
// Requires<...>; was truncated in this copy; restored with HasSSE1 (v4f32 is
// an SSE1 type).
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;
// Aligned packed moves (load, reg-reg, store).
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}", []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
// Unaligned packed moves (no patterns; selected manually when needed).
def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}", []>;
// Low/high 64-bit half moves between memory and VR128.
def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}", []>;
def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}", []>;

def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}", []>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}", []>;
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "movlhps {$src, $dst|$dst, $src}", []>;
// Fixed: opcode 0x12 (reg form) is MOVHLPS, but the mnemonic read "movlhps" —
// a copy/paste of the def above, which would assemble/print the wrong
// instruction name.
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "movhlps {$src, $dst|$dst, $src}", []>;
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}", []>;
// Fixed: movmskpd is an SSE2 instruction encoded with the 0x66 prefix, so it
// must use the PDI class (TB + OpSize + HasSSE2), not PSI.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}", []>;
// Conversion instructions
// MMX (VR64) <-> packed FP conversions.
def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix
// The trailing Requires<[HasSSE2]>; on both defs was truncated in this copy
// and is restored (same structure as CVTDQ2PD below).
def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
// SSE2 instructions with XS prefix
// cvtdq2pd needs XS + SSE2, which no helper class provides, hence raw I<>.
def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                 XS, Requires<[HasSSE2]>;
def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
                 XS, Requires<[HasSSE2]>;
// Packed FP -> MMX (VR64) conversions.
def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
                     "cvtps2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
                     "cvtpd2pi {$src, $dst|$dst, $src}", []>;
// Packed FP <-> packed int32 conversions.
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
// SSE2 packed instructions with XD prefix
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
// SSE2 instructions without OpSize prefix
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
// Fixed: the memory form was declared MRMSrcReg with an f64mem operand;
// it must be MRMSrcMem.  The truncated Requires<[HasSSE2]>; is also restored.
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
                 Requires<[HasSSE2]>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Fixed: the memory form was declared MRMSrcReg with an f128mem operand;
// it must be MRMSrcMem.
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Packed FP arithmetic.  The closing braces of the isCommutable and
// isTwoAddress groups were truncated in this copy; restored.  SUBPDrr/rm are
// also given the explicit v2f64 type wrapper every sibling pattern uses
// (the type is the same either way; this is for consistency).
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
} // isCommutable

def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "addpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;
def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "mulpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
                                            (load addr:$src2))))]>;

def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;
def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "divpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
                                            (load addr:$src2))))]>;

def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "subpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
                                            (load addr:$src2))))]>;
} // isTwoAddress
// Packed square root.
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                   "sqrtpd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
// Packed approximate reciprocal square root / reciprocal (no patterns).
def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                    "rsqrtps {$src, $dst|$dst, $src}", []>;
def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "rcpps {$src, $dst|$dst, $src}", []>;
// Packed min/max (no patterns; selected by other means).
def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxps {$src, $dst|$dst, $src}", []>;
def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "maxpd {$src, $dst|$dst, $src}", []>;
def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minps {$src, $dst|$dst, $src}", []>;
def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                  "minpd {$src, $dst|$dst, $src}", []>;
618 let isTwoAddress = 1 in {
619 let isCommutable = 1 in {
620 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
621 "andps {$src2, $dst|$dst, $src2}",
622 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
623 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
624 "andpd {$src2, $dst|$dst, $src2}",
625 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
626 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
627 "orps {$src2, $dst|$dst, $src2}",
628 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
629 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
630 "orpd {$src2, $dst|$dst, $src2}",
631 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
632 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
633 "xorps {$src2, $dst|$dst, $src2}",
634 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
635 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
636 "xorpd {$src2, $dst|$dst, $src2}",
637 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
639 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
640 "andps {$src2, $dst|$dst, $src2}",
641 [(set VR128:$dst, (v4i32 (and VR128:$src1,
642 (load addr:$src2))))]>;
643 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
644 "andpd {$src2, $dst|$dst, $src2}",
645 [(set VR128:$dst, (v2i64 (and VR128:$src1,
646 (load addr:$src2))))]>;
647 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
648 "orps {$src2, $dst|$dst, $src2}",
649 [(set VR128:$dst, (v4i32 (or VR128:$src1,
650 (load addr:$src2))))]>;
651 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
652 "orpd {$src2, $dst|$dst, $src2}",
653 [(set VR128:$dst, (v2i64 (or VR128:$src1,
654 (load addr:$src2))))]>;
655 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
656 "xorps {$src2, $dst|$dst, $src2}",
657 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
658 (load addr:$src2))))]>;
659 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
660 "xorpd {$src2, $dst|$dst, $src2}",
661 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
662 (load addr:$src2))))]>;
663 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
664 "andnps {$src2, $dst|$dst, $src2}",
665 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
667 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
668 "andnps {$src2, $dst|$dst, $src2}",
669 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
670 (load addr:$src2))))]>;
671 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
672 "andnpd {$src2, $dst|$dst, $src2}",
673 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// ANDNPD, memory form: dst = ~src1 & mem.
// BUGFIX: the pattern previously matched (and VR128:$src1, (load ...)) --
// a plain AND with no inversion -- so an ordinary AND could select the
// and-NOT instruction and miscompile.  It must invert $src1, matching
// ANDNPSrm above and the ANDNPDrr register form.
676 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
677 "andnpd {$src2, $dst|$dst, $src2}",
678 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
679 (load addr:$src2))))]>;
682 let isTwoAddress = 1 in {
// Packed FP compares.  $cc selects the comparison predicate and is printed
// as part of the mnemonic via ${cc}.  No selection patterns yet.
683 def CMPPSrr : PSI<0xC2, MRMSrcReg,
684 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
685 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
686 def CMPPSrm : PSI<0xC2, MRMSrcMem,
687 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
688 "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
689 def CMPPDrr : PDI<0xC2, MRMSrcReg,
690 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
691 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
692 def CMPPDrm : PDI<0xC2, MRMSrcMem,
693 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
694 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
697 // Shuffle and unpack instructions
// PSHUFW, register form.
// BUGFIX: MRMDestReg -> MRMSrcReg.  For opcode 0x70 the ModRM reg field
// encodes the destination and r/m encodes the source, i.e. the MRMSrc*
// convention -- as PSHUFWrm/PSHUFDrm below already use.  MRMDestReg would
// swap the operands in the emitted ModRM byte.
698 def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
699 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
700 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFW, memory form: 64-bit memory source, $src2 is the immediate
// shuffle mask.  No selection pattern yet.
701 def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
702 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
703 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFD, register form.
// BUGFIX: MRMDestReg -> MRMSrcReg.  The reg field encodes $dst and r/m
// encodes the source register, matching the memory form PSHUFDrm below;
// MRMDestReg would encode the operands backwards.
704 def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
705 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
706 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFD, memory form: 128-bit memory source, $src2 is the immediate
// shuffle mask.  No selection pattern yet.
707 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
708 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
709 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
711 let isTwoAddress = 1 in {
// SHUFPS/SHUFPD: two-address shuffles of packed FP values; $src3 is the
// immediate shuffle-control mask.  No selection patterns here (the splat
// patterns at the end of the file select SHUFPSrr directly).
712 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
713 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
714 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
715 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
716 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
717 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
718 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
719 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
720 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
721 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
722 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
723 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// Unpack-high (0x15) and unpack-low (0x14) of packed single/double
// values, register and 128-bit memory forms.  No selection patterns yet.
726 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
727 (ops VR128:$dst, VR128:$src1, VR128:$src2),
728 "unpckhps {$src2, $dst|$dst, $src2}", []>;
729 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
730 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
731 "unpckhps {$src2, $dst|$dst, $src2}", []>;
732 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
733 (ops VR128:$dst, VR128:$src1, VR128:$src2),
734 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
735 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
736 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
737 "unpckhpd {$src2, $dst|$dst, $src2}", []>;
738 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
739 (ops VR128:$dst, VR128:$src1, VR128:$src2),
740 "unpcklps {$src2, $dst|$dst, $src2}", []>;
741 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
742 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
743 "unpcklps {$src2, $dst|$dst, $src2}", []>;
744 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
745 (ops VR128:$dst, VR128:$src1, VR128:$src2),
746 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
747 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
748 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
749 "unpcklpd {$src2, $dst|$dst, $src2}", []>;
751 //===----------------------------------------------------------------------===//
752 // SSE integer instructions
753 //===----------------------------------------------------------------------===//
756 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
757 "movd {$src, $dst|$dst, $src}",
759 (v4i32 (scalar_to_vector R32:$src)))]>;
// MOVD between 32-bit memory and XMM.  No selection patterns for these
// memory forms yet.
760 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
761 "movd {$src, $dst|$dst, $src}", []>;
762 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
763 "movd {$src, $dst|$dst, $src}", []>;
// MOVDQA: aligned 128-bit integer moves.  The load/store patterns use
// v4i32 as the canonical integer vector type; the Pat's near the end of
// this file map the other integer vector types onto these instructions.
765 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
766 "movdqa {$src, $dst|$dst, $src}", []>;
767 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
768 "movdqa {$src, $dst|$dst, $src}",
769 [(set VR128:$dst, (loadv4i32 addr:$src))]>;
770 def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
771 "movdqa {$src, $dst|$dst, $src}",
772 [(store (v4i32 VR128:$src), addr:$dst)]>;
774 // SSE2 instructions with XS prefix
775 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
776 "movq {$src, $dst|$dst, $src}",
778 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
// MOVQ load, XS-prefixed SSE2 form (F3 0F 7E).  No selection pattern yet.
780 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
781 "movq {$src, $dst|$dst, $src}", []>, XS;
// MOVQ store (66 0F D6).
// BUGFIX: MRMSrcMem -> MRMDestMem.  This is a store whose memory operand
// ($dst) sits in the ModRM r/m field; the sibling stores in this file
// (MOVD128mr, MOVDQAmr) correctly use MRMDestMem, and MRMSrcMem would
// emit the ModRM byte with the operands reversed.
783 def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
784 "movq {$src, $dst|$dst, $src}", []>;
786 // 128-bit Integer Arithmetic
787 let isTwoAddress = 1 in {
788 let isCommutable = 1 in {
// Packed integer adds: byte/word/dword, register forms (commutable).
789 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
790 "paddb {$src2, $dst|$dst, $src2}",
791 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
792 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
793 "paddw {$src2, $dst|$dst, $src2}",
794 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
795 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
796 "paddd {$src2, $dst|$dst, $src2}",
797 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
// Memory forms: second source loaded from a 128-bit memory location.
// NOTE(review): these use f128mem for integer ops, whereas MOVDQA above
// uses i128mem -- presumably only the asm operand class differs; confirm.
799 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
800 "paddb {$src2, $dst|$dst, $src2}",
801 [(set VR128:$dst, (v16i8 (add VR128:$src1,
802 (load addr:$src2))))]>;
803 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
804 "paddw {$src2, $dst|$dst, $src2}",
805 [(set VR128:$dst, (v8i16 (add VR128:$src1,
806 (load addr:$src2))))]>;
807 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
808 "paddd {$src2, $dst|$dst, $src2}",
809 [(set VR128:$dst, (v4i32 (add VR128:$src1,
810 (load addr:$src2))))]>;
813 //===----------------------------------------------------------------------===//
814 // Alias Instructions
815 //===----------------------------------------------------------------------===//
817 def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src),
818 "movaps {$src, $dst|$dst, $src}",
820 (v4f32 (scalar_to_vector FR32:$src)))]>;
822 def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
823 "movapd {$src, $dst|$dst, $src}",
825 (v2f64 (scalar_to_vector FR64:$src)))]>;
827 //===----------------------------------------------------------------------===//
828 // Non-Instruction Patterns
829 //===----------------------------------------------------------------------===//
831 // 128-bit vector undef's.
// 128-bit vector undef's: select straight to an IMPLICIT_DEF of a VR128
// register (no instruction emitted).
832 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
833 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
834 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
835 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
836 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
838 // Load 128-bit integer vector values.
839 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
841 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
843 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
845 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
848 // Store 128-bit integer vector values.
// BUGFIX: MOVDQA is an SSE2 instruction, so all four of these patterns
// must be gated on HasSSE2.  Three were gated on HasSSE1, which would
// emit MOVDQA on SSE1-only targets; only the v2i64 pattern was correct.
849 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
850 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
851 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
852 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
853 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
854 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
855 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
856 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
858 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
860 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
862 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
866 // Splat v4f32 / v4i32
867 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
868 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
870 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
871 (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
874 // Splat v2f64 / v2i64
// BUGFIX: the v2f64/v2i64 vector types only become legal with SSE2, so
// these splat patterns are gated on HasSSE2 rather than HasSSE1 (the
// MOVLHPS instruction itself is SSE1, but these types are not).
// NOTE(review): MOVLHPSrr is invoked here with a single source operand;
// its definition is elsewhere in this file -- confirm the operand count
// matches.
875 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
876 (v2f64 (MOVLHPSrr VR128:$src))>, Requires<[HasSSE2]>;
877 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
878 (v2i64 (MOVLHPSrr VR128:$src))>, Requires<[HasSSE2]>;
880 // Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
881 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
882 (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
884 def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
885 (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
// Bitconverts between the FP and integer views of an XMM register are
// free: the pattern result is the source register itself, so no
// instruction is emitted.
889 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
890 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;