1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// Type profile for the X86 unpcklp node: one result and two operands,
// all constrained to be the same type.
20 def SDTX86Unpcklp : SDTypeProfile<1, 2,
21 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
23 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise FP AND/XOR nodes. Used to implement scalar FP logical ops via
// the packed SSE logical instructions; both are commutative and associative.
25 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
26 [SDNPCommutative, SDNPAssociative]>;
27 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
28 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector insertion node (element 0 of the vector result).
// NOTE(review): the SDTypeProfile carries no type constraints, so any
// result/operand type combination is accepted — confirm this is intended.
29 def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
30 SDTypeProfile<1, 1, []>, []>;
31 def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar loads through the X86loadp (LOAD_PACK) node, used by the FsMOVAPS /
// FsMOVAPD aliases that load an FR32/FR64 from 128-bit aligned memory.
38 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// 128-bit vector loads, one fragment per legal vector type.
41 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
43 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
44 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
45 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
46 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
48 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
50 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
51 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Splat masks implementable with SHUFPS; the xform converts the
// build_vector mask into the SHUFPS immediate byte.
54 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
55 return X86::isSplatMask(N);
56 }], SHUFFLE_get_shuf_imm>;
58 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
59 return X86::isSplatMask(N);
62 // Only use PSHUF if it is not a splat.
// Splats are handled by the *_splat_mask leaves above, so they are
// explicitly excluded here to keep the patterns disjoint.
63 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
64 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
65 }], SHUFFLE_get_shuf_imm>;
// Shuffle masks implementable with SHUFPS/SHUFPD; same immediate xform.
67 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
68 return X86::isSHUFPMask(N);
69 }], SHUFFLE_get_shuf_imm>;
71 //===----------------------------------------------------------------------===//
72 // SSE scalar FP Instructions
73 //===----------------------------------------------------------------------===//
75 // Instruction templates
76 // SSI - SSE1 instructions with XS prefix.
77 // SDI - SSE2 instructions with XD prefix.
78 // PSI - SSE1 instructions with TB prefix.
79 // PDI - SSE2 instructions with TB and OpSize prefixes.
80 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
81 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
82 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
83 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
84 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
85 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
86 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
87 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
88 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
89 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
90 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
91 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
92 let Pattern = pattern;
94 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
95 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
96 let Pattern = pattern;
99 // Some 'special' instructions
100 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
101 "#IMPLICIT_DEF $dst",
102 [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
103 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
104 "#IMPLICIT_DEF $dst",
105 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
107 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
108 // scheduler into a branch sequence.
109 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
110 def CMOV_FR32 : I<0, Pseudo,
111 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
112 "#CMOV_FR32 PSEUDO!",
113 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
114 def CMOV_FR64 : I<0, Pseudo,
115 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
116 "#CMOV_FR64 PSEUDO!",
117 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
121 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
122 "movss {$src, $dst|$dst, $src}", []>;
123 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
124 "movss {$src, $dst|$dst, $src}",
125 [(set FR32:$dst, (loadf32 addr:$src))]>;
126 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
127 "movsd {$src, $dst|$dst, $src}", []>;
128 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
129 "movsd {$src, $dst|$dst, $src}",
130 [(set FR64:$dst, (loadf64 addr:$src))]>;
132 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
133 "movss {$src, $dst|$dst, $src}",
134 [(store FR32:$src, addr:$dst)]>;
135 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
136 "movsd {$src, $dst|$dst, $src}",
137 [(store FR64:$src, addr:$dst)]>;
139 // Conversion instructions
140 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
141 "cvttss2si {$src, $dst|$dst, $src}",
142 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
143 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
144 "cvttss2si {$src, $dst|$dst, $src}",
145 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
146 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
147 "cvttsd2si {$src, $dst|$dst, $src}",
148 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
149 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
150 "cvttsd2si {$src, $dst|$dst, $src}",
151 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
152 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
153 "cvtsd2ss {$src, $dst|$dst, $src}",
154 [(set FR32:$dst, (fround FR64:$src))]>;
155 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
156 "cvtsd2ss {$src, $dst|$dst, $src}",
157 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
158 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
159 "cvtsi2ss {$src, $dst|$dst, $src}",
160 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
161 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
162 "cvtsi2ss {$src, $dst|$dst, $src}",
163 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
164 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
165 "cvtsi2sd {$src, $dst|$dst, $src}",
166 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
167 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
168 "cvtsi2sd {$src, $dst|$dst, $src}",
169 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
170 // SSE2 instructions with XS prefix
171 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
172 "cvtss2sd {$src, $dst|$dst, $src}",
173 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
175 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
176 "cvtss2sd {$src, $dst|$dst, $src}",
177 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
180 // Arithmetic instructions
181 let isTwoAddress = 1 in {
182 let isCommutable = 1 in {
183 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
184 "addss {$src2, $dst|$dst, $src2}",
185 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
186 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
187 "addsd {$src2, $dst|$dst, $src2}",
188 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
189 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
190 "mulss {$src2, $dst|$dst, $src2}",
191 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
192 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
193 "mulsd {$src2, $dst|$dst, $src2}",
194 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
197 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
198 "addss {$src2, $dst|$dst, $src2}",
199 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
200 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
201 "addsd {$src2, $dst|$dst, $src2}",
202 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
203 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
204 "mulss {$src2, $dst|$dst, $src2}",
205 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
206 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
207 "mulsd {$src2, $dst|$dst, $src2}",
208 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
210 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
211 "divss {$src2, $dst|$dst, $src2}",
212 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
213 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
214 "divss {$src2, $dst|$dst, $src2}",
215 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
216 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
217 "divsd {$src2, $dst|$dst, $src2}",
218 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
219 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
220 "divsd {$src2, $dst|$dst, $src2}",
221 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
223 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
224 "subss {$src2, $dst|$dst, $src2}",
225 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
226 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
227 "subss {$src2, $dst|$dst, $src2}",
228 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
229 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
230 "subsd {$src2, $dst|$dst, $src2}",
231 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
232 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
233 "subsd {$src2, $dst|$dst, $src2}",
234 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
237 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
238 "sqrtss {$src, $dst|$dst, $src}",
239 [(set FR32:$dst, (fsqrt FR32:$src))]>;
240 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
241 "sqrtss {$src, $dst|$dst, $src}",
242 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
243 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
244 "sqrtsd {$src, $dst|$dst, $src}",
245 [(set FR64:$dst, (fsqrt FR64:$src))]>;
246 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
247 "sqrtsd {$src, $dst|$dst, $src}",
248 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
250 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
251 "rsqrtss {$src, $dst|$dst, $src}", []>;
252 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
253 "rsqrtss {$src, $dst|$dst, $src}", []>;
254 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
255 "rcpss {$src, $dst|$dst, $src}", []>;
256 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
257 "rcpss {$src, $dst|$dst, $src}", []>;
259 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
260 "maxss {$src, $dst|$dst, $src}", []>;
261 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
262 "maxss {$src, $dst|$dst, $src}", []>;
263 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
264 "maxsd {$src, $dst|$dst, $src}", []>;
265 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
266 "maxsd {$src, $dst|$dst, $src}", []>;
267 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
268 "minss {$src, $dst|$dst, $src}", []>;
269 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
270 "minss {$src, $dst|$dst, $src}", []>;
271 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
272 "minsd {$src, $dst|$dst, $src}", []>;
273 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
274 "minsd {$src, $dst|$dst, $src}", []>;
276 // Comparison instructions
277 let isTwoAddress = 1 in {
278 def CMPSSrr : SSI<0xC2, MRMSrcReg,
279 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
280 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
281 def CMPSSrm : SSI<0xC2, MRMSrcMem,
282 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
283 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
284 def CMPSDrr : SDI<0xC2, MRMSrcReg,
285 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
286 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
287 def CMPSDrm : SDI<0xC2, MRMSrcMem,
288 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
289 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
292 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
293 "ucomiss {$src2, $src1|$src1, $src2}",
294 [(X86cmp FR32:$src1, FR32:$src2)]>;
295 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
296 "ucomiss {$src2, $src1|$src1, $src2}",
297 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
298 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
299 "ucomisd {$src2, $src1|$src1, $src2}",
300 [(X86cmp FR64:$src1, FR64:$src2)]>;
301 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
302 "ucomisd {$src2, $src1|$src1, $src2}",
303 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
305 // Aliases of packed instructions for scalar use. These all have names that
308 // Alias instructions that map fld0 to pxor for sse.
309 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): both defs encode pxor (0xEF with TB+OpSize), which is an
// SSE2 instruction, yet FsFLD0SS is gated on HasSSE1 only. An SSE1-only
// target would get an illegal encoding here — xorps (0x57, TB) would be the
// SSE1-safe choice. TODO confirm whether SSE1-only codegen can reach this.
310 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
311 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
312 Requires<[HasSSE1]>, TB, OpSize;
313 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
314 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
315 Requires<[HasSSE2]>, TB, OpSize;
317 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
318 // Upper bits are disregarded.
319 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
320 "movaps {$src, $dst|$dst, $src}", []>;
321 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
322 "movapd {$src, $dst|$dst, $src}", []>;
324 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
325 // Upper bits are disregarded.
326 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
327 "movaps {$src, $dst|$dst, $src}",
328 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
329 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
330 "movapd {$src, $dst|$dst, $src}",
331 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
333 // Alias bitwise logical operations using SSE logical ops on packed FP values.
334 let isTwoAddress = 1 in {
335 let isCommutable = 1 in {
336 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
337 "andps {$src2, $dst|$dst, $src2}",
338 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
339 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
340 "andpd {$src2, $dst|$dst, $src2}",
341 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
342 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
343 "orps {$src2, $dst|$dst, $src2}", []>;
344 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
345 "orpd {$src2, $dst|$dst, $src2}", []>;
346 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
347 "xorps {$src2, $dst|$dst, $src2}",
348 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
349 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
350 "xorpd {$src2, $dst|$dst, $src2}",
351 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
353 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
354 "andps {$src2, $dst|$dst, $src2}",
355 [(set FR32:$dst, (X86fand FR32:$src1,
356 (X86loadpf32 addr:$src2)))]>;
357 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
358 "andpd {$src2, $dst|$dst, $src2}",
359 [(set FR64:$dst, (X86fand FR64:$src1,
360 (X86loadpf64 addr:$src2)))]>;
361 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
362 "orps {$src2, $dst|$dst, $src2}", []>;
363 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
364 "orpd {$src2, $dst|$dst, $src2}", []>;
365 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
366 "xorps {$src2, $dst|$dst, $src2}",
367 [(set FR32:$dst, (X86fxor FR32:$src1,
368 (X86loadpf32 addr:$src2)))]>;
369 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
370 "xorpd {$src2, $dst|$dst, $src2}",
371 [(set FR64:$dst, (X86fxor FR64:$src1,
372 (X86loadpf64 addr:$src2)))]>;
374 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
375 "andnps {$src2, $dst|$dst, $src2}", []>;
376 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
377 "andnps {$src2, $dst|$dst, $src2}", []>;
378 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
379 "andnpd {$src2, $dst|$dst, $src2}", []>;
380 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
381 "andnpd {$src2, $dst|$dst, $src2}", []>;
384 //===----------------------------------------------------------------------===//
385 // SSE packed FP Instructions
386 //===----------------------------------------------------------------------===//
388 // Some 'special' instructions
389 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
390 "#IMPLICIT_DEF $dst",
391 [(set VR128:$dst, (v4f32 (undef)))]>,
395 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
396 "movaps {$src, $dst|$dst, $src}", []>;
397 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
398 "movaps {$src, $dst|$dst, $src}",
399 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
400 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
401 "movapd {$src, $dst|$dst, $src}", []>;
402 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
403 "movapd {$src, $dst|$dst, $src}",
404 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
406 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
407 "movaps {$src, $dst|$dst, $src}",
408 [(store (v4f32 VR128:$src), addr:$dst)]>;
409 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
410 "movapd {$src, $dst|$dst, $src}",
411 [(store (v2f64 VR128:$src), addr:$dst)]>;
413 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
414 "movups {$src, $dst|$dst, $src}", []>;
415 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
416 "movups {$src, $dst|$dst, $src}", []>;
417 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
418 "movups {$src, $dst|$dst, $src}", []>;
419 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
420 "movupd {$src, $dst|$dst, $src}", []>;
421 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
422 "movupd {$src, $dst|$dst, $src}", []>;
423 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
424 "movupd {$src, $dst|$dst, $src}", []>;
426 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
427 "movlps {$src, $dst|$dst, $src}", []>;
428 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
429 "movlps {$src, $dst|$dst, $src}", []>;
430 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
431 "movlpd {$src, $dst|$dst, $src}", []>;
432 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
433 "movlpd {$src, $dst|$dst, $src}", []>;
435 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
436 "movhps {$src, $dst|$dst, $src}", []>;
437 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
438 "movhps {$src, $dst|$dst, $src}", []>;
439 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
440 "movhpd {$src, $dst|$dst, $src}", []>;
441 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
442 "movhpd {$src, $dst|$dst, $src}", []>;
444 let isTwoAddress = 1 in {
445 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
446 "movlhps {$src2, $dst|$dst, $src2}", []>;
// MOVHLPS (0F 12 /r): move the high two packed singles of $src2 into the
// low half of $dst. Fixed the asm string: it previously said "movlhps",
// which would assemble/print the wrong instruction for this opcode.
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movhlps {$src2, $dst|$dst, $src2}", []>;
451 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
452 "movmskps {$src, $dst|$dst, $src}", []>;
// MOVMSKPD (66 0F 50 /r) is an SSE2 instruction: use PDI so the OpSize
// prefix is emitted and the HasSSE2 predicate applies. It was previously
// declared PSI, which encodes plain 0F 50 (movmskps) and only requires SSE1.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}", []>;
456 // Conversion instructions
457 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
458 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
459 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
460 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
461 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
462 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
463 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
464 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
466 // SSE2 instructions without OpSize prefix
467 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
468 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
470 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
471 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
474 // SSE2 instructions with XS prefix
475 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
476 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
477 XS, Requires<[HasSSE2]>;
478 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
479 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
480 XS, Requires<[HasSSE2]>;
482 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
483 "cvtps2pi {$src, $dst|$dst, $src}", []>;
484 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
485 "cvtps2pi {$src, $dst|$dst, $src}", []>;
486 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
487 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
488 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
489 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
491 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
492 "cvtps2dq {$src, $dst|$dst, $src}", []>;
493 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
494 "cvtps2dq {$src, $dst|$dst, $src}", []>;
495 // SSE2 packed instructions with XD prefix
496 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
497 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
498 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
499 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
501 // SSE2 instructions without OpSize prefix
502 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
503 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
505 def CVTPS2PDrm : I<0x5A, MRMSrcReg, (ops VR128:$dst, f64mem:$src),
506 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
509 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
510 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Memory-operand form of cvtpd2ps: $src is f128mem, so the format must be
// MRMSrcMem (it was MRMSrcReg, which would encode a register r/m operand).
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
515 let isTwoAddress = 1 in {
516 let isCommutable = 1 in {
517 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
518 "addps {$src2, $dst|$dst, $src2}",
519 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
520 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
521 "addpd {$src2, $dst|$dst, $src2}",
522 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
523 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
524 "mulps {$src2, $dst|$dst, $src2}",
525 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
526 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
527 "mulpd {$src2, $dst|$dst, $src2}",
528 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
531 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
532 "addps {$src2, $dst|$dst, $src2}",
533 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
534 (load addr:$src2))))]>;
535 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
536 "addpd {$src2, $dst|$dst, $src2}",
537 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
538 (load addr:$src2))))]>;
539 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
540 "mulps {$src2, $dst|$dst, $src2}",
541 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
542 (load addr:$src2))))]>;
543 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
544 "mulpd {$src2, $dst|$dst, $src2}",
545 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
546 (load addr:$src2))))]>;
548 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
549 "divps {$src2, $dst|$dst, $src2}",
550 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
551 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
552 "divps {$src2, $dst|$dst, $src2}",
553 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
554 (load addr:$src2))))]>;
555 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
556 "divpd {$src2, $dst|$dst, $src2}",
557 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
558 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
559 "divpd {$src2, $dst|$dst, $src2}",
560 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
561 (load addr:$src2))))]>;
563 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
564 "subps {$src2, $dst|$dst, $src2}",
565 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
566 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
567 "subps {$src2, $dst|$dst, $src2}",
568 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
569 (load addr:$src2))))]>;
570 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
571 "subpd {$src2, $dst|$dst, $src2}",
572 [(set VR128:$dst, (fsub VR128:$src1, VR128:$src2))]>;
573 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
574 "subpd {$src2, $dst|$dst, $src2}",
575 [(set VR128:$dst, (fsub VR128:$src1,
576 (load addr:$src2)))]>;
579 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
580 "sqrtps {$src, $dst|$dst, $src}",
581 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
582 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
583 "sqrtps {$src, $dst|$dst, $src}",
584 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
585 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
586 "sqrtpd {$src, $dst|$dst, $src}",
587 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
588 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
589 "sqrtpd {$src, $dst|$dst, $src}",
590 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
592 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
593 "rsqrtps {$src, $dst|$dst, $src}", []>;
594 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
595 "rsqrtps {$src, $dst|$dst, $src}", []>;
596 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
597 "rcpps {$src, $dst|$dst, $src}", []>;
598 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
599 "rcpps {$src, $dst|$dst, $src}", []>;
601 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
602 "maxps {$src, $dst|$dst, $src}", []>;
603 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
604 "maxps {$src, $dst|$dst, $src}", []>;
605 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
606 "maxpd {$src, $dst|$dst, $src}", []>;
607 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
608 "maxpd {$src, $dst|$dst, $src}", []>;
609 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
610 "minps {$src, $dst|$dst, $src}", []>;
611 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
612 "minps {$src, $dst|$dst, $src}", []>;
613 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
614 "minpd {$src, $dst|$dst, $src}", []>;
615 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
616 "minpd {$src, $dst|$dst, $src}", []>;
619 let isTwoAddress = 1 in {
620 let isCommutable = 1 in {
621 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
622 "andps {$src2, $dst|$dst, $src2}",
623 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
624 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
625 "andpd {$src2, $dst|$dst, $src2}",
626 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
627 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
628 "orps {$src2, $dst|$dst, $src2}",
629 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
630 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
631 "orpd {$src2, $dst|$dst, $src2}",
632 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
633 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
634 "xorps {$src2, $dst|$dst, $src2}",
635 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
636 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
637 "xorpd {$src2, $dst|$dst, $src2}",
638 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
640 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
641 "andps {$src2, $dst|$dst, $src2}",
642 [(set VR128:$dst, (v4i32 (and VR128:$src1,
643 (load addr:$src2))))]>;
644 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
645 "andpd {$src2, $dst|$dst, $src2}",
646 [(set VR128:$dst, (v2i64 (and VR128:$src1,
647 (load addr:$src2))))]>;
648 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
649 "orps {$src2, $dst|$dst, $src2}",
650 [(set VR128:$dst, (v4i32 (or VR128:$src1,
651 (load addr:$src2))))]>;
652 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
653 "orpd {$src2, $dst|$dst, $src2}",
654 [(set VR128:$dst, (v2i64 (or VR128:$src1,
655 (load addr:$src2))))]>;
656 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
657 "xorps {$src2, $dst|$dst, $src2}",
658 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
659 (load addr:$src2))))]>;
660 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
661 "xorpd {$src2, $dst|$dst, $src2}",
662 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
663 (load addr:$src2))))]>;
664 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
665 "andnps {$src2, $dst|$dst, $src2}",
666 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
668 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
669 "andnps {$src2, $dst|$dst, $src2}",
670 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
671 (load addr:$src2))))]>;
// ANDNPD: dst = ~src1 & src2 (bitwise AND-NOT of packed double-FP values).
// Two fixes: the truncated ANDNPDrr pattern is completed, and ANDNPDrm's
// pattern previously omitted the (not ...) on $src1 — it matched a plain AND,
// which would have selected ANDNPD for an AND and produced wrong code.
def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "andnpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
                                          VR128:$src2)))]>;
def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "andnpd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
                                          (load addr:$src2))))]>;
// Packed FP compares (CMPPS / CMPPD).  The condition code is an immediate
// pseudo-operand (SSECC) folded into the mnemonic via ${cc}.  No selection
// patterns yet ([]), so these are assembler/disassembler-only for now.
// The closing brace of this let block was lost in extraction; restored.
let isTwoAddress = 1 in {
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
}
// Shuffle and unpack instructions
// PSHUFW/PSHUFD (0F 70 /r): the destination register is encoded in the reg
// field and the source in r/m, so the register-register forms must be
// MRMSrcReg — they were incorrectly tagged MRMDestReg, which would emit a
// ModRM byte with the operands swapped.
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// SHUFPS / SHUFPD: interleave-select shuffles controlled by an 8-bit
// immediate.  Register forms are selected from vector_shuffle nodes whose
// mask matches SHUFP_shuffle_mask; the memory forms have no patterns yet.
// The closing brace of this let block was lost in extraction; restored.
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (vector_shuffle
                                        (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                                        SHUFP_shuffle_mask:$src3))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
}
// UNPCKHPS/UNPCKHPD (0x15) interleave the high halves of the two sources;
// UNPCKLPS/UNPCKLPD (0x14) interleave the low halves.  No selection
// patterns yet ([]), so these are assembler/disassembler-only for now.
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "unpcklpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                "unpcklpd {$src2, $dst|$dst, $src2}", []>;
758 //===----------------------------------------------------------------------===//
759 // SSE integer instructions
760 //===----------------------------------------------------------------------===//
// MOVD: move a 32-bit GPR or 32-bit memory value into the low dword of an
// XMM register (and back).  The MOVD128rr pattern's "[(set VR128:$dst,"
// line was lost in extraction and is restored here.
def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
                "movd {$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                  (v4i32 (scalar_to_vector R32:$src)))]>;
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                "movd {$src, $dst|$dst, $src}", []>;
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                "movd {$src, $dst|$dst, $src}", []>;
// MOVDQA: aligned 128-bit integer vector move.  The load/store patterns are
// written on v4i32; the other integer vector types are handled by the
// Pat<> aliases in the Non-Instruction Patterns section below.
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                "movdqa {$src, $dst|$dst, $src}",
                [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                "movdqa {$src, $dst|$dst, $src}",
                [(store (v4i32 VR128:$src), addr:$dst)]>;
// SSE2 instructions with XS prefix
// MOVQ128rr's set-pattern line and Requires clause were lost in extraction
// and are restored.  MOVQ128mr (66 0F D6) stores the low quadword of an XMM
// register to r/m, so its destination is in the r/m field: it must be
// MRMDestMem, not MRMSrcMem as previously tagged.
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                "movq {$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                  (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
                Requires<[HasSSE2]>;
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                "movq {$src, $dst|$dst, $src}", []>, XS;
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                "movq {$src, $dst|$dst, $src}", []>;
// 128-bit Integer Arithmetic
//
// Packed integer adds at byte/word/dword granularity.  The rr forms are
// commutable.  Both closing braces of the let blocks were lost in
// extraction and are restored.  The rm forms previously took f128mem;
// changed to i128mem for consistency with the other 128-bit integer
// instructions (e.g. MOVDQA) — the encoding is unaffected.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "paddb {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "paddw {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "paddd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
}
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "paddb {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v16i8 (add VR128:$src1,
                                          (load addr:$src2))))]>;
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "paddw {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v8i16 (add VR128:$src1,
                                          (load addr:$src2))))]>;
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "paddd {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                          (load addr:$src2))))]>;
}
820 //===----------------------------------------------------------------------===//
821 // Alias Instructions
822 //===----------------------------------------------------------------------===//
// Aliases that place a scalar FP register into the low element of a vector
// register via a plain MOVAPS/MOVAPD.  The "[(set VR128:$dst," lines of
// both patterns were lost in extraction and are restored.
def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                "movaps {$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                  (v4f32 (scalar_to_vector FR32:$src)))]>;
def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                "movapd {$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                  (v2f64 (scalar_to_vector FR64:$src)))]>;
834 //===----------------------------------------------------------------------===//
835 // Non-Instruction Patterns
836 //===----------------------------------------------------------------------===//
// 128-bit vector undef's.
// Lower each vector-typed undef to an IMPLICIT_DEF of a VR128 register.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
// Load 128-bit integer vector values.
// MOVDQA is an SSE2 instruction, so every pattern that selects it must be
// guarded by HasSSE2 — the stores previously required only HasSSE1, which
// could select MOVDQA on SSE1-only targets.  The loads' Requires lines were
// lost in extraction and are restored with the same predicate.
def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
      Requires<[HasSSE2]>;

// Store 128-bit integer vector values.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits of it are used; MOVD copies the whole dword, which is fine.
// The Requires clauses were lost in extraction; MOVD128rr is a PDI (SSE2)
// instruction, so they are restored as HasSSE2.
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
      Requires<[HasSSE2]>;
// Splat v4f32 / v4i32
// A splat is a SHUFPS of the source with itself using the splat mask.
// The Requires clauses were lost in extraction; SHUFPS is an SSE1
// instruction, so they are restored as HasSSE1.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
      Requires<[HasSSE1]>;
// Splat v2f64 / v2i64
// A two-element splat is a MOVLHPS of the source with itself.  Although
// MOVLHPS itself is SSE1, the v2f64/v2i64 types are only legal with SSE2,
// so the predicate is tightened from HasSSE1 to HasSSE2.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
// The Requires clauses were lost in extraction; PSHUFD is a PDIi8 (SSE2)
// instruction, so they are restored as HasSSE2.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
// Bit-casts between 128-bit vector types are free: the value stays in the
// same VR128 register, so the patterns just re-type the operand.
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;