1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// Type profile for X86 unpack-low nodes: one result, two operands, all of
// the same (vector) type.
20 def SDTX86Unpcklp : SDTypeProfile<1, 2,
21 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
// NOTE(review): this listing appears truncated here -- X86loadp's
// node-property list and closing token are not visible; confirm against
// the original file.
23 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise FP AND/XOR nodes; both marked commutative and associative.
25 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
26 [SDNPCommutative, SDNPAssociative]>;
27 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
28 [SDNPCommutative, SDNPAssociative]>;
// Scalar-to-vector insertion node; the profile places no type constraints
// on result or operand.
29 def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
30 SDTypeProfile<1, 1, []>, []>;
// NOTE(review): X86unpcklp's profile/flag arguments are missing from this
// listing; confirm against the original file.
31 def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
34 //===----------------------------------------------------------------------===//
35 // SSE pattern fragments
36 //===----------------------------------------------------------------------===//
// Scalar f32/f64 loads expressed through the LOAD_PACK node (used by the
// FsMOVAPSrm/FsMOVAPDrm scalar-alias instructions below).
38 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
39 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// Packed 128-bit vector loads.
41 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
42 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
44 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// Transform a build_vector shuffle mask into the 8-bit immediate used by
// SHUFP*.
46 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
47 return getI8Imm(X86::getShuffleSHUFImmediate(N));
// Same transform for PSHUFD's immediate encoding.
// NOTE(review): the `}]>;` terminators of these two xforms are not visible
// in this listing; confirm against the original file.
50 def SHUFFLE_get_pshufd_imm : SDNodeXForm<build_vector, [{
51 return getI8Imm(X86::getShufflePSHUFDImmediate(N));
// Splat masks implementable with SHUFPS; the attached xform computes the
// shuffle immediate.
54 def SHUFP_splat_mask : PatLeaf<(build_vector), [{
55 return X86::isSplatMask(N);
56 }], SHUFFLE_get_shuf_imm>;
// Splat masks implementable with MOVLHPS.
// NOTE(review): this PatLeaf's terminator is elided in the listing.
58 def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
59 return X86::isSplatMask(N);
62 // Only use PSHUF if it is not a splat.
63 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
64 return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
65 }], SHUFFLE_get_pshufd_imm>;
68 //===----------------------------------------------------------------------===//
69 // SSE scalar FP Instructions
70 //===----------------------------------------------------------------------===//
72 // Instruction templates
73 // SSI - SSE1 instructions with XS prefix.
74 // SDI - SSE2 instructions with XD prefix.
75 // PSI - SSE1 instructions with TB prefix.
76 // PDI - SSE2 instructions with TB and OpSize prefixes.
77 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
78 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// Instruction template classes (see the key in the comment block above):
// each fixes the prefix bytes and the HasSSE1/HasSSE2 predicate.
79 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
80 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
81 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
82 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
83 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
84 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
85 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
86 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// The Imm8 variants derive from X86Inst directly so ImmT can be set to Imm8.
// NOTE(review): the closing `}` of PSIi8 and PDIi8 is not visible in this
// listing; confirm against the original file.
87 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
88 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
89 let Pattern = pattern;
91 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
92 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
93 let Pattern = pattern;
96 // Some 'special' instructions
// Pseudo implicit-def of an FR32 register. FR32 is the SSE1 scalar class,
// so this should be gated on HasSSE1, not HasSSE2 (the FR64 twin below
// correctly requires HasSSE2). The asm-string line was also missing from
// this listing and is restored to match the FR64 definition.
97 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
98 "#IMPLICIT_DEF $dst",
99 [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
100 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
101 "#IMPLICIT_DEF $dst",
102 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
104 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
105 // scheduler into a branch sequence.
106 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
107 def CMOV_FR32 : I<0, Pseudo,
108 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
109 "#CMOV_FR32 PSEUDO!",
110 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
111 def CMOV_FR64 : I<0, Pseudo,
112 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
113 "#CMOV_FR64 PSEUDO!",
114 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar single/double moves. The reg-reg forms carry no pattern (they are
// selected as copies); the load/store forms match plain f32/f64 loads and
// stores.
118 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
119 "movss {$src, $dst|$dst, $src}", []>;
120 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
121 "movss {$src, $dst|$dst, $src}",
122 [(set FR32:$dst, (loadf32 addr:$src))]>;
123 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
124 "movsd {$src, $dst|$dst, $src}", []>;
125 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
126 "movsd {$src, $dst|$dst, $src}",
127 [(set FR64:$dst, (loadf64 addr:$src))]>;
// Stores (opcode 0x11, destination is memory).
129 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
130 "movss {$src, $dst|$dst, $src}",
131 [(store FR32:$src, addr:$dst)]>;
132 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
133 "movsd {$src, $dst|$dst, $src}",
134 [(store FR64:$src, addr:$dst)]>;
136 // Conversion instructions
// Truncating FP -> signed i32 conversions (cvttss2si / cvttsd2si).
137 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
138 "cvttss2si {$src, $dst|$dst, $src}",
139 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
140 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
141 "cvttss2si {$src, $dst|$dst, $src}",
142 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
143 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
144 "cvttsd2si {$src, $dst|$dst, $src}",
145 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
146 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
147 "cvttsd2si {$src, $dst|$dst, $src}",
148 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 rounding conversion.
149 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
150 "cvtsd2ss {$src, $dst|$dst, $src}",
151 [(set FR32:$dst, (fround FR64:$src))]>;
152 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
153 "cvtsd2ss {$src, $dst|$dst, $src}",
154 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// Signed i32 -> f32/f64 conversions.
155 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
156 "cvtsi2ss {$src, $dst|$dst, $src}",
157 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
158 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
159 "cvtsi2ss {$src, $dst|$dst, $src}",
160 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
161 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
162 "cvtsi2sd {$src, $dst|$dst, $src}",
163 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
164 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
165 "cvtsi2sd {$src, $dst|$dst, $src}",
166 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
167 // SSE2 instructions with XS prefix
168 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
169 "cvtss2sd {$src, $dst|$dst, $src}",
170 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
172 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
173 "cvtss2sd {$src, $dst|$dst, $src}",
174 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
177 // Arithmetic instructions
178 let isTwoAddress = 1 in {
179 let isCommutable = 1 in {
180 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
181 "addss {$src2, $dst|$dst, $src2}",
182 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
183 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
184 "addsd {$src2, $dst|$dst, $src2}",
185 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
186 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
187 "mulss {$src2, $dst|$dst, $src2}",
188 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
189 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
190 "mulsd {$src2, $dst|$dst, $src2}",
191 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
194 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
195 "addss {$src2, $dst|$dst, $src2}",
196 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
197 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
198 "addsd {$src2, $dst|$dst, $src2}",
199 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
200 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
201 "mulss {$src2, $dst|$dst, $src2}",
202 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
203 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
204 "mulsd {$src2, $dst|$dst, $src2}",
205 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
207 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
208 "divss {$src2, $dst|$dst, $src2}",
209 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
210 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
211 "divss {$src2, $dst|$dst, $src2}",
212 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
213 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
214 "divsd {$src2, $dst|$dst, $src2}",
215 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
216 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
217 "divsd {$src2, $dst|$dst, $src2}",
218 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
220 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
221 "subss {$src2, $dst|$dst, $src2}",
222 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
223 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
224 "subss {$src2, $dst|$dst, $src2}",
225 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
226 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
227 "subsd {$src2, $dst|$dst, $src2}",
228 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
229 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
230 "subsd {$src2, $dst|$dst, $src2}",
231 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
234 def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
235 "sqrtss {$src, $dst|$dst, $src}",
236 [(set FR32:$dst, (fsqrt FR32:$src))]>;
237 def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
238 "sqrtss {$src, $dst|$dst, $src}",
239 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
240 def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
241 "sqrtsd {$src, $dst|$dst, $src}",
242 [(set FR64:$dst, (fsqrt FR64:$src))]>;
243 def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
244 "sqrtsd {$src, $dst|$dst, $src}",
245 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
247 def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
248 "rsqrtss {$src, $dst|$dst, $src}", []>;
249 def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
250 "rsqrtss {$src, $dst|$dst, $src}", []>;
251 def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
252 "rcpss {$src, $dst|$dst, $src}", []>;
253 def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
254 "rcpss {$src, $dst|$dst, $src}", []>;
256 def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
257 "maxss {$src, $dst|$dst, $src}", []>;
258 def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
259 "maxss {$src, $dst|$dst, $src}", []>;
260 def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
261 "maxsd {$src, $dst|$dst, $src}", []>;
262 def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
263 "maxsd {$src, $dst|$dst, $src}", []>;
264 def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
265 "minss {$src, $dst|$dst, $src}", []>;
266 def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
267 "minss {$src, $dst|$dst, $src}", []>;
268 def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
269 "minsd {$src, $dst|$dst, $src}", []>;
270 def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
271 "minsd {$src, $dst|$dst, $src}", []>;
273 // Comparison instructions
274 let isTwoAddress = 1 in {
275 def CMPSSrr : SSI<0xC2, MRMSrcReg,
276 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
277 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
278 def CMPSSrm : SSI<0xC2, MRMSrcMem,
279 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
280 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
281 def CMPSDrr : SDI<0xC2, MRMSrcReg,
282 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
283 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
284 def CMPSDrm : SDI<0xC2, MRMSrcMem,
285 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
286 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// Unordered scalar FP compares; they set EFLAGS (matched as X86cmp) and
// produce no register result, so the ops list has no $dst.
289 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
290 "ucomiss {$src2, $src1|$src1, $src2}",
291 [(X86cmp FR32:$src1, FR32:$src2)]>;
292 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
293 "ucomiss {$src2, $src1|$src1, $src2}",
294 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
295 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
296 "ucomisd {$src2, $src1|$src1, $src2}",
297 [(X86cmp FR64:$src1, FR64:$src2)]>;
298 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
299 "ucomisd {$src2, $src1|$src1, $src2}",
300 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
302 // Aliases of packed instructions for scalar use. These all have names that
305 // Alias instructions that map fld0 to pxor for sse.
306 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Materialize FP +0.0 by xoring a register with itself (pxor, 66 0F EF).
// MRMInitReg encodes the same register as both source and destination.
307 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
308 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
309 Requires<[HasSSE1]>, TB, OpSize;
310 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
311 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
312 Requires<[HasSSE2]>, TB, OpSize;
314 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
315 // Upper bits are disregarded.
316 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
317 "movaps {$src, $dst|$dst, $src}", []>;
318 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
319 "movapd {$src, $dst|$dst, $src}", []>;
321 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
322 // Upper bits are disregarded.
323 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
324 "movaps {$src, $dst|$dst, $src}",
325 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
326 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
327 "movapd {$src, $dst|$dst, $src}",
328 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
330 // Alias bitwise logical operations using SSE logical ops on packed FP values.
331 let isTwoAddress = 1 in {
332 let isCommutable = 1 in {
333 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
334 "andps {$src2, $dst|$dst, $src2}",
335 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
336 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
337 "andpd {$src2, $dst|$dst, $src2}",
338 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
339 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
340 "orps {$src2, $dst|$dst, $src2}", []>;
341 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
342 "orpd {$src2, $dst|$dst, $src2}", []>;
343 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
344 "xorps {$src2, $dst|$dst, $src2}",
345 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
346 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
347 "xorpd {$src2, $dst|$dst, $src2}",
348 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
350 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
351 "andps {$src2, $dst|$dst, $src2}",
352 [(set FR32:$dst, (X86fand FR32:$src1,
353 (X86loadpf32 addr:$src2)))]>;
354 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
355 "andpd {$src2, $dst|$dst, $src2}",
356 [(set FR64:$dst, (X86fand FR64:$src1,
357 (X86loadpf64 addr:$src2)))]>;
358 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
359 "orps {$src2, $dst|$dst, $src2}", []>;
360 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
361 "orpd {$src2, $dst|$dst, $src2}", []>;
362 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
363 "xorps {$src2, $dst|$dst, $src2}",
364 [(set FR32:$dst, (X86fxor FR32:$src1,
365 (X86loadpf32 addr:$src2)))]>;
366 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
367 "xorpd {$src2, $dst|$dst, $src2}",
368 [(set FR64:$dst, (X86fxor FR64:$src1,
369 (X86loadpf64 addr:$src2)))]>;
371 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
372 "andnps {$src2, $dst|$dst, $src2}", []>;
373 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
374 "andnps {$src2, $dst|$dst, $src2}", []>;
375 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
376 "andnpd {$src2, $dst|$dst, $src2}", []>;
377 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
378 "andnpd {$src2, $dst|$dst, $src2}", []>;
381 //===----------------------------------------------------------------------===//
382 // SSE packed FP Instructions
383 //===----------------------------------------------------------------------===//
385 // Some 'special' instructions
386 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
387 "#IMPLICIT_DEF $dst",
388 [(set VR128:$dst, (v4f32 (undef)))]>,
392 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
393 "movaps {$src, $dst|$dst, $src}", []>;
394 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
395 "movaps {$src, $dst|$dst, $src}",
396 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
397 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
398 "movapd {$src, $dst|$dst, $src}", []>;
399 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
400 "movapd {$src, $dst|$dst, $src}",
401 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
403 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
404 "movaps {$src, $dst|$dst, $src}",
405 [(store (v4f32 VR128:$src), addr:$dst)]>;
406 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
407 "movapd {$src, $dst|$dst, $src}",
408 [(store (v2f64 VR128:$src), addr:$dst)]>;
410 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
411 "movups {$src, $dst|$dst, $src}", []>;
412 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
413 "movups {$src, $dst|$dst, $src}", []>;
414 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
415 "movups {$src, $dst|$dst, $src}", []>;
416 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
417 "movupd {$src, $dst|$dst, $src}", []>;
418 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
419 "movupd {$src, $dst|$dst, $src}", []>;
420 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
421 "movupd {$src, $dst|$dst, $src}", []>;
423 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
424 "movlps {$src, $dst|$dst, $src}", []>;
425 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
426 "movlps {$src, $dst|$dst, $src}", []>;
427 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
428 "movlpd {$src, $dst|$dst, $src}", []>;
429 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
430 "movlpd {$src, $dst|$dst, $src}", []>;
432 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
433 "movhps {$src, $dst|$dst, $src}", []>;
434 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
435 "movhps {$src, $dst|$dst, $src}", []>;
436 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
437 "movhpd {$src, $dst|$dst, $src}", []>;
438 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
439 "movhpd {$src, $dst|$dst, $src}", []>;
// movlhps: copy the low quadword of $src into the high quadword of $dst.
441 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src),
442 "movlhps {$src, $dst|$dst, $src}", []>;
// movhlps: copy the high quadword of $src into the low quadword of $dst.
// Fix: the asm string said "movlhps", duplicating MOVLHPSrr above; opcode
// 0x12 in reg-reg form is MOVHLPS.
443 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
444 "movhlps {$src, $dst|$dst, $src}", []>;
446 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
447 "movmskps {$src, $dst|$dst, $src}", []>;
// movmskpd: extract the two sign bits of a v2f64 into a GPR.
// Fix: this was declared PSI (SSE1, no 0x66 prefix); movmskpd is an SSE2
// instruction that needs the OpSize prefix, i.e. the PDI template
// (compare MOVMSKPSrr above, which is correctly PSI).
448 def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
449 "movmskpd {$src, $dst|$dst, $src}", []>;
451 // Conversion instructions
452 def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
453 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
454 def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
455 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
456 def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
457 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
458 def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
459 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
461 // SSE2 instructions without OpSize prefix
462 def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
463 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
465 def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
466 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
469 // SSE2 instructions with XS prefix
470 def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
471 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
472 XS, Requires<[HasSSE2]>;
473 def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
474 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
475 XS, Requires<[HasSSE2]>;
477 def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
478 "cvtps2pi {$src, $dst|$dst, $src}", []>;
479 def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
480 "cvtps2pi {$src, $dst|$dst, $src}", []>;
481 def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
482 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
483 def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
484 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
486 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
487 "cvtps2dq {$src, $dst|$dst, $src}", []>;
488 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
489 "cvtps2dq {$src, $dst|$dst, $src}", []>;
490 // SSE2 packed instructions with XD prefix
491 def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
492 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
493 def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
494 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
496 // SSE2 instructions without OpSize prefix
497 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
498 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// Fix: the memory form was declared MRMSrcReg; its source operand is
// f64mem, so the ModRM format must be MRMSrcMem (the reg form above keeps
// MRMSrcReg).
500 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
501 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
504 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
505 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Fix: the memory form was declared MRMSrcReg; its source operand is
// f128mem, so the ModRM format must be MRMSrcMem.
506 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
507 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
510 let isTwoAddress = 1 in {
511 let isCommutable = 1 in {
512 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
513 "addps {$src2, $dst|$dst, $src2}",
514 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
515 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
516 "addpd {$src2, $dst|$dst, $src2}",
517 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
518 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
519 "mulps {$src2, $dst|$dst, $src2}",
520 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
521 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
522 "mulpd {$src2, $dst|$dst, $src2}",
523 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
526 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
527 "addps {$src2, $dst|$dst, $src2}",
528 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
529 (load addr:$src2))))]>;
530 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
531 "addpd {$src2, $dst|$dst, $src2}",
532 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
533 (load addr:$src2))))]>;
534 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
535 "mulps {$src2, $dst|$dst, $src2}",
536 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
537 (load addr:$src2))))]>;
538 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
539 "mulpd {$src2, $dst|$dst, $src2}",
540 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
541 (load addr:$src2))))]>;
543 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
544 "divps {$src2, $dst|$dst, $src2}",
545 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
546 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
547 "divps {$src2, $dst|$dst, $src2}",
548 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
549 (load addr:$src2))))]>;
550 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
551 "divpd {$src2, $dst|$dst, $src2}",
552 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
553 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
554 "divpd {$src2, $dst|$dst, $src2}",
555 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
556 (load addr:$src2))))]>;
558 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
559 "subps {$src2, $dst|$dst, $src2}",
560 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
561 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
562 "subps {$src2, $dst|$dst, $src2}",
563 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
564 (load addr:$src2))))]>;
565 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
566 "subpd {$src2, $dst|$dst, $src2}",
567 [(set VR128:$dst, (fsub VR128:$src1, VR128:$src2))]>;
568 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
569 "subpd {$src2, $dst|$dst, $src2}",
570 [(set VR128:$dst, (fsub VR128:$src1,
571 (load addr:$src2)))]>;
574 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
575 "sqrtps {$src, $dst|$dst, $src}",
576 [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;
577 def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
578 "sqrtps {$src, $dst|$dst, $src}",
579 [(set VR128:$dst, (v4f32 (fsqrt (load addr:$src))))]>;
580 def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
581 "sqrtpd {$src, $dst|$dst, $src}",
582 [(set VR128:$dst, (v2f64 (fsqrt VR128:$src)))]>;
583 def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
584 "sqrtpd {$src, $dst|$dst, $src}",
585 [(set VR128:$dst, (v2f64 (fsqrt (load addr:$src))))]>;
587 def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops VR128:$dst, VR128:$src),
588 "rsqrtps {$src, $dst|$dst, $src}", []>;
589 def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
590 "rsqrtps {$src, $dst|$dst, $src}", []>;
591 def RCPPSrr : PSI<0x53, MRMSrcReg, (ops VR128:$dst, VR128:$src),
592 "rcpps {$src, $dst|$dst, $src}", []>;
593 def RCPPSrm : PSI<0x53, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
594 "rcpps {$src, $dst|$dst, $src}", []>;
596 def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
597 "maxps {$src, $dst|$dst, $src}", []>;
598 def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
599 "maxps {$src, $dst|$dst, $src}", []>;
600 def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
601 "maxpd {$src, $dst|$dst, $src}", []>;
602 def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
603 "maxpd {$src, $dst|$dst, $src}", []>;
604 def MINPSrr : PSI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
605 "minps {$src, $dst|$dst, $src}", []>;
606 def MINPSrm : PSI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
607 "minps {$src, $dst|$dst, $src}", []>;
608 def MINPDrr : PDI<0x5D, MRMSrcReg, (ops VR128:$dst, VR128:$src),
609 "minpd {$src, $dst|$dst, $src}", []>;
610 def MINPDrm : PDI<0x5D, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
611 "minpd {$src, $dst|$dst, $src}", []>;
614 let isTwoAddress = 1 in {
615 let isCommutable = 1 in {
616 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
617 "andps {$src2, $dst|$dst, $src2}",
618 [(set VR128:$dst, (v4i32 (and VR128:$src1, VR128:$src2)))]>;
619 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
620 "andpd {$src2, $dst|$dst, $src2}",
621 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
622 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
623 "orps {$src2, $dst|$dst, $src2}",
624 [(set VR128:$dst, (v4i32 (or VR128:$src1, VR128:$src2)))]>;
625 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
626 "orpd {$src2, $dst|$dst, $src2}",
627 [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
628 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
629 "xorps {$src2, $dst|$dst, $src2}",
630 [(set VR128:$dst, (v4i32 (xor VR128:$src1, VR128:$src2)))]>;
631 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
632 "xorpd {$src2, $dst|$dst, $src2}",
633 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
635 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
636 "andps {$src2, $dst|$dst, $src2}",
637 [(set VR128:$dst, (v4i32 (and VR128:$src1,
638 (load addr:$src2))))]>;
639 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
640 "andpd {$src2, $dst|$dst, $src2}",
641 [(set VR128:$dst, (v2i64 (and VR128:$src1,
642 (load addr:$src2))))]>;
643 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
644 "orps {$src2, $dst|$dst, $src2}",
645 [(set VR128:$dst, (v4i32 (or VR128:$src1,
646 (load addr:$src2))))]>;
647 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
648 "orpd {$src2, $dst|$dst, $src2}",
649 [(set VR128:$dst, (v2i64 (or VR128:$src1,
650 (load addr:$src2))))]>;
651 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
652 "xorps {$src2, $dst|$dst, $src2}",
653 [(set VR128:$dst, (v4i32 (xor VR128:$src1,
654 (load addr:$src2))))]>;
655 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
656 "xorpd {$src2, $dst|$dst, $src2}",
657 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
658 (load addr:$src2))))]>;
659 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
660 "andnps {$src2, $dst|$dst, $src2}",
661 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
663 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
664 "andnps {$src2, $dst|$dst, $src2}",
665 [(set VR128:$dst, (v4i32 (and (not VR128:$src1),
666 (load addr:$src2))))]>;
667 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
668 "andnpd {$src2, $dst|$dst, $src2}",
669 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
// andnpd mem form: computes (~$src1) & mem128.
// Fix: the pattern was missing the (not ...) on $src1, which would have
// matched a plain AND to the and-not instruction (compare ANDNPDrr and
// ANDNPSrm above, which both complement $src1).
672 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
673 "andnpd {$src2, $dst|$dst, $src2}",
674 [(set VR128:$dst, (v2i64 (and (not VR128:$src1),
675 (load addr:$src2))))]>;
// Packed FP compares.  Two-address: $src1 is tied to $dst.  The $cc operand
// (SSECC) selects the predicate, printed into the mnemonic as ${cc} and
// emitted as the trailing imm8.  No selection patterns yet (pattern list is
// empty); these records currently provide only assembly and encoding.
let isTwoAddress = 1 in {
def CMPPSrr : PSI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPSrm : PSI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
def CMPPDrr : PDI<0xC2, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
def CMPPDrm : PDI<0xC2, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                  "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
693 // Shuffle and unpack instructions
// PSHUFW (MMX): shuffle the four words of the source per the imm8 control.
// Bug fix: for opcode 0F 70 the ModRM.reg field encodes the destination and
// ModRM.r/m the source, which in LLVM's format taxonomy is MRMSrcReg (the
// memory form below already correctly uses MRMSrcMem).  The register form was
// tagged MRMDestReg, which would emit the ModRM operands swapped.
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// PSHUFD (SSE2): shuffle the four dwords of the source per the imm8 control.
// Bug fix: same encoding issue as PSHUFW — 66 0F 70 puts the destination in
// ModRM.reg and the source in ModRM.r/m, so the register form must be
// MRMSrcReg, not MRMDestReg.
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
// SHUFPS / SHUFPD: two-address shuffles; $src1 is tied to $dst and the imm8
// ($src3) selects which elements are taken from each source.  Encoding-only
// for now (empty pattern lists); shuffle selection is done via the Pat<>s at
// the end of the file.
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
// UNPCKH* interleave the high elements of the two sources; UNPCKL* the low
// elements.  Encoding-only records (empty pattern lists).
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
747 //===----------------------------------------------------------------------===//
748 // SSE integer instructions
749 //===----------------------------------------------------------------------===//
// MOVD: move a 32-bit GPR / m32 into the low dword of an XMM register (and
// back).  The rr form has a scalar_to_vector selection pattern; rm/mr are
// encoding-only for now.
// NOTE(review): the "(set VR128:$dst," line of the MOVD128rr pattern appears
// truncated in this copy — verify against upstream.
def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
                  "movd {$src, $dst|$dst, $src}",
                           (v4i32 (scalar_to_vector R32:$src)))]>;
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                  "movd {$src, $dst|$dst, $src}", []>;
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                  "movd {$src, $dst|$dst, $src}", []>;
// SSE2 instructions with XS prefix
// MOVQ load forms (F3 0F 7E): move a 64-bit value into the low qword of an
// XMM register, zeroing the upper half.
// NOTE(review): the MOVQ128rr record appears truncated in this copy (its
// "(set ..." line and trailing Requires are missing) — verify upstream.
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                "movq {$src, $dst|$dst, $src}",
                         (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                "movq {$src, $dst|$dst, $src}", []>, XS;
// MOVQ store form (66 0F D6): store the low qword of an XMM register to m64.
// Bug fix: the destination is the memory operand ($dst is i64mem), so the
// format must be MRMDestMem — MRMSrcMem would encode the operands backwards
// (compare MOVD128mr above, which correctly uses MRMDestMem).
def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
                "movq {$src, $dst|$dst, $src}", []>;
774 //===----------------------------------------------------------------------===//
775 // Alias Instructions
776 //===----------------------------------------------------------------------===//
// Aliases: re-use MOVAPS/MOVAPD encodings to view a scalar FP register as a
// 128-bit vector (scalar_to_vector).  No actual move is needed beyond a
// register copy.
// NOTE(review): the "(set VR128:$dst," line of each pattern appears
// truncated in this copy — verify against upstream.
def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src),
                      "movaps {$src, $dst|$dst, $src}",
                               (v4f32 (scalar_to_vector FR32:$src)))]>;
def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                      "movapd {$src, $dst|$dst, $src}",
                               (v2f64 (scalar_to_vector FR64:$src)))]>;
788 //===----------------------------------------------------------------------===//
789 // Non-Instruction Patterns
790 //===----------------------------------------------------------------------===//
// 128-bit vector undef's.  All 128-bit vector types map to the same
// IMPLICIT_DEF of VR128; the integer and f64 vector types require SSE2.
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
// Store 128-bit integer vector values.  All integer vector stores re-use the
// aligned packed-single store (MOVAPSmr); the bit pattern is identical.
def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
// Scalar to v8i16 / v16i8.  The source may be a R32, but only the lower 8 or
// 16 bits are meaningful; MOVD moves the whole dword.
// NOTE(review): the trailing Requires<...> line of each pattern appears
// truncated in this copy — verify against upstream.
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
// Splat v4f32 / v4i32: selected as SHUFPS of the source with itself, with
// the splat mask transformed into the SHUFPS imm8.
// NOTE(review): the trailing Requires<...> line of each pattern appears
// truncated in this copy — verify against upstream.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,
// Splat v2f64 / v2i64: MOVLHPS copies the low qword into both halves.
// Consistency fix: these patterns were guarded by HasSSE1, but the
// v2f64/v2i64 vector types are only legal with SSE2 (every other
// v2f64/v2i64 pattern in this file requires SSE2) — guard on HasSSE2.
// NOTE(review): MOVLHPSrr is invoked here with a single source operand —
// confirm its definition accepts that form.
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2f64 (MOVLHPSrr VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
          (v2i64 (MOVLHPSrr VR128:$src))>, Requires<[HasSSE2]>;
// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
// Selected as PSHUFD with the shuffle mask transformed into the imm8.
// NOTE(review): the trailing Requires<...> line of each pattern appears
// truncated in this copy — verify against upstream.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
          (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,