1 //====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 SSE instruction set, defining the instructions,
11 // and properties of the instructions which are needed for code generation,
12 // machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
17 // SSE specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
// SSE-specific SelectionDAG nodes.  Each def binds an X86ISD opcode string to
// a DAG node with the given type profile and node properties.
20 def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
// Bitwise AND/XOR of FP-typed values; marked commutative and associative so
// the DAG combiner may reorder/reassociate them.
22 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
23 [SDNPCommutative, SDNPAssociative]>;
24 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
25 [SDNPCommutative, SDNPAssociative]>;
// COMI / UCOMI: scalar FP compare-and-set-EFLAGS nodes (see the
// Int_COMIS*/Int_UCOMIS* instructions below that select them).
26 def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest,
28 def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest,
// Scalar-to-vector moves: one result, one operand.  ZEXT_S2VEC presumably
// zeroes the upper elements -- confirm against the lowering code.
30 def X86s2vec : SDNode<"X86ISD::S2VEC",
31 SDTypeProfile<1, 1, []>, []>;
32 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
33 SDTypeProfile<1, 1, []>, []>;
// PEXTRW (1 result, 2 operands) / PINSRW (1 result, 3 operands): word
// element extract/insert, matching the instructions of the same names.
34 def X86pextrw : SDNode<"X86ISD::PEXTRW",
35 SDTypeProfile<1, 2, []>, []>;
36 def X86pinsrw : SDNode<"X86ISD::PINSRW",
37 SDTypeProfile<1, 3, []>, []>;
39 //===----------------------------------------------------------------------===//
40 // SSE pattern fragments
41 //===----------------------------------------------------------------------===//
// X86loadpf32/64: an X86loadp (packed load) used at scalar f32/f64 type.
// These back the FsMOVAPS/FsMOVAPD scalar-via-movaps aliases below.
43 def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
44 def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
// 128-bit vector loads, one fragment per element type.
46 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
47 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
48 def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
49 def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
50 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
51 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// No-op bitcasts to each 128-bit vector type.
53 def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
54 def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
55 def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
56 def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
57 def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
58 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// fp32imm0: matches only the f32 immediate +0.0 (used by FsFLD0SS below).
60 def fp32imm0 : PatLeaf<(f32 fpimm), [{
61 return N->isExactlyValue(+0.0);
// PSxLDQ_imm: divide the immediate by 8 -- presumably converting a bit
// count to the byte count PSLLDQ/PSRLDQ encode; confirm at the use site.
64 def PSxLDQ_imm : SDNodeXForm<imm, [{
65 // Transformation function: imm >> 3
66 return getI32Imm(N->getValue() >> 3);
// Xforms that turn a shuffle mask (a build_vector of constant indices) into
// the 8-bit immediate operand of the SHUFP*/PSHUF* instructions.
69 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
71 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
72 return getI8Imm(X86::getShuffleSHUFImmediate(N));
75 // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
77 def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
78 return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
81 // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
83 def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
84 return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
// SSE_splat_mask: matches a splat shuffle mask and yields the SHUF immediate.
87 def SSE_splat_mask : PatLeaf<(build_vector), [{
88 return X86::isSplatMask(N);
89 }], SHUFFLE_get_shuf_imm>;
// PatLeaf predicates classifying a shuffle mask (a build_vector of constant
// indices).  Those with a trailing xform (SHUFFLE_get_*_imm) also produce the
// instruction's 8-bit shuffle immediate from the mask.
91 def MOVLHPS_shuffle_mask : PatLeaf<(build_vector), [{
92 return X86::isMOVLHPSMask(N);
95 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
96 return X86::isMOVHLPSMask(N);
99 def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
100 return X86::isUNPCKLMask(N);
103 def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
104 return X86::isUNPCKHMask(N);
107 def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
108 return X86::isUNPCKL_v_undef_Mask(N);
111 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
112 return X86::isPSHUFDMask(N);
113 }], SHUFFLE_get_shuf_imm>;
115 def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
116 return X86::isPSHUFHWMask(N);
117 }], SHUFFLE_get_pshufhw_imm>;
119 def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
120 return X86::isPSHUFLWMask(N);
121 }], SHUFFLE_get_pshuflw_imm>;
// The *_fp_* variants give SHUFP priority on FP vectors: they reject any
// mask SHUFP can also match, so patterns using them only fire when SHUFP
// cannot.
123 // Only use PSHUF* for v4f32 if SHUFP does not match.
124 def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{
125 return !X86::isSHUFPMask(N) &&
126 X86::isPSHUFDMask(N);
127 }], SHUFFLE_get_shuf_imm>;
129 def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{
130 return !X86::isSHUFPMask(N) &&
131 X86::isPSHUFHWMask(N);
132 }], SHUFFLE_get_pshufhw_imm>;
134 def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{
135 return !X86::isSHUFPMask(N) &&
136 X86::isPSHUFLWMask(N);
137 }], SHUFFLE_get_pshuflw_imm>;
139 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
140 return X86::isSHUFPMask(N);
141 }], SHUFFLE_get_shuf_imm>;
// Conversely, on integer vectors the PSHUF* forms get priority over SHUFP.
143 // Only use SHUFP for v4i32 if PSHUF* do not match.
144 def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{
145 return !X86::isPSHUFDMask(N) &&
146 !X86::isPSHUFHWMask(N) &&
147 !X86::isPSHUFLWMask(N) &&
149 }], SHUFFLE_get_shuf_imm>;
151 //===----------------------------------------------------------------------===//
152 // SSE scalar FP Instructions
153 //===----------------------------------------------------------------------===//
155 // Instruction templates
156 // SSI - SSE1 instructions with XS prefix.
157 // SDI - SSE2 instructions with XD prefix.
158 // PSI - SSE1 instructions with TB prefix.
159 // PDI - SSE2 instructions with TB and OpSize prefixes.
160 // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
161 // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
162 // S3SI - SSE3 instructions with XD prefix.
163 // S3DI - SSE3 instructions with TB and OpSize prefixes.
// Instruction format base classes.  Each fixes the opcode-prefix bytes and
// the subtarget predicate; the meanings of the mnemonics are listed in the
// comment block above (SSI = SSE1 + XS prefix, PDI = SSE2 + TB/OpSize, ...).
164 class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
165 : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
166 class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
167 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
168 class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
169 : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
170 class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
171 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
// The *Ii8 variants derive from X86Inst directly so they can pass the Imm8
// immediate kind; the pattern is attached via an explicit `let`.
172 class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
173 : X86Inst<o, F, Imm8, ops, asm>, TB, Requires<[HasSSE1]> {
174 let Pattern = pattern;
176 class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
177 : X86Inst<o, F, Imm8, ops, asm>, TB, OpSize, Requires<[HasSSE2]> {
178 let Pattern = pattern;
// SSE3 formats.
180 class S3SI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
181 : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE3]>;
182 class S3DI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
183 : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
185 //===----------------------------------------------------------------------===//
186 // Helpers for defining instructions that directly correspond to intrinsics.
// Helper classes for instructions that directly implement an intrinsic.
// Naming scheme: SS/SD = scalar single/double (v4f32/v2f64 result),
// PS/PD = packed single/double, S3S/S3D = SSE3; suffix r = reg unary,
// m = mem unary, rr = reg-reg binary, rm = reg-mem binary.
187 class SS_Intr<bits<8> o, string asm, Intrinsic IntId>
188 : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), asm,
189 [(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>;
190 class SS_Intm<bits<8> o, string asm, Intrinsic IntId>
191 : SSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src), asm,
192 [(set VR128:$dst, (v4f32 (IntId (load addr:$src))))]>;
193 class SD_Intr<bits<8> o, string asm, Intrinsic IntId>
194 : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), asm,
195 [(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>;
196 class SD_Intm<bits<8> o, string asm, Intrinsic IntId>
197 : SDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src), asm,
198 [(set VR128:$dst, (v2f64 (IntId (load addr:$src))))]>;
// Binary scalar forms; with isTwoAddress (set at the use sites) $src1 is
// tied to $dst.
200 class SS_Intrr<bits<8> o, string asm, Intrinsic IntId>
201 : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
202 [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
203 class SS_Intrm<bits<8> o, string asm, Intrinsic IntId>
204 : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2), asm,
205 [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
206 class SD_Intrr<bits<8> o, string asm, Intrinsic IntId>
207 : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
208 [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
209 class SD_Intrm<bits<8> o, string asm, Intrinsic IntId>
210 : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), asm,
211 [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
// Packed forms; the result type is left to the intrinsic's signature.
213 class PS_Intr<bits<8> o, string asm, Intrinsic IntId>
214 : PSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), asm,
215 [(set VR128:$dst, (IntId VR128:$src))]>;
216 class PS_Intm<bits<8> o, string asm, Intrinsic IntId>
217 : PSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src), asm,
218 [(set VR128:$dst, (IntId (loadv4f32 addr:$src)))]>;
219 class PD_Intr<bits<8> o, string asm, Intrinsic IntId>
220 : PDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), asm,
221 [(set VR128:$dst, (IntId VR128:$src))]>;
222 class PD_Intm<bits<8> o, string asm, Intrinsic IntId>
223 : PDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src), asm,
224 [(set VR128:$dst, (IntId (loadv2f64 addr:$src)))]>;
226 class PS_Intrr<bits<8> o, string asm, Intrinsic IntId>
227 : PSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
228 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
229 class PS_Intrm<bits<8> o, string asm, Intrinsic IntId>
230 : PSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2), asm,
231 [(set VR128:$dst, (IntId VR128:$src1, (loadv4f32 addr:$src2)))]>;
232 class PD_Intrr<bits<8> o, string asm, Intrinsic IntId>
233 : PDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
234 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
235 class PD_Intrm<bits<8> o, string asm, Intrinsic IntId>
236 : PDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), asm,
237 [(set VR128:$dst, (IntId VR128:$src1, (loadv2f64 addr:$src2)))]>;
// SSE3 binary forms take a full 128-bit memory operand.
239 class S3S_Intrr<bits<8> o, string asm, Intrinsic IntId>
240 : S3SI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
241 [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
242 class S3S_Intrm<bits<8> o, string asm, Intrinsic IntId>
243 : S3SI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), asm,
244 [(set VR128:$dst, (v4f32 (IntId VR128:$src1,
245 (loadv4f32 addr:$src2))))]>;
246 class S3D_Intrr<bits<8> o, string asm, Intrinsic IntId>
247 : S3DI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), asm,
248 [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
249 class S3D_Intrm<bits<8> o, string asm, Intrinsic IntId>
250 : S3DI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), asm,
251 [(set VR128:$dst, (v2f64 (IntId VR128:$src1,
252 (loadv2f64 addr:$src2))))]>;
254 // Some 'special' instructions
// Pseudo instructions materializing an undefined value in an SSE scalar
// register; they carry no encoding and only give the register allocator a
// def of the register.
//
// FIX: the FR32 variant previously required HasSSE2, but FR32/f32 scalar
// code is available from SSE1 on (cf. the SSI class above, which requires
// only HasSSE1); requiring SSE2 made f32 IMPLICIT_DEF unavailable on
// SSE1-only subtargets.  FR64 genuinely needs SSE2.
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
262 // CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
263 // scheduler into a branch sequence.
// Pseudo select: SSE has no general FP conditional move, so an X86cmov of
// FR32/FR64 is expanded by a custom scheduler inserter into a compare +
// branch diamond.
264 let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
265 def CMOV_FR32 : I<0, Pseudo,
266 (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
267 "#CMOV_FR32 PSEUDO!",
268 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
269 def CMOV_FR64 : I<0, Pseudo,
270 (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
271 "#CMOV_FR64 PSEUDO!",
272 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
// Scalar moves: reg-reg copies (no pattern; selected by the register
// allocator/copy lowering), loads, and stores of f32/f64.
276 def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
277 "movss {$src, $dst|$dst, $src}", []>;
278 def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
279 "movss {$src, $dst|$dst, $src}",
280 [(set FR32:$dst, (loadf32 addr:$src))]>;
281 def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
282 "movsd {$src, $dst|$dst, $src}", []>;
283 def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
284 "movsd {$src, $dst|$dst, $src}",
285 [(set FR64:$dst, (loadf64 addr:$src))]>;
287 def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
288 "movss {$src, $dst|$dst, $src}",
289 [(store FR32:$src, addr:$dst)]>;
290 def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
291 "movsd {$src, $dst|$dst, $src}",
292 [(store FR64:$src, addr:$dst)]>;
294 // Arithmetic instructions
// Scalar FP arithmetic.  isTwoAddress ties $src1 to $dst; add/mul are
// additionally marked commutable so the allocator may swap operands.
295 let isTwoAddress = 1 in {
296 let isCommutable = 1 in {
297 def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
298 "addss {$src2, $dst|$dst, $src2}",
299 [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
300 def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
301 "addsd {$src2, $dst|$dst, $src2}",
302 [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
303 def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
304 "mulss {$src2, $dst|$dst, $src2}",
305 [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
306 def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
307 "mulsd {$src2, $dst|$dst, $src2}",
308 [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
// Reg-mem forms fold the load of the second operand.
311 def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
312 "addss {$src2, $dst|$dst, $src2}",
313 [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
314 def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
315 "addsd {$src2, $dst|$dst, $src2}",
316 [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
317 def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
318 "mulss {$src2, $dst|$dst, $src2}",
319 [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
320 def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
321 "mulsd {$src2, $dst|$dst, $src2}",
322 [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
// Division and subtraction are not commutable.
324 def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
325 "divss {$src2, $dst|$dst, $src2}",
326 [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
327 def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
328 "divss {$src2, $dst|$dst, $src2}",
329 [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
330 def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
331 "divsd {$src2, $dst|$dst, $src2}",
332 [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
333 def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
334 "divsd {$src2, $dst|$dst, $src2}",
335 [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
337 def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
338 "subss {$src2, $dst|$dst, $src2}",
339 [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
340 def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
341 "subss {$src2, $dst|$dst, $src2}",
342 [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
343 def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
344 "subsd {$src2, $dst|$dst, $src2}",
345 [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
346 def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
347 "subsd {$src2, $dst|$dst, $src2}",
348 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
// sqrt maps to the generic fsqrt node; the approximate rsqrt/rcp forms get
// no pattern (their results are not IEEE-exact) and are only reachable via
// the Int_* intrinsic aliases below.
351 def SQRTSSr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
352 "sqrtss {$src, $dst|$dst, $src}",
353 [(set FR32:$dst, (fsqrt FR32:$src))]>;
354 def SQRTSSm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
355 "sqrtss {$src, $dst|$dst, $src}",
356 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
357 def SQRTSDr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
358 "sqrtsd {$src, $dst|$dst, $src}",
359 [(set FR64:$dst, (fsqrt FR64:$src))]>;
360 def SQRTSDm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
361 "sqrtsd {$src, $dst|$dst, $src}",
362 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
364 def RSQRTSSr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
365 "rsqrtss {$src, $dst|$dst, $src}", []>;
366 def RSQRTSSm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
367 "rsqrtss {$src, $dst|$dst, $src}", []>;
368 def RCPSSr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
369 "rcpss {$src, $dst|$dst, $src}", []>;
370 def RCPSSm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
371 "rcpss {$src, $dst|$dst, $src}", []>;
// Scalar FP max/min.  No DAG patterns are attached here; these encodings
// are reached through the Int_MAX*/Int_MIN* intrinsic aliases below.
//
// FIX: the *SD (scalar double) forms previously declared the tied source as
// FR32:$src1 while $dst is FR64.  Under isTwoAddress the tied operand must
// be in the same register class as the destination, so it must be FR64.
let isTwoAddress = 1 in {
def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "maxss {$src2, $dst|$dst, $src2}", []>;
def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "maxss {$src2, $dst|$dst, $src2}", []>;
def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "maxsd {$src2, $dst|$dst, $src2}", []>;
def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "maxsd {$src2, $dst|$dst, $src2}", []>;
def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                  "minss {$src2, $dst|$dst, $src2}", []>;
def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  "minss {$src2, $dst|$dst, $src2}", []>;
def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                  "minsd {$src2, $dst|$dst, $src2}", []>;
def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  "minsd {$src2, $dst|$dst, $src2}", []>;
392 // Aliases to match intrinsics which expect XMM operand(s).
// Intrinsic aliases: the same scalar operations, but operating on whole
// VR128 values so they match the int_x86_sse*_* intrinsics (which preserve
// the upper vector elements).
393 let isTwoAddress = 1 in {
394 let isCommutable = 1 in {
395 def Int_ADDSSrr : SS_Intrr<0x58, "addss {$src2, $dst|$dst, $src2}",
397 def Int_ADDSDrr : SD_Intrr<0x58, "addsd {$src2, $dst|$dst, $src2}",
398 int_x86_sse2_add_sd>;
399 def Int_MULSSrr : SS_Intrr<0x59, "mulss {$src2, $dst|$dst, $src2}",
401 def Int_MULSDrr : SD_Intrr<0x59, "mulsd {$src2, $dst|$dst, $src2}",
402 int_x86_sse2_mul_sd>;
405 def Int_ADDSSrm : SS_Intrm<0x58, "addss {$src2, $dst|$dst, $src2}",
407 def Int_ADDSDrm : SD_Intrm<0x58, "addsd {$src2, $dst|$dst, $src2}",
408 int_x86_sse2_add_sd>;
409 def Int_MULSSrm : SS_Intrm<0x59, "mulss {$src2, $dst|$dst, $src2}",
411 def Int_MULSDrm : SD_Intrm<0x59, "mulsd {$src2, $dst|$dst, $src2}",
412 int_x86_sse2_mul_sd>;
414 def Int_DIVSSrr : SS_Intrr<0x5E, "divss {$src2, $dst|$dst, $src2}",
416 def Int_DIVSSrm : SS_Intrm<0x5E, "divss {$src2, $dst|$dst, $src2}",
418 def Int_DIVSDrr : SD_Intrr<0x5E, "divsd {$src2, $dst|$dst, $src2}",
419 int_x86_sse2_div_sd>;
420 def Int_DIVSDrm : SD_Intrm<0x5E, "divsd {$src2, $dst|$dst, $src2}",
421 int_x86_sse2_div_sd>;
423 def Int_SUBSSrr : SS_Intrr<0x5C, "subss {$src2, $dst|$dst, $src2}",
425 def Int_SUBSSrm : SS_Intrm<0x5C, "subss {$src2, $dst|$dst, $src2}",
427 def Int_SUBSDrr : SD_Intrr<0x5C, "subsd {$src2, $dst|$dst, $src2}",
428 int_x86_sse2_sub_sd>;
429 def Int_SUBSDrm : SD_Intrm<0x5C, "subsd {$src2, $dst|$dst, $src2}",
430 int_x86_sse2_sub_sd>;
// Unary intrinsic forms (sqrt and the approximate rsqrt/rcp).
433 def Int_SQRTSSr : SS_Intr<0x51, "sqrtss {$src, $dst|$dst, $src}",
434 int_x86_sse_sqrt_ss>;
435 def Int_SQRTSSm : SS_Intm<0x51, "sqrtss {$src, $dst|$dst, $src}",
436 int_x86_sse_sqrt_ss>;
437 def Int_SQRTSDr : SD_Intr<0x51, "sqrtsd {$src, $dst|$dst, $src}",
438 int_x86_sse2_sqrt_sd>;
439 def Int_SQRTSDm : SD_Intm<0x51, "sqrtsd {$src, $dst|$dst, $src}",
440 int_x86_sse2_sqrt_sd>;
442 def Int_RSQRTSSr : SS_Intr<0x52, "rsqrtss {$src, $dst|$dst, $src}",
443 int_x86_sse_rsqrt_ss>;
444 def Int_RSQRTSSm : SS_Intm<0x52, "rsqrtss {$src, $dst|$dst, $src}",
445 int_x86_sse_rsqrt_ss>;
446 def Int_RCPSSr : SS_Intr<0x53, "rcpss {$src, $dst|$dst, $src}",
448 def Int_RCPSSm : SS_Intm<0x53, "rcpss {$src, $dst|$dst, $src}",
// Intrinsic max/min (not commutable: NaN/-0.0 handling is order-sensitive).
451 let isTwoAddress = 1 in {
452 def Int_MAXSSrr : SS_Intrr<0x5F, "maxss {$src2, $dst|$dst, $src2}",
454 def Int_MAXSSrm : SS_Intrm<0x5F, "maxss {$src2, $dst|$dst, $src2}",
456 def Int_MAXSDrr : SD_Intrr<0x5F, "maxsd {$src2, $dst|$dst, $src2}",
457 int_x86_sse2_max_sd>;
458 def Int_MAXSDrm : SD_Intrm<0x5F, "maxsd {$src2, $dst|$dst, $src2}",
459 int_x86_sse2_max_sd>;
460 def Int_MINSSrr : SS_Intrr<0x5D, "minss {$src2, $dst|$dst, $src2}",
462 def Int_MINSSrm : SS_Intrm<0x5D, "minss {$src2, $dst|$dst, $src2}",
464 def Int_MINSDrr : SD_Intrr<0x5D, "minsd {$src2, $dst|$dst, $src2}",
465 int_x86_sse2_min_sd>;
466 def Int_MINSDrm : SD_Intrm<0x5D, "minsd {$src2, $dst|$dst, $src2}",
467 int_x86_sse2_min_sd>;
470 // Conversion instructions
// Conversions.  CVTSS2SI (round-to-current-mode) has no pattern; the
// truncating CVTT* forms implement fp_to_sint, and CVTSI2* implement
// sint_to_fp.
471 def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (ops R32:$dst, FR32:$src),
472 "cvtss2si {$src, $dst|$dst, $src}", []>;
473 def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
474 "cvtss2si {$src, $dst|$dst, $src}", []>;
476 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
477 "cvttss2si {$src, $dst|$dst, $src}",
478 [(set R32:$dst, (fp_to_sint FR32:$src))]>;
479 def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
480 "cvttss2si {$src, $dst|$dst, $src}",
481 [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
482 def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
483 "cvttsd2si {$src, $dst|$dst, $src}",
484 [(set R32:$dst, (fp_to_sint FR64:$src))]>;
485 def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
486 "cvttsd2si {$src, $dst|$dst, $src}",
487 [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 rounding.
488 def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
489 "cvtsd2ss {$src, $dst|$dst, $src}",
490 [(set FR32:$dst, (fround FR64:$src))]>;
491 def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
492 "cvtsd2ss {$src, $dst|$dst, $src}",
493 [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// i32 -> f32/f64.
494 def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
495 "cvtsi2ss {$src, $dst|$dst, $src}",
496 [(set FR32:$dst, (sint_to_fp R32:$src))]>;
497 def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
498 "cvtsi2ss {$src, $dst|$dst, $src}",
499 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
500 def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
501 "cvtsi2sd {$src, $dst|$dst, $src}",
502 [(set FR64:$dst, (sint_to_fp R32:$src))]>;
503 def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
504 "cvtsi2sd {$src, $dst|$dst, $src}",
505 [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
// cvtss2sd is an SSE2 instruction with the XS prefix, so it cannot use the
// SDI (XD) class and derives from I directly.
506 // SSE2 instructions with XS prefix
507 def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
508 "cvtss2sd {$src, $dst|$dst, $src}",
509 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
511 def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
512 "cvtss2sd {$src, $dst|$dst, $src}",
513 [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
516 // Comparison instructions
// Scalar compares.  CMPSS/CMPSD take the SSE condition code as a pseudo
// operand ($cc) folded into the mnemonic via ${cc}.
517 let isTwoAddress = 1 in {
518 def CMPSSrr : SSI<0xC2, MRMSrcReg,
519 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
520 "cmp${cc}ss {$src, $dst|$dst, $src}",
522 def CMPSSrm : SSI<0xC2, MRMSrcMem,
523 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
524 "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
525 def CMPSDrr : SDI<0xC2, MRMSrcReg,
526 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
527 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
528 def CMPSDrm : SDI<0xC2, MRMSrcMem,
529 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
530 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// ucomiss/ucomisd set EFLAGS from a scalar compare (X86cmp); no register
// result, only the implicit flags def.
533 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
534 "ucomiss {$src2, $src1|$src1, $src2}",
535 [(X86cmp FR32:$src1, FR32:$src2)]>;
536 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
537 "ucomiss {$src2, $src1|$src1, $src2}",
538 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
539 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
540 "ucomisd {$src2, $src1|$src1, $src2}",
541 [(X86cmp FR64:$src1, FR64:$src2)]>;
542 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
543 "ucomisd {$src2, $src1|$src1, $src2}",
544 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
546 // Aliases to match intrinsics which expect XMM operand(s).
// Intrinsic compare aliases operating on whole VR128 values; the condition
// code is passed through to int_x86_sse_cmp_ss as an immediate.
547 let isTwoAddress = 1 in {
548 def Int_CMPSSrr : SSI<0xC2, MRMSrcReg,
549 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
550 "cmp${cc}ss {$src, $dst|$dst, $src}",
551 [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
552 VR128:$src, imm:$cc))]>;
553 def Int_CMPSSrm : SSI<0xC2, MRMSrcMem,
554 (ops VR128:$dst, VR128:$src1, f32mem:$src, SSECC:$cc),
555 "cmp${cc}ss {$src, $dst|$dst, $src}",
556 [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
557 (load addr:$src), imm:$cc))]>;
558 def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
559 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
560 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
561 def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
562 (ops VR128:$dst, VR128:$src1, f64mem:$src, SSECC:$cc),
563 "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
// ucomis*/comis* on VR128 operands, selecting the X86ucomi/X86comi
// flag-setting nodes defined at the top of the file.
566 def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
567 "ucomiss {$src2, $src1|$src1, $src2}",
568 [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
569 def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
570 "ucomiss {$src2, $src1|$src1, $src2}",
571 [(X86ucomi (v4f32 VR128:$src1), (loadv4f32 addr:$src2))]>;
572 def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
573 "ucomisd {$src2, $src1|$src1, $src2}",
574 [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
575 def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
576 "ucomisd {$src2, $src1|$src1, $src2}",
577 [(X86ucomi (v2f64 VR128:$src1), (loadv2f64 addr:$src2))]>;
579 def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
580 "comiss {$src2, $src1|$src1, $src2}",
581 [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
582 def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
583 "comiss {$src2, $src1|$src1, $src2}",
584 [(X86comi (v4f32 VR128:$src1), (loadv4f32 addr:$src2))]>;
585 def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
586 "comisd {$src2, $src1|$src1, $src2}",
587 [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
588 def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
589 "comisd {$src2, $src1|$src1, $src2}",
590 [(X86comi (v2f64 VR128:$src1), (loadv2f64 addr:$src2))]>;
592 // Aliases of packed instructions for scalar use. These all have names that
595 // Alias instructions that map fld0 to pxor for sse.
596 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Materialize scalar +0.0 with a self-xor (pxor reg,reg).
// NOTE(review): the encoding here (0xEF with TB+OpSize, i.e. 66 0F EF) is
// the SSE2 `pxor`, yet FsFLD0SS requires only HasSSE1 -- confirm whether
// this should use xorps (0x57, no OpSize) on SSE1-only targets.
597 def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
598 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
599 Requires<[HasSSE1]>, TB, OpSize;
600 def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
601 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
602 Requires<[HasSSE2]>, TB, OpSize;
604 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
605 // Upper bits are disregarded.
606 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
607 "movaps {$src, $dst|$dst, $src}", []>;
608 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
609 "movapd {$src, $dst|$dst, $src}", []>;
611 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
612 // Upper bits are disregarded.
613 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
614 "movaps {$src, $dst|$dst, $src}",
615 [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
616 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
617 "movapd {$src, $dst|$dst, $src}",
618 [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
620 // Alias bitwise logical operations using SSE logical ops on packed FP values.
// Scalar bitwise logic via the packed andps/orps/xorps/andnps encodings,
// selected from the X86fand/X86fxor nodes.  The or/andn forms carry no
// pattern here.
621 let isTwoAddress = 1 in {
622 let isCommutable = 1 in {
623 def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
624 "andps {$src2, $dst|$dst, $src2}",
625 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
626 def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
627 "andpd {$src2, $dst|$dst, $src2}",
628 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
629 def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
630 "orps {$src2, $dst|$dst, $src2}", []>;
631 def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
632 "orpd {$src2, $dst|$dst, $src2}", []>;
633 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
634 "xorps {$src2, $dst|$dst, $src2}",
635 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
636 def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
637 "xorpd {$src2, $dst|$dst, $src2}",
638 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
// Memory forms fold a packed (f128mem) load via X86loadpf32/64.
640 def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
641 "andps {$src2, $dst|$dst, $src2}",
642 [(set FR32:$dst, (X86fand FR32:$src1,
643 (X86loadpf32 addr:$src2)))]>;
644 def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
645 "andpd {$src2, $dst|$dst, $src2}",
646 [(set FR64:$dst, (X86fand FR64:$src1,
647 (X86loadpf64 addr:$src2)))]>;
648 def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
649 "orps {$src2, $dst|$dst, $src2}", []>;
650 def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
651 "orpd {$src2, $dst|$dst, $src2}", []>;
652 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
653 "xorps {$src2, $dst|$dst, $src2}",
654 [(set FR32:$dst, (X86fxor FR32:$src1,
655 (X86loadpf32 addr:$src2)))]>;
656 def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
657 "xorpd {$src2, $dst|$dst, $src2}",
658 [(set FR64:$dst, (X86fxor FR64:$src1,
659 (X86loadpf64 addr:$src2)))]>;
// andn (x & ~y) is not commutable.
661 def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
662 "andnps {$src2, $dst|$dst, $src2}", []>;
663 def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
664 "andnps {$src2, $dst|$dst, $src2}", []>;
665 def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
666 "andnpd {$src2, $dst|$dst, $src2}", []>;
667 def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
668 "andnpd {$src2, $dst|$dst, $src2}", []>;
671 //===----------------------------------------------------------------------===//
672 // SSE packed FP Instructions
673 //===----------------------------------------------------------------------===//
675 // Some 'special' instructions
// Undefined 128-bit value pseudo (typed v4f32 in the pattern).
676 def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
677 "#IMPLICIT_DEF $dst",
678 [(set VR128:$dst, (v4f32 (undef)))]>,
// Aligned packed moves.  Reg-reg copies carry no pattern; loads/stores
// match the loadv4f32/loadv2f64 fragments.
682 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
683 "movaps {$src, $dst|$dst, $src}", []>;
684 def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
685 "movaps {$src, $dst|$dst, $src}",
686 [(set VR128:$dst, (loadv4f32 addr:$src))]>;
687 def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
688 "movapd {$src, $dst|$dst, $src}", []>;
689 def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
690 "movapd {$src, $dst|$dst, $src}",
691 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
693 def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
694 "movaps {$src, $dst|$dst, $src}",
695 [(store (v4f32 VR128:$src), addr:$dst)]>;
696 def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
697 "movapd {$src, $dst|$dst, $src}",
698 [(store (v2f64 VR128:$src), addr:$dst)]>;
// Unaligned moves; no patterns, so they are only emitted when explicitly
// selected (e.g. for intrinsics or unaligned lowering elsewhere).
700 def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
701 "movups {$src, $dst|$dst, $src}", []>;
702 def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
703 "movups {$src, $dst|$dst, $src}", []>;
704 def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
705 "movups {$src, $dst|$dst, $src}", []>;
706 def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
707 "movupd {$src, $dst|$dst, $src}", []>;
708 def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
709 "movupd {$src, $dst|$dst, $src}", []>;
710 def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
711 "movupd {$src, $dst|$dst, $src}", []>;
713 let isTwoAddress = 1 in {
714 def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
715 "movlps {$src2, $dst|$dst, $src2}", []>;
716 def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
717 "movlpd {$src2, $dst|$dst, $src2}", []>;
718 def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
719 "movhps {$src2, $dst|$dst, $src2}", []>;
720 def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
721 "movhpd {$src2, $dst|$dst, $src2}",
723 (v2f64 (vector_shuffle VR128:$src1,
724 (scalar_to_vector (loadf64 addr:$src2)),
725 UNPCKL_shuffle_mask)))]>;
728 def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
729 "movlps {$src, $dst|$dst, $src}", []>;
730 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
731 "movlpd {$src, $dst|$dst, $src}",
732 [(store (f64 (vector_extract (v2f64 VR128:$src),
733 (i32 0))), addr:$dst)]>;
735 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
736 "movhps {$src, $dst|$dst, $src}", []>;
737 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
738 "movhpd {$src, $dst|$dst, $src}",
739 [(store (f64 (vector_extract
740 (v2f64 (vector_shuffle VR128:$src, (undef),
741 UNPCKH_shuffle_mask)), (i32 0))),
744 let isTwoAddress = 1 in {
745 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
746 "movlhps {$src2, $dst|$dst, $src2}",
748 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
749 MOVLHPS_shuffle_mask)))]>;
751 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
752 "movhlps {$src2, $dst|$dst, $src2}",
754 (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
755 MOVHLPS_shuffle_mask)))]>;
758 // Conversion instructions
759 def CVTPI2PSr : PSI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
760 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
761 def CVTPI2PSm : PSI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
762 "cvtpi2ps {$src, $dst|$dst, $src}", []>;
763 def CVTPI2PDr : PDI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
764 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
765 def CVTPI2PDm : PDI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
766 "cvtpi2pd {$src, $dst|$dst, $src}", []>;
768 // SSE2 instructions without OpSize prefix
769 def CVTDQ2PSr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
770 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
772 def CVTDQ2PSm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
773 "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
776 // SSE2 instructions with XS prefix
777 def CVTDQ2PDr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR64:$src),
778 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
779 XS, Requires<[HasSSE2]>;
780 def CVTDQ2PDm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
781 "cvtdq2pd {$src, $dst|$dst, $src}", []>,
782 XS, Requires<[HasSSE2]>;
784 def CVTPS2PIr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
785 "cvtps2pi {$src, $dst|$dst, $src}", []>;
786 def CVTPS2PIm : PSI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
787 "cvtps2pi {$src, $dst|$dst, $src}", []>;
788 def CVTPD2PIr : PDI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
789 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
790 def CVTPD2PIm : PDI<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
791 "cvtpd2pi {$src, $dst|$dst, $src}", []>;
793 def CVTPS2DQr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
794 "cvtps2dq {$src, $dst|$dst, $src}", []>;
795 def CVTPS2DQm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
796 "cvtps2dq {$src, $dst|$dst, $src}", []>;
797 // SSE2 packed instructions with XD prefix
798 def CVTPD2DQr : SDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
799 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
800 def CVTPD2DQm : SDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
801 "cvtpd2dq {$src, $dst|$dst, $src}", []>;
803 // SSE2 instructions without OpSize prefix
804 def CVTPS2PDr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
805 "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
// Memory form: must be encoded MRMSrcMem (was MRMSrcReg) since $src is f64mem.
def CVTPS2PDm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                  "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
811 def CVTPD2PSr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
812 "cvtpd2ps {$src, $dst|$dst, $src}", []>;
// Memory form: must be encoded MRMSrcMem (was MRMSrcReg) since $src is f128mem.
def CVTPD2PSm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
                    "cvtpd2ps {$src, $dst|$dst, $src}", []>;
817 let isTwoAddress = 1 in {
818 let isCommutable = 1 in {
819 def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
820 "addps {$src2, $dst|$dst, $src2}",
821 [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
822 def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
823 "addpd {$src2, $dst|$dst, $src2}",
824 [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
825 def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
826 "mulps {$src2, $dst|$dst, $src2}",
827 [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
828 def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
829 "mulpd {$src2, $dst|$dst, $src2}",
830 [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
833 def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
834 "addps {$src2, $dst|$dst, $src2}",
835 [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
836 (load addr:$src2))))]>;
837 def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
838 "addpd {$src2, $dst|$dst, $src2}",
839 [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
840 (load addr:$src2))))]>;
841 def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
842 "mulps {$src2, $dst|$dst, $src2}",
843 [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
844 (load addr:$src2))))]>;
845 def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
846 "mulpd {$src2, $dst|$dst, $src2}",
847 [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
848 (load addr:$src2))))]>;
850 def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
851 "divps {$src2, $dst|$dst, $src2}",
852 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
853 def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
854 "divps {$src2, $dst|$dst, $src2}",
855 [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
856 (load addr:$src2))))]>;
857 def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
858 "divpd {$src2, $dst|$dst, $src2}",
859 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
860 def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
861 "divpd {$src2, $dst|$dst, $src2}",
862 [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
863 (load addr:$src2))))]>;
865 def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
866 "subps {$src2, $dst|$dst, $src2}",
867 [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
868 def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
869 "subps {$src2, $dst|$dst, $src2}",
870 [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
871 (load addr:$src2))))]>;
872 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
873 "subpd {$src2, $dst|$dst, $src2}",
874 [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
875 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
876 "subpd {$src2, $dst|$dst, $src2}",
877 [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
878 (load addr:$src2))))]>;
881 def SQRTPSr : PS_Intr<0x51, "sqrtps {$src, $dst|$dst, $src}",
882 int_x86_sse_sqrt_ps>;
883 def SQRTPSm : PS_Intm<0x51, "sqrtps {$src, $dst|$dst, $src}",
884 int_x86_sse_sqrt_ps>;
885 def SQRTPDr : PD_Intr<0x51, "sqrtpd {$src, $dst|$dst, $src}",
886 int_x86_sse2_sqrt_pd>;
887 def SQRTPDm : PD_Intm<0x51, "sqrtpd {$src, $dst|$dst, $src}",
888 int_x86_sse2_sqrt_pd>;
890 def RSQRTPSr : PS_Intr<0x52, "rsqrtps {$src, $dst|$dst, $src}",
891 int_x86_sse_rsqrt_ps>;
892 def RSQRTPSm : PS_Intm<0x52, "rsqrtps {$src, $dst|$dst, $src}",
893 int_x86_sse_rsqrt_ps>;
894 def RCPPSr : PS_Intr<0x53, "rcpps {$src, $dst|$dst, $src}",
896 def RCPPSm : PS_Intm<0x53, "rcpps {$src, $dst|$dst, $src}",
899 let isTwoAddress = 1 in {
900 def MAXPSrr : PS_Intrr<0x5F, "maxps {$src2, $dst|$dst, $src2}",
902 def MAXPSrm : PS_Intrm<0x5F, "maxps {$src2, $dst|$dst, $src2}",
904 def MAXPDrr : PD_Intrr<0x5F, "maxpd {$src2, $dst|$dst, $src2}",
905 int_x86_sse2_max_pd>;
906 def MAXPDrm : PD_Intrm<0x5F, "maxpd {$src2, $dst|$dst, $src2}",
907 int_x86_sse2_max_pd>;
908 def MINPSrr : PS_Intrr<0x5D, "minps {$src2, $dst|$dst, $src2}",
910 def MINPSrm : PS_Intrm<0x5D, "minps {$src2, $dst|$dst, $src2}",
912 def MINPDrr : PD_Intrr<0x5D, "minpd {$src2, $dst|$dst, $src2}",
913 int_x86_sse2_min_pd>;
914 def MINPDrm : PD_Intrm<0x5D, "minpd {$src2, $dst|$dst, $src2}",
915 int_x86_sse2_min_pd>;
919 let isTwoAddress = 1 in {
920 let isCommutable = 1 in {
921 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
922 "andps {$src2, $dst|$dst, $src2}",
924 (and (bc_v4i32 (v4f32 VR128:$src1)),
925 (bc_v4i32 (v4f32 VR128:$src2))))]>;
926 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
927 "andpd {$src2, $dst|$dst, $src2}",
929 (and (bc_v2i64 (v2f64 VR128:$src1)),
930 (bc_v2i64 (v2f64 VR128:$src2))))]>;
931 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
932 "orps {$src2, $dst|$dst, $src2}",
934 (or (bc_v4i32 (v4f32 VR128:$src1)),
935 (bc_v4i32 (v4f32 VR128:$src2))))]>;
936 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
937 "orpd {$src2, $dst|$dst, $src2}",
939 (or (bc_v2i64 (v2f64 VR128:$src1)),
940 (bc_v2i64 (v2f64 VR128:$src2))))]>;
941 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
942 "xorps {$src2, $dst|$dst, $src2}",
944 (xor (bc_v4i32 (v4f32 VR128:$src1)),
945 (bc_v4i32 (v4f32 VR128:$src2))))]>;
946 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
947 "xorpd {$src2, $dst|$dst, $src2}",
949 (xor (bc_v2i64 (v2f64 VR128:$src1)),
950 (bc_v2i64 (v2f64 VR128:$src2))))]>;
952 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
953 "andps {$src2, $dst|$dst, $src2}",
955 (and (bc_v4i32 (v4f32 VR128:$src1)),
956 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
957 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
958 "andpd {$src2, $dst|$dst, $src2}",
960 (and (bc_v2i64 (v2f64 VR128:$src1)),
961 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
962 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
963 "orps {$src2, $dst|$dst, $src2}",
965 (or (bc_v4i32 (v4f32 VR128:$src1)),
966 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
967 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
968 "orpd {$src2, $dst|$dst, $src2}",
970 (or (bc_v2i64 (v2f64 VR128:$src1)),
971 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
972 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
973 "xorps {$src2, $dst|$dst, $src2}",
975 (xor (bc_v4i32 (v4f32 VR128:$src1)),
976 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
977 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
978 "xorpd {$src2, $dst|$dst, $src2}",
980 (xor (bc_v2i64 (v2f64 VR128:$src1)),
981 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
982 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
983 "andnps {$src2, $dst|$dst, $src2}",
985 (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
986 (bc_v4i32 (v4f32 VR128:$src2))))]>;
987 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
988 "andnps {$src2, $dst|$dst, $src2}",
990 (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
991 (bc_v4i32 (loadv4f32 addr:$src2))))]>;
992 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
993 "andnpd {$src2, $dst|$dst, $src2}",
995 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
996 (bc_v2i64 (v2f64 VR128:$src2))))]>;
997 def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
998 "andnpd {$src2, $dst|$dst, $src2}",
1000 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
1001 (bc_v2i64 (loadv2f64 addr:$src2))))]>;
1004 let isTwoAddress = 1 in {
1005 def CMPPSrr : PSIi8<0xC2, MRMSrcReg,
1006 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
1007 "cmp${cc}ps {$src, $dst|$dst, $src}",
1008 [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
1009 VR128:$src, imm:$cc))]>;
1010 def CMPPSrm : PSIi8<0xC2, MRMSrcMem,
1011 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
1012 "cmp${cc}ps {$src, $dst|$dst, $src}",
1013 [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
1014 (load addr:$src), imm:$cc))]>;
1015 def CMPPDrr : PDIi8<0xC2, MRMSrcReg,
1016 (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
1017 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
1018 def CMPPDrm : PDIi8<0xC2, MRMSrcMem,
1019 (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
1020 "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
1023 // Shuffle and unpack instructions
1024 let isTwoAddress = 1 in {
1025 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
1026 (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
1027 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
1028 [(set VR128:$dst, (v4f32 (vector_shuffle
1029 VR128:$src1, VR128:$src2,
1030 SHUFP_shuffle_mask:$src3)))]>;
1031 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
1032 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
1033 "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
1034 [(set VR128:$dst, (v4f32 (vector_shuffle
1035 VR128:$src1, (load addr:$src2),
1036 SHUFP_shuffle_mask:$src3)))]>;
1037 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
1038 (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
1039 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
1040 [(set VR128:$dst, (v2f64 (vector_shuffle
1041 VR128:$src1, VR128:$src2,
1042 SHUFP_shuffle_mask:$src3)))]>;
1043 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
1044 (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
1045 "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
1046 [(set VR128:$dst, (v2f64 (vector_shuffle
1047 VR128:$src1, (load addr:$src2),
1048 SHUFP_shuffle_mask:$src3)))]>;
1050 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
1051 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1052 "unpckhps {$src2, $dst|$dst, $src2}",
1053 [(set VR128:$dst, (v4f32 (vector_shuffle
1054 VR128:$src1, VR128:$src2,
1055 UNPCKH_shuffle_mask)))]>;
1056 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
1057 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1058 "unpckhps {$src2, $dst|$dst, $src2}",
1059 [(set VR128:$dst, (v4f32 (vector_shuffle
1060 VR128:$src1, (load addr:$src2),
1061 UNPCKH_shuffle_mask)))]>;
1062 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
1063 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1064 "unpckhpd {$src2, $dst|$dst, $src2}",
1065 [(set VR128:$dst, (v2f64 (vector_shuffle
1066 VR128:$src1, VR128:$src2,
1067 UNPCKH_shuffle_mask)))]>;
1068 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
1069 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1070 "unpckhpd {$src2, $dst|$dst, $src2}",
1071 [(set VR128:$dst, (v2f64 (vector_shuffle
1072 VR128:$src1, (load addr:$src2),
1073 UNPCKH_shuffle_mask)))]>;
1075 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
1076 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1077 "unpcklps {$src2, $dst|$dst, $src2}",
1078 [(set VR128:$dst, (v4f32 (vector_shuffle
1079 VR128:$src1, VR128:$src2,
1080 UNPCKL_shuffle_mask)))]>;
1081 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
1082 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1083 "unpcklps {$src2, $dst|$dst, $src2}",
1084 [(set VR128:$dst, (v4f32 (vector_shuffle
1085 VR128:$src1, (load addr:$src2),
1086 UNPCKL_shuffle_mask)))]>;
1087 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
1088 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1089 "unpcklpd {$src2, $dst|$dst, $src2}",
1090 [(set VR128:$dst, (v2f64 (vector_shuffle
1091 VR128:$src1, VR128:$src2,
1092 UNPCKL_shuffle_mask)))]>;
1093 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
1094 (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1095 "unpcklpd {$src2, $dst|$dst, $src2}",
1096 [(set VR128:$dst, (v2f64 (vector_shuffle
1097 VR128:$src1, (load addr:$src2),
1098 UNPCKL_shuffle_mask)))]>;
1102 let isTwoAddress = 1 in {
1103 def HADDPSrr : S3S_Intrr<0x7C, "haddps {$src2, $dst|$dst, $src2}",
1104 int_x86_sse3_hadd_ps>;
1105 def HADDPSrm : S3S_Intrm<0x7C, "haddps {$src2, $dst|$dst, $src2}",
1106 int_x86_sse3_hadd_ps>;
1107 def HADDPDrr : S3D_Intrr<0x7C, "haddpd {$src2, $dst|$dst, $src2}",
1108 int_x86_sse3_hadd_pd>;
1109 def HADDPDrm : S3D_Intrm<0x7C, "haddpd {$src2, $dst|$dst, $src2}",
1110 int_x86_sse3_hadd_pd>;
// SSE3 horizontal subtract.  Opcode fixed to 0x7D: 0x7C is haddps/haddpd;
// hsubps/hsubpd encode as 0F 7D with the F2/66 prefixes respectively.
def HSUBPSrr : S3S_Intrr<0x7D, "hsubps {$src2, $dst|$dst, $src2}",
                         int_x86_sse3_hsub_ps>;
def HSUBPSrm : S3S_Intrm<0x7D, "hsubps {$src2, $dst|$dst, $src2}",
                         int_x86_sse3_hsub_ps>;
def HSUBPDrr : S3D_Intrr<0x7D, "hsubpd {$src2, $dst|$dst, $src2}",
                         int_x86_sse3_hsub_pd>;
def HSUBPDrm : S3D_Intrm<0x7D, "hsubpd {$src2, $dst|$dst, $src2}",
                         int_x86_sse3_hsub_pd>;
1121 //===----------------------------------------------------------------------===//
1122 // SSE integer instructions
1123 //===----------------------------------------------------------------------===//
1125 // Move Instructions
// Aligned 128-bit integer moves (SSE2 movdqa).  The reg-reg form has no
// pattern; the load/store forms are matched as v4i32 whole-vector memory ops.
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4i32 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v4i32 VR128:$src), addr:$dst)]>;
1135 // 128-bit Integer Arithmetic
1136 let isTwoAddress = 1 in {
1137 let isCommutable = 1 in {
1138 def PADDBrr : PDI<0xFC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1139 "paddb {$src2, $dst|$dst, $src2}",
1140 [(set VR128:$dst, (v16i8 (add VR128:$src1, VR128:$src2)))]>;
1141 def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1142 "paddw {$src2, $dst|$dst, $src2}",
1143 [(set VR128:$dst, (v8i16 (add VR128:$src1, VR128:$src2)))]>;
1144 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1145 "paddd {$src2, $dst|$dst, $src2}",
1146 [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
1148 def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1149 "paddq {$src2, $dst|$dst, $src2}",
1150 [(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
1152 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1153 "paddb {$src2, $dst|$dst, $src2}",
1154 [(set VR128:$dst, (v16i8 (add VR128:$src1,
1155 (load addr:$src2))))]>;
1156 def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1157 "paddw {$src2, $dst|$dst, $src2}",
1158 [(set VR128:$dst, (v8i16 (add VR128:$src1,
1159 (load addr:$src2))))]>;
1160 def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1161 "paddd {$src2, $dst|$dst, $src2}",
1162 [(set VR128:$dst, (v4i32 (add VR128:$src1,
1163 (load addr:$src2))))]>;
// Asm mnemonic fixed: this is the quadword add ("paddq"), it printed "paddd".
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (add VR128:$src1,
                                            (load addr:$src2))))]>;
1169 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1170 "psubb {$src2, $dst|$dst, $src2}",
1171 [(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
1172 def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1173 "psubw {$src2, $dst|$dst, $src2}",
1174 [(set VR128:$dst, (v8i16 (sub VR128:$src1, VR128:$src2)))]>;
1175 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1176 "psubd {$src2, $dst|$dst, $src2}",
1177 [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
1178 def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1179 "psubq {$src2, $dst|$dst, $src2}",
1180 [(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
1182 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1183 "psubb {$src2, $dst|$dst, $src2}",
1184 [(set VR128:$dst, (v16i8 (sub VR128:$src1,
1185 (load addr:$src2))))]>;
1186 def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1187 "psubw {$src2, $dst|$dst, $src2}",
1188 [(set VR128:$dst, (v8i16 (sub VR128:$src1,
1189 (load addr:$src2))))]>;
1190 def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1191 "psubd {$src2, $dst|$dst, $src2}",
1192 [(set VR128:$dst, (v4i32 (sub VR128:$src1,
1193 (load addr:$src2))))]>;
// Asm mnemonic fixed: this is the quadword subtract ("psubq"), it printed "psubd".
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
1200 let isTwoAddress = 1 in {
1201 def PSLLDQri : PDIi8<0x73, MRM7r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
1202 "pslldq {$src2, $dst|$dst, $src2}", []>;
1203 def PSRLDQri : PDIi8<0x73, MRM7r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
1204 "psrldq {$src2, $dst|$dst, $src2}", []>;
1208 let isTwoAddress = 1 in {
1209 let isCommutable = 1 in {
1210 def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1211 "pand {$src2, $dst|$dst, $src2}",
1212 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
1214 def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1215 "pand {$src2, $dst|$dst, $src2}",
1216 [(set VR128:$dst, (v2i64 (and VR128:$src1,
1217 (load addr:$src2))))]>;
// Opcode fixed: por encodes as 66 0F EB; 0xDB is pand, which would have made
// these records assemble/disassemble as the wrong instruction.
def PORrr : PDI<0xEB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;

def PORrm : PDI<0xEB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                          (load addr:$src2))))]>;
1226 def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1227 "pxor {$src2, $dst|$dst, $src2}",
1228 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
1230 def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1231 "pxor {$src2, $dst|$dst, $src2}",
1232 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
1233 (load addr:$src2))))]>;
1236 def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1237 "pandn {$src2, $dst|$dst, $src2}",
1238 [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
1241 def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1242 "pandn {$src2, $dst|$dst, $src2}",
1243 [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
1244 (load addr:$src2))))]>;
1247 // Pack instructions
1248 let isTwoAddress = 1 in {
1249 def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
1251 "packsswb {$src2, $dst|$dst, $src2}",
1252 [(set VR128:$dst, (v8i16 (int_x86_sse2_packsswb_128
1255 def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
1257 "packsswb {$src2, $dst|$dst, $src2}",
1258 [(set VR128:$dst, (v8i16 (int_x86_sse2_packsswb_128
1260 (bc_v8i16 (loadv2f64 addr:$src2)))))]>;
1261 def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
1263 "packssdw {$src2, $dst|$dst, $src2}",
1264 [(set VR128:$dst, (v4i32 (int_x86_sse2_packssdw_128
1267 def PACKSSDWrm : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
1269 "packssdw {$src2, $dst|$dst, $src2}",
1270 [(set VR128:$dst, (v4i32 (int_x86_sse2_packssdw_128
1272 (bc_v4i32 (loadv2i64 addr:$src2)))))]>;
1273 def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
1275 "packuswb {$src2, $dst|$dst, $src2}",
1276 [(set VR128:$dst, (v8i16 (int_x86_sse2_packuswb_128
1279 def PACKUSWBrm : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
1281 "packuswb {$src2, $dst|$dst, $src2}",
1282 [(set VR128:$dst, (v8i16 (int_x86_sse2_packuswb_128
1284 (bc_v8i16 (loadv2i64 addr:$src2)))))]>;
1287 // Shuffle and unpack instructions
1288 def PSHUFWri : PSIi8<0x70, MRMSrcReg,
1289 (ops VR64:$dst, VR64:$src1, i8imm:$src2),
1290 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
1291 def PSHUFWmi : PSIi8<0x70, MRMSrcMem,
1292 (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
1293 "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
1295 def PSHUFDri : PDIi8<0x70, MRMSrcReg,
1296 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
1297 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
1298 [(set VR128:$dst, (v4i32 (vector_shuffle
1299 VR128:$src1, (undef),
1300 PSHUFD_shuffle_mask:$src2)))]>;
1301 def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
1302 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
1303 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
1304 [(set VR128:$dst, (v4i32 (vector_shuffle
1305 (load addr:$src1), (undef),
1306 PSHUFD_shuffle_mask:$src2)))]>;
1308 // SSE2 with ImmT == Imm8 and XS prefix.
1309 def PSHUFHWri : Ii8<0x70, MRMSrcReg,
1310 (ops VR128:$dst, VR128:$src1, i8imm:$src2),
1311 "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
1312 [(set VR128:$dst, (v8i16 (vector_shuffle
1313 VR128:$src1, (undef),
1314 PSHUFHW_shuffle_mask:$src2)))]>,
1315 XS, Requires<[HasSSE2]>;
1316 def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
1317 (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
1318 "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
1319 [(set VR128:$dst, (v8i16 (vector_shuffle
1320 (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
1321 PSHUFHW_shuffle_mask:$src2)))]>,
1322 XS, Requires<[HasSSE2]>;
1324 // SSE2 with ImmT == Imm8 and XD prefix.
1325 def PSHUFLWri : Ii8<0x70, MRMSrcReg,
1326 (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
1327 "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
1328 [(set VR128:$dst, (v8i16 (vector_shuffle
1329 VR128:$src1, (undef),
1330 PSHUFLW_shuffle_mask:$src2)))]>,
1331 XD, Requires<[HasSSE2]>;
1332 def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
1333 (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
1334 "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
1335 [(set VR128:$dst, (v8i16 (vector_shuffle
1336 (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
1337 PSHUFLW_shuffle_mask:$src2)))]>,
1338 XD, Requires<[HasSSE2]>;
1340 let isTwoAddress = 1 in {
1341 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
1342 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1343 "punpcklbw {$src2, $dst|$dst, $src2}",
1345 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1346 UNPCKL_shuffle_mask)))]>;
1347 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
1348 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1349 "punpcklbw {$src2, $dst|$dst, $src2}",
1351 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1352 UNPCKL_shuffle_mask)))]>;
1353 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
1354 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1355 "punpcklwd {$src2, $dst|$dst, $src2}",
1357 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1358 UNPCKL_shuffle_mask)))]>;
1359 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
1360 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1361 "punpcklwd {$src2, $dst|$dst, $src2}",
1363 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1364 UNPCKL_shuffle_mask)))]>;
1365 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
1366 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1367 "punpckldq {$src2, $dst|$dst, $src2}",
1369 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1370 UNPCKL_shuffle_mask)))]>;
1371 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
1372 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1373 "punpckldq {$src2, $dst|$dst, $src2}",
1375 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1376 UNPCKL_shuffle_mask)))]>;
1377 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
1378 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1379 "punpcklqdq {$src2, $dst|$dst, $src2}",
1381 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1382 UNPCKL_shuffle_mask)))]>;
1383 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
1384 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1385 "punpcklqdq {$src2, $dst|$dst, $src2}",
1387 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1388 UNPCKL_shuffle_mask)))]>;
1390 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
1391 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1392 "punpckhbw {$src2, $dst|$dst, $src2}",
1394 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1395 UNPCKH_shuffle_mask)))]>;
1396 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
1397 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1398 "punpckhbw {$src2, $dst|$dst, $src2}",
1400 (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
1401 UNPCKH_shuffle_mask)))]>;
1402 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
1403 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1404 "punpckhwd {$src2, $dst|$dst, $src2}",
1406 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1407 UNPCKH_shuffle_mask)))]>;
1408 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
1409 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1410 "punpckhwd {$src2, $dst|$dst, $src2}",
1412 (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
1413 UNPCKH_shuffle_mask)))]>;
1414 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
1415 (ops VR128:$dst, VR128:$src1, VR128:$src2),
1416 "punpckhdq {$src2, $dst|$dst, $src2}",
1418 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1419 UNPCKH_shuffle_mask)))]>;
1420 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
1421 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1422 "punpckhdq {$src2, $dst|$dst, $src2}",
1424 (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
1425 UNPCKH_shuffle_mask)))]>;
// Asm string fixed: was "punpckhdq"; this is the quadword-interleave form
// (the rm twin below already prints "punpckhqdq").
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                 UNPCKH_shuffle_mask)))]>;
1432 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
1433 (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1434 "punpckhqdq {$src2, $dst|$dst, $src2}",
1436 (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
1437 UNPCKH_shuffle_mask)))]>;
1441 def PEXTRWr : PDIi8<0xC5, MRMSrcReg,
1442 (ops R32:$dst, VR128:$src1, i32i8imm:$src2),
1443 "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
1444 [(set R32:$dst, (X86pextrw (v8i16 VR128:$src1),
1445 (i32 imm:$src2)))]>;
1446 def PEXTRWm : PDIi8<0xC5, MRMSrcMem,
1447 (ops R32:$dst, i128mem:$src1, i32i8imm:$src2),
1448 "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
1449 [(set R32:$dst, (X86pextrw (loadv8i16 addr:$src1),
1450 (i32 imm:$src2)))]>;
1452 let isTwoAddress = 1 in {
1453 def PINSRWr : PDIi8<0xC4, MRMSrcReg,
1454 (ops VR128:$dst, VR128:$src1, R32:$src2, i32i8imm:$src3),
1455 "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
1456 [(set VR128:$dst, (v8i16 (X86pinsrw (v8i16 VR128:$src1),
1457 R32:$src2, (i32 imm:$src3))))]>;
1458 def PINSRWm : PDIi8<0xC4, MRMSrcMem,
1459 (ops VR128:$dst, VR128:$src1, i16mem:$src2, i32i8imm:$src3),
1460 "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
1462 (v8i16 (X86pinsrw (v8i16 VR128:$src1),
1463 (i32 (anyext (loadi16 addr:$src2))),
1464 (i32 imm:$src3))))]>;
1467 //===----------------------------------------------------------------------===//
1468 // Miscellaneous Instructions
1469 //===----------------------------------------------------------------------===//
1472 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
1473 "movmskps {$src, $dst|$dst, $src}",
1474 [(set R32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is an SSE2 instruction requiring the 0x66 prefix: use PDI, not PSI
// (as PSI it encoded as movmskps while using the sse2 intrinsic pattern).
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set R32:$dst, (int_x86_sse2_movmskpd VR128:$src))]>;
1479 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (ops R32:$dst, VR128:$src),
1480 "pmovmskb {$src, $dst|$dst, $src}",
1481 [(set R32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
1483 // Prefetching loads
// All four variants share opcode 0F 18; the MRM sub-opcode (reg field)
// selects the hint: /1 = t0, /2 = t1, /3 = t2, /0 = nta.
// FIX: PREFETCHT1/T2/TNTA previously printed "prefetcht0"; each asm
// mnemonic now matches its own hint.
1484 def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src),
1485 "prefetcht0 $src", []>, TB,
1486 Requires<[HasSSE1]>;
1487 def PREFETCHT1 : I<0x18, MRM2m, (ops i8mem:$src),
1488 "prefetcht1 $src", []>, TB,
1489 Requires<[HasSSE1]>;
1490 def PREFETCHT2 : I<0x18, MRM3m, (ops i8mem:$src),
1491 "prefetcht2 $src", []>, TB,
1492 Requires<[HasSSE1]>;
1493 def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src),
1494 "prefetchnta $src", []>, TB,
1495 Requires<[HasSSE1]>;
1497 // Non-temporal stores
// These bypass the cache hierarchy; no selection patterns are attached
// (empty pattern list), so they are only reachable via explicit emission.
1498 def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1499 "movntq {$src, $dst|$dst, $src}", []>, TB,
1500 Requires<[HasSSE1]>;
1501 def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
1502 "movntps {$src, $dst|$dst, $src}", []>, TB,
1503 Requires<[HasSSE1]>;
// NOTE(review): MASKMOVQ architecturally stores through an implicit (DS:)EDI
// pointer; modeling it as MRMDestMem with an explicit i64mem operand looks
// suspect — confirm against the intended encoding.
1504 def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
1505 "maskmovq {$src, $dst|$dst, $src}", []>, TB,
1506 Requires<[HasSSE1]>;
// Store fence: orders preceding stores before subsequent ones.
1509 def SFENCE : I<0xAE, MRM7m, (ops),
1510 "sfence", []>, TB, Requires<[HasSSE1]>;
1512 // Load MXCSR register
1513 def LDMXCSR : I<0xAE, MRM2m, (ops i32mem:$src),
1514 "ldmxcsr {$src|$src}", []>, TB, Requires<[HasSSE1]>;
1516 //===----------------------------------------------------------------------===//
1517 // Alias Instructions
1518 //===----------------------------------------------------------------------===//
1520 // Alias instructions that map zero vector to pxor / xorp* for sse.
1521 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// NOTE(review): the asm-string lines for the three V_SET0_* defs (original
// 1523/1526/1529) are absent from this capture — verify against the full file.
1522 def V_SET0_PI : PDI<0xEF, MRMInitReg, (ops VR128:$dst),
1524 [(set VR128:$dst, (v2i64 immAllZerosV))]>;
1525 def V_SET0_PS : PSI<0x57, MRMInitReg, (ops VR128:$dst),
1527 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1528 def V_SET0_PD : PDI<0x57, MRMInitReg, (ops VR128:$dst),
1530 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
// pcmpeqd reg,reg yields all-ones in every lane regardless of prior contents.
1532 def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
1533 "pcmpeqd $dst, $dst",
1534 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
1536 // FR32 / FR64 to 128-bit vector conversion.
// scalar_to_vector moves place the scalar in the low element of a VR128.
// NOTE(review): several "[(set VR128:$dst," lines (original 1539, 1543,
// 1547, 1551, 1556, 1560, 1565, 1570) are absent from this capture —
// verify against the full file.
1537 def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
1538 "movss {$src, $dst|$dst, $src}",
1540 (v4f32 (scalar_to_vector FR32:$src)))]>;
1541 def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1542 "movss {$src, $dst|$dst, $src}",
1544 (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
1545 def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
1546 "movsd {$src, $dst|$dst, $src}",
1548 (v2f64 (scalar_to_vector FR64:$src)))]>;
1549 def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1550 "movsd {$src, $dst|$dst, $src}",
1552 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
// GPR / i32 memory to the low dword of a VR128.
1554 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
1555 "movd {$src, $dst|$dst, $src}",
1557 (v4i32 (scalar_to_vector R32:$src)))]>;
1558 def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1559 "movd {$src, $dst|$dst, $src}",
1561 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
1562 // SSE2 instructions with XS prefix
1563 def MOVQI2PQIrr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
1564 "movq {$src, $dst|$dst, $src}",
1566 (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
1567 Requires<[HasSSE2]>;
1568 def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
1569 "movq {$src, $dst|$dst, $src}",
1571 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
1572 Requires<[HasSSE2]>;
1573 // FIXME: may not be able to eliminate this movss with coalescing the src and
1574 // dest register classes are different. We really want to write this pattern
1576 // def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (i32 0))),
1577 //           (f32 FR32:$src)>;
// Extract element 0 of a vector into a scalar register or memory.
// NOTE(review): the "(i32 0)))]>;" continuation lines (original 1581, 1589,
// 1593) are absent from this capture — verify against the full file.
1578 def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
1579 "movss {$src, $dst|$dst, $src}",
1580 [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
1582 def MOVPS2SSmr : SSI<0x10, MRMDestMem, (ops f32mem:$dst, VR128:$src),
1583 "movss {$src, $dst|$dst, $src}",
1584 [(store (f32 (vector_extract (v4f32 VR128:$src),
1585 (i32 0))), addr:$dst)]>;
1586 def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
1587 "movsd {$src, $dst|$dst, $src}",
1588 [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
1590 def MOVPDI2DIrr : PDI<0x7E, MRMSrcReg, (ops R32:$dst, VR128:$src),
1591 "movd {$src, $dst|$dst, $src}",
1592 [(set R32:$dst, (vector_extract (v4i32 VR128:$src),
1594 def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
1595 "movd {$src, $dst|$dst, $src}",
1596 [(store (i32 (vector_extract (v4i32 VR128:$src),
1597 (i32 0))), addr:$dst)]>;
1599 // Move to lower bits of a VR128, leaving upper bits alone.
1600 // Three operand (but two address) aliases.
// No patterns — these are selected manually (see the X86zexts2vec
// non-instruction patterns below, which expand through them).
// NOTE(review): the closing "}" of this let block (original 1608) is absent
// from this capture — verify against the full file.
1601 let isTwoAddress = 1 in {
1602 def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
1603 "movss {$src2, $dst|$dst, $src2}", []>;
1604 def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
1605 "movsd {$src2, $dst|$dst, $src2}", []>;
1606 def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
1607 "movd {$src2, $dst|$dst, $src2}", []>;
1610 // Move to lower bits of a VR128 and zeroing upper bits.
1611 // Loading from memory automatically zeroing upper bits.
// X86zexts2vec: scalar placed in element 0, all upper elements zeroed.
// NOTE(review): the "[(set VR128:$dst," lines (original 1614, 1618, 1622,
// 1626) are absent from this capture — verify against the full file.
1612 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
1613 "movss {$src, $dst|$dst, $src}",
1615 (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
1616 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
1617 "movsd {$src, $dst|$dst, $src}",
1619 (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
1620 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
1621 "movd {$src, $dst|$dst, $src}",
1623 (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
// NOTE(review): this is a 64-bit zero-extending load but prints "movd";
// confirm whether the mnemonic should be "movq" for opcode 0x7E here.
1624 def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
1625 "movd {$src, $dst|$dst, $src}",
1627 (v2i64 (X86zexts2vec (loadi64 addr:$src))))]>;
1629 //===----------------------------------------------------------------------===//
1630 // Non-Instruction Patterns
1631 //===----------------------------------------------------------------------===//
1633 // 128-bit vector undef's.
1634 def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1635 def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1636 def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1637 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1638 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
1640 // 128-bit vector all zero's.
// Integer all-zero vectors lower to V_SET0_PI (pxor); the fp variants are
// handled by the V_SET0_PS / V_SET0_PD instruction patterns themselves.
1641 def : Pat<(v16i8 immAllZerosV), (v16i8 (V_SET0_PI))>, Requires<[HasSSE2]>;
1642 def : Pat<(v8i16 immAllZerosV), (v8i16 (V_SET0_PI))>, Requires<[HasSSE2]>;
1643 def : Pat<(v4i32 immAllZerosV), (v4i32 (V_SET0_PI))>, Requires<[HasSSE2]>;
1645 // 128-bit vector all one's.
// NOTE(review): the v4f32 pattern is gated on HasSSE1 although V_SETALLONES
// (pcmpeqd) is declared with the SSE2 PDI class — confirm the predicate.
1646 def : Pat<(v16i8 immAllOnesV), (v16i8 (V_SETALLONES))>, Requires<[HasSSE2]>;
1647 def : Pat<(v8i16 immAllOnesV), (v8i16 (V_SETALLONES))>, Requires<[HasSSE2]>;
1648 def : Pat<(v4i32 immAllOnesV), (v4i32 (V_SETALLONES))>, Requires<[HasSSE2]>;
1649 def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
1650 def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
1652 // Load 128-bit integer vector values.
// All 128-bit integer vector loads/stores share the aligned MOVDQA forms.
1653 def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
1654 Requires<[HasSSE2]>;
1655 def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
1656 Requires<[HasSSE2]>;
1657 def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
1658 Requires<[HasSSE2]>;
1659 def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
1660 Requires<[HasSSE2]>;
1662 // Store 128-bit integer vector values.
1663 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
1664 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1665 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
1666 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1667 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
1668 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1669 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
1670 (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
1672 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16 bits are meaningful; MOVDI2PDIrr places the full dword in element 0.
1674 def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
1675 Requires<[HasSSE2]>;
1676 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
1677 Requires<[HasSSE2]>;
// Bitcasts between 128-bit integer vector types are free: the result
// expression simply retags the same VR128 register with the new type.
1680 def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
1681 Requires<[HasSSE2]>;
1682 def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
1683 Requires<[HasSSE2]>;
1684 def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
1685 Requires<[HasSSE2]>;
1686 def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
1687 Requires<[HasSSE2]>;
1688 def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
1689 Requires<[HasSSE2]>;
1690 def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
1691 Requires<[HasSSE2]>;
// Bitcasts to v8i16 / v16i8 are likewise register no-ops.
// FIX: the result retags previously said (v4i32 VR128:$src) for all six
// patterns; the result type now matches each pattern's destination type.
1692 def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>,
1693 Requires<[HasSSE2]>;
1694 def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>,
1695 Requires<[HasSSE2]>;
1696 def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>,
1697 Requires<[HasSSE2]>;
1698 def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>,
1699 Requires<[HasSSE2]>;
1700 def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>,
1701 Requires<[HasSSE2]>;
1702 def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>,
1703 Requires<[HasSSE2]>;
// int <-> fp bitcasts within VR128 are also register no-ops.
1705 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
1706 Requires<[HasSSE2]>;
1707 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
1708 Requires<[HasSSE2]>;
1710 // Zeroing a VR128 then do a MOVS* to the lower bits.
// Expansion of X86zexts2vec for register sources: materialize a zero
// vector, then move the scalar into element 0.  8/16-bit GPR sources are
// first zero-extended to 32 bits so MOVLDI2PDIrr can consume them.
1711 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
1712 (MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
// NOTE(review): the v4f32 case uses SSE1-class instructions but is gated on
// HasSSE2 — confirm the intended predicate.
1713 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
1714 (MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
1715 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
1716 (MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
1717 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
1718 (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
1719 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
1720 (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
1722 // Splat v2f64 / v2i64
// A 2-element splat is just an UNPCKL of the register with itself.
1723 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),
1724 (v2f64 (UNPCKLPDrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
1725 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_mask:$sm),
1726 (v2i64 (PUNPCKLQDQrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
// Splat v4f32: SHUFPS of the register with itself, reusing the splat mask
// as the shuffle immediate.
1729 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
1730 (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
1731 Requires<[HasSSE1]>;
1733 // Shuffle v4i32 with SHUFP* if others do not match.
// Fallback: integer shuffles that no PSHUF/PUNPCK pattern covered are done
// with the fp SHUFPS (bit pattern is what matters).
1734 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
1735 SHUFP_int_shuffle_mask:$sm),
1736 (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
1737 SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
1738 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
1739 SHUFP_int_shuffle_mask:$sm),
1740 (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
1741 SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
1743 // Shuffle v4f32 with PSHUF* if others do not match.
// Reverse fallback: single-source fp shuffles that SHUFPS cannot express
// are done with the integer PSHUFD / PSHUFHW / PSHUFLW forms.
1744 def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
1745 PSHUFD_fp_shuffle_mask:$sm),
1746 (v4f32 (PSHUFDri VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
1747 Requires<[HasSSE2]>;
1748 def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
1749 PSHUFD_fp_shuffle_mask:$sm),
1750 (v4f32 (PSHUFDmi addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
1751 Requires<[HasSSE2]>;
1752 def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
1753 PSHUFHW_fp_shuffle_mask:$sm),
1754 (v4f32 (PSHUFHWri VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
1755 Requires<[HasSSE2]>;
1756 def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
1757 PSHUFHW_fp_shuffle_mask:$sm),
1758 (v4f32 (PSHUFHWmi addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
1759 Requires<[HasSSE2]>;
1760 def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
1761 PSHUFLW_fp_shuffle_mask:$sm),
1762 (v4f32 (PSHUFLWri VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
1763 Requires<[HasSSE2]>;
1764 def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
1765 PSHUFLW_fp_shuffle_mask:$sm),
1766 (v4f32 (PSHUFLWmi addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
1767 Requires<[HasSSE2]>;
1769 // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Self-interleave of the low elements when the second operand is undef.
// FIX: predicates were swapped — UNPCKLPS is an SSE1 instruction and
// PUNPCKLDQ is SSE2; each pattern is now gated on the correct subtarget.
1770 def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
1771 UNPCKL_v_undef_shuffle_mask)),
1772 (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
1773 def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
1774 UNPCKL_v_undef_shuffle_mask)),
1775 (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
1776 def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
1777 UNPCKL_v_undef_shuffle_mask)),
1778 (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
1779 def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
1780 UNPCKL_v_undef_shuffle_mask)),
1781 (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
1783 // 128-bit logical shifts
// The intrinsic takes a byte count scaled by PSxLDQ_imm into the
// instruction's immediate encoding.
1784 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
1785 (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1786 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
1787 (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
// Logical ops on fp vectors viewed as integers, with a memory operand:
// select the matching packed-fp logic instruction (memory form).
1790 def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
1791 (ANDPSrm VR128:$src1, addr:$src2)>;
1792 def : Pat<(and (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
1793 (ANDPDrm VR128:$src1, addr:$src2)>;
1794 def : Pat<(or (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
1795 (ORPSrm VR128:$src1, addr:$src2)>;
1796 def : Pat<(or (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
1797 (ORPDrm VR128:$src1, addr:$src2)>;
1798 def : Pat<(xor (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
1799 (XORPSrm VR128:$src1, addr:$src2)>;
1800 def : Pat<(xor (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
1801 (XORPDrm VR128:$src1, addr:$src2)>;
1802 def : Pat<(and (vnot (bc_v4i32 (v4f32 VR128:$src1))), (loadv4i32 addr:$src2)),
1803 (ANDNPSrm VR128:$src1, addr:$src2)>;
1804 def : Pat<(and (vnot (bc_v2i64 (v2f64 VR128:$src1))), (loadv2i64 addr:$src2)),
1805 (ANDNPDrm VR128:$src1, addr:$src2)>;
// Integer logic whose result is reinterpreted as v4f32: use the
// single-precision packed logic instructions directly.
1807 def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, VR128:$src2))),
1808 (ANDPSrr VR128:$src1, VR128:$src2)>;
1809 def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, VR128:$src2))),
1810 (ORPSrr VR128:$src1, VR128:$src2)>;
1811 def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, VR128:$src2))),
1812 (XORPSrr VR128:$src1, VR128:$src2)>;
1813 def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), VR128:$src2))),
1814 (ANDNPSrr VR128:$src1, VR128:$src2)>;
// Memory-operand variants of the above.
1816 def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, (load addr:$src2)))),
1817 (ANDPSrm (v4i32 VR128:$src1), addr:$src2)>;
1818 def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, (load addr:$src2)))),
1819 (ORPSrm VR128:$src1, addr:$src2)>;
1820 def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, (load addr:$src2)))),
1821 (XORPSrm VR128:$src1, addr:$src2)>;
1822 def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), (load addr:$src2)))),
1823 (ANDNPSrm VR128:$src1, addr:$src2)>;
// Integer logic whose result is reinterpreted as v2f64: use the
// double-precision packed logic instructions (register forms).
1825 def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, VR128:$src2))),
1826 (ANDPDrr VR128:$src1, VR128:$src2)>;
1827 def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, VR128:$src2))),
1828 (ORPDrr VR128:$src1, VR128:$src2)>;
1829 def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, VR128:$src2))),
1830 (XORPDrr VR128:$src1, VR128:$src2)>;
1831 def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), VR128:$src2))),
1832 (ANDNPDrr VR128:$src1, VR128:$src2)>;
// Memory-operand variants of the v2f64-result logic patterns.
// FIX: these previously selected the single-precision ANDPS/ORPS/XORPS/
// ANDNPS memory forms; use the PD forms for consistency with the
// register-register patterns above (the bitwise result is identical, but
// the PD forms keep the operation in the double-precision domain).
1834 def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, (load addr:$src2)))),
1835 (ANDPDrm (v2i64 VR128:$src1), addr:$src2)>;
1836 def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, (load addr:$src2)))),
1837 (ORPDrm VR128:$src1, addr:$src2)>;
1838 def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, (load addr:$src2)))),
1839 (XORPDrm VR128:$src1, addr:$src2)>;
1840 def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), (load addr:$src2)))),
1841 (ANDNPDrm VR128:$src1, addr:$src2)>;
// Plain integer vector logic (register forms): all element widths map to
// the same bitwise PAND / POR / PXOR / PANDN instructions.
1843 def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
1844 (PANDrr VR128:$src1, VR128:$src2)>;
1845 def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
1846 (PANDrr VR128:$src1, VR128:$src2)>;
1847 def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
1848 (PANDrr VR128:$src1, VR128:$src2)>;
1849 def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
1850 (PORrr VR128:$src1, VR128:$src2)>;
1851 def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
1852 (PORrr VR128:$src1, VR128:$src2)>;
1853 def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
1854 (PORrr VR128:$src1, VR128:$src2)>;
1855 def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
1856 (PXORrr VR128:$src1, VR128:$src2)>;
1857 def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
1858 (PXORrr VR128:$src1, VR128:$src2)>;
1859 def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
1860 (PXORrr VR128:$src1, VR128:$src2)>;
// PANDN computes (~src1) & src2, matching the (and (vnot ...)) DAG shape.
1861 def : Pat<(v4i32 (and (vnot VR128:$src1), VR128:$src2)),
1862 (PANDNrr VR128:$src1, VR128:$src2)>;
1863 def : Pat<(v8i16 (and (vnot VR128:$src1), VR128:$src2)),
1864 (PANDNrr VR128:$src1, VR128:$src2)>;
1865 def : Pat<(v16i8 (and (vnot VR128:$src1), VR128:$src2)),
1866 (PANDNrr VR128:$src1, VR128:$src2)>;
// Memory-operand variants of the integer vector logic patterns.
1868 def : Pat<(v4i32 (and VR128:$src1, (load addr:$src2))),
1869 (PANDrm VR128:$src1, addr:$src2)>;
1870 def : Pat<(v8i16 (and VR128:$src1, (load addr:$src2))),
1871 (PANDrm VR128:$src1, addr:$src2)>;
1872 def : Pat<(v16i8 (and VR128:$src1, (load addr:$src2))),
1873 (PANDrm VR128:$src1, addr:$src2)>;
1874 def : Pat<(v4i32 (or VR128:$src1, (load addr:$src2))),
1875 (PORrm VR128:$src1, addr:$src2)>;
1876 def : Pat<(v8i16 (or VR128:$src1, (load addr:$src2))),
1877 (PORrm VR128:$src1, addr:$src2)>;
1878 def : Pat<(v16i8 (or VR128:$src1, (load addr:$src2))),
1879 (PORrm VR128:$src1, addr:$src2)>;
1880 def : Pat<(v4i32 (xor VR128:$src1, (load addr:$src2))),
1881 (PXORrm VR128:$src1, addr:$src2)>;
1882 def : Pat<(v8i16 (xor VR128:$src1, (load addr:$src2))),
1883 (PXORrm VR128:$src1, addr:$src2)>;
1884 def : Pat<(v16i8 (xor VR128:$src1, (load addr:$src2))),
1885 (PXORrm VR128:$src1, addr:$src2)>;
1886 def : Pat<(v4i32 (and (vnot VR128:$src1), (load addr:$src2))),
1887 (PANDNrm VR128:$src1, addr:$src2)>;
1888 def : Pat<(v8i16 (and (vnot VR128:$src1), (load addr:$src2))),
1889 (PANDNrm VR128:$src1, addr:$src2)>;
1890 def : Pat<(v16i8 (and (vnot VR128:$src1), (load addr:$src2))),
1891 (PANDNrm VR128:$src1, addr:$src2)>;