This situation can occur:
[oota-llvm.git] / lib / Target / X86 / X86InstrSSE.td
index 8e6d9df3827a1df469e93cde78f4bb4e5cd98303..74fcb5c7764319f3c081debf55e4771e907f01a8 100644 (file)
@@ -51,6 +51,8 @@ def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
+def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -1531,7 +1533,7 @@ def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
                      TB, Requires<[HasSSE2]>;
-def Int_CVTPS2PDrm : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins f64mem:$src),
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                           (load addr:$src)))]>,
@@ -1540,7 +1542,7 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins f64mem:$src),
 def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins f128mem:$src),
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                             (memop addr:$src)))]>;
@@ -1957,6 +1959,12 @@ let Predicates = [HasSSE2] in {
             (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
   def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
             (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+  // Shift up / down and insert zero's.
+  def : Pat<(v2i64 (X86vshl  VR128:$src, (i8 imm:$amt))),
+            (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+  def : Pat<(v2i64 (X86vshr  VR128:$src, (i8 imm:$amt))),
+            (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
 }
 
 // Logical
@@ -2376,6 +2384,8 @@ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
             (MOVZDI2PDIrm addr:$src)>;
 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (MOVZDI2PDIrm addr:$src)>;
 
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
@@ -2735,13 +2745,13 @@ let Constraints = "$src1 = $dst" in {
 
 defm PHADDW      : SS3I_binop_rm_int_16<0x01, "phaddw",
                                         int_x86_ssse3_phadd_w,
-                                        int_x86_ssse3_phadd_w_128, 1>;
+                                        int_x86_ssse3_phadd_w_128>;
 defm PHADDD      : SS3I_binop_rm_int_32<0x02, "phaddd",
                                         int_x86_ssse3_phadd_d,
-                                        int_x86_ssse3_phadd_d_128, 1>;
+                                        int_x86_ssse3_phadd_d_128>;
 defm PHADDSW     : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                         int_x86_ssse3_phadd_sw,
-                                        int_x86_ssse3_phadd_sw_128, 1>;
+                                        int_x86_ssse3_phadd_sw_128>;
 defm PHSUBW      : SS3I_binop_rm_int_16<0x05, "phsubw",
                                         int_x86_ssse3_phsub_w,
                                         int_x86_ssse3_phsub_w_128>;
@@ -2753,7 +2763,7 @@ defm PHSUBSW     : SS3I_binop_rm_int_16<0x07, "phsubsw",
                                         int_x86_ssse3_phsub_sw_128>;
 defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                         int_x86_ssse3_pmadd_ub_sw,
-                                        int_x86_ssse3_pmadd_ub_sw_128, 1>;
+                                        int_x86_ssse3_pmadd_ub_sw_128>;
 defm PMULHRSW    : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                         int_x86_ssse3_pmul_hr_sw,
                                         int_x86_ssse3_pmul_hr_sw_128, 1>;
@@ -2778,7 +2788,7 @@ let Constraints = "$src1 = $dst" in {
                              (int_x86_ssse3_palign_r
                               VR64:$src1, VR64:$src2,
                               imm:$src3))]>;
-  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
                            (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
                            "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                            [(set VR64:$dst,
@@ -2794,7 +2804,7 @@ let Constraints = "$src1 = $dst" in {
                              (int_x86_ssse3_palign_r_128
                               VR128:$src1, VR128:$src2,
                               imm:$src3))]>, OpSize;
-  def PALIGNR128rm : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
                            (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
                            "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                            [(set VR128:$dst,
@@ -3050,16 +3060,6 @@ def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
-// FIXME: Temporary workaround since 2-wide shuffle is broken.
-def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
-          (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (memop addr:$src2)),
-          (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
-          (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (memop addr:$src2)),
-          (PUNPCKLQDQrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
 // Some special case pandn patterns.
 def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                   VR128:$src2)),
@@ -3330,7 +3330,7 @@ defm DPPS         : SS41I_binop_rmi_int<0x40, "dpps",
 defm DPPD         : SS41I_binop_rmi_int<0x41, "dppd",
                                         int_x86_sse41_dppd, 1>;
 defm MPSADBW      : SS41I_binop_rmi_int<0x42, "mpsadbw",
-                                        int_x86_sse41_mpsadbw, 0>;
+                                        int_x86_sse41_mpsadbw, 1>;
 
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator