This situation can occur:
[oota-llvm.git] / lib / Target / X86 / X86InstrSSE.td
index 1ea4bfd35e030f1cd3ade95c97cf9a770800bc96..74fcb5c7764319f3c081debf55e4771e907f01a8 100644 (file)
@@ -51,6 +51,8 @@ def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
+def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -1957,6 +1959,12 @@ let Predicates = [HasSSE2] in {
             (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
   def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
             (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+  def : Pat<(v2i64 (X86vshl  VR128:$src, (i8 imm:$amt))),
+            (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+  def : Pat<(v2i64 (X86vshr  VR128:$src, (i8 imm:$amt))),
+            (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
 }
 
 // Logical
@@ -2376,6 +2384,8 @@ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
             (MOVZDI2PDIrm addr:$src)>;
 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (MOVZDI2PDIrm addr:$src)>;
 
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
@@ -2735,13 +2745,13 @@ let Constraints = "$src1 = $dst" in {
 
 defm PHADDW      : SS3I_binop_rm_int_16<0x01, "phaddw",
                                         int_x86_ssse3_phadd_w,
-                                        int_x86_ssse3_phadd_w_128, 1>;
+                                        int_x86_ssse3_phadd_w_128>;
 defm PHADDD      : SS3I_binop_rm_int_32<0x02, "phaddd",
                                         int_x86_ssse3_phadd_d,
-                                        int_x86_ssse3_phadd_d_128, 1>;
+                                        int_x86_ssse3_phadd_d_128>;
 defm PHADDSW     : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                         int_x86_ssse3_phadd_sw,
-                                        int_x86_ssse3_phadd_sw_128, 1>;
+                                        int_x86_ssse3_phadd_sw_128>;
 defm PHSUBW      : SS3I_binop_rm_int_16<0x05, "phsubw",
                                         int_x86_ssse3_phsub_w,
                                         int_x86_ssse3_phsub_w_128>;
@@ -2753,7 +2763,7 @@ defm PHSUBSW     : SS3I_binop_rm_int_16<0x07, "phsubsw",
                                         int_x86_ssse3_phsub_sw_128>;
 defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                         int_x86_ssse3_pmadd_ub_sw,
-                                        int_x86_ssse3_pmadd_ub_sw_128, 1>;
+                                        int_x86_ssse3_pmadd_ub_sw_128>;
 defm PMULHRSW    : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                         int_x86_ssse3_pmul_hr_sw,
                                         int_x86_ssse3_pmul_hr_sw_128, 1>;
@@ -3320,7 +3330,7 @@ defm DPPS         : SS41I_binop_rmi_int<0x40, "dpps",
 defm DPPD         : SS41I_binop_rmi_int<0x41, "dppd",
                                         int_x86_sse41_dppd, 1>;
 defm MPSADBW      : SS41I_binop_rmi_int<0x42, "mpsadbw",
-                                        int_x86_sse41_mpsadbw, 0>;
+                                        int_x86_sse41_mpsadbw, 1>;
 
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator