Delete the allocated vector.

[oota-llvm.git] / lib / Target / X86 / X86InstrSSE.td
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index ced3bb94d1234ca67c551929157295a5fdc1d739..e4e92ccbf1462f035e2ff5edf490f79e95be8899 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -654,10 +654,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse_cvttss2si64, f32mem, load,
                                      "cvttss2si">, XS, VEX, VEX_W;
  defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                                    f128mem, load, "cvttss2si">, XD, VEX;
+                                    f128mem, load, "cvttsd2si">, XD, VEX;
  defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse2_cvttsd2si64, f128mem, load,
-                                    "cvttss2si">, XD, VEX, VEX_W;
+                                    "cvttsd2si">, XD, VEX, VEX_W;
  }
  defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                      f32mem, load, "cvttss2si">, XS;
@@ -665,10 +665,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse_cvttss2si64, f32mem, load,
                                      "cvttss2si{q}">, XS, REX_W;
  defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                                    f128mem, load, "cvttss2si">, XD;
+                                    f128mem, load, "cvttsd2si">, XD;
  defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse2_cvttsd2si64, f128mem, load,
-                                    "cvttss2si{q}">, XD, REX_W;
+                                    "cvttsd2si{q}">, XD, REX_W;
  
  let isAsmParserOnly = 1, Pattern = []<dag> in {
  defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
@@ -2082,7 +2082,7 @@ def : Pat<(X86SFence), (SFENCE)>;
  // We set canFoldAsLoad because this can be converted to a constant-pool
  // load of an all-zeros value if folding it would be beneficial.
  // FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
+// JIT implementation, it does not expand the instructions below like
  // X86MCInstLower does.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isCodeGenOnly = 1 in {
@@ -5537,19 +5537,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (MOVDDUPrm addr:$src)>;
  
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
-          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
-          (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  
  def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5564,6 +5559,7 @@ def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (MOVDDUPrm addr:$src)>;
  
+
  // Shuffle with UNPCKLPS
  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
            (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5675,14 +5671,11 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
-// FIXME: Instead of X86Movddup, there should be a X86Movlhps here, the problem
+
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold cause
  // it has two uses through a bitcast. One use disappears at isel time and the
  // fold opportunity reappears.
-def : Pat<(v2i64 (X86Movddup VR128:$src)),
-          (MOVLHPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v4f32 (X86Movddup VR128:$src)),
-          (MOVLHPSrr VR128:$src, VR128:$src)>;
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
  
@@ -5690,6 +5683,7 @@ def : Pat<(v2f64 (X86Movddup VR128:$src)),
  def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
+
  // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold cause
  // it has two uses through a bitcast. One use disappears at isel time and the
@@ -5781,6 +5775,12 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
  def : Pat<(X86Movlps VR128:$src1,
                      (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of a X86Movlps there should be a X86Movsd here. The problem
+// is during lowering, where it's not possible to recognize the load fold
+// because it has two uses through a bitcast. One use disappears at isel time
+// and the fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
  
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),