Turn off the old way of handling debug information in the code generator. Use

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.td
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td

index 0eaab8d93d1e8b9cc1d0a6b67cb2864770e250e6..19f1e6a4a39a7e0c91bc36838e477586a8c8a374 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -308,6 +308,16 @@ def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
    return false;
  }]>;
  
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  const Value *Src = LD->getSrcValue();
+  if (!Src)  // no source value attached to this load, so the address space is unknown: do not match
+    return false;
+  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+    return PT->getAddressSpace() == 256;  // match only address space 256; GS_MOV32rm selects such loads as %gs:-relative
+  return false;  // source value is not pointer-typed: do not match
+}]>;
+
  def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
  def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
  
@@ -1235,12 +1245,15 @@ let isTwoAddress = 0 in {
  }
  } // Defs = [EFLAGS]
  
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
  def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
                 [(set GR8:$dst, (not GR8:$src))]>;
  def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
                 [(set GR16:$dst, (not GR16:$src))]>, OpSize;
  def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
                 [(set GR32:$dst, (not GR32:$src))]>;
+}
  let isTwoAddress = 0 in {
    def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
                   [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
@@ -2682,14 +2695,18 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(X86bt GR32:$src1, GR32:$src2),
                  (implicit EFLAGS)]>, TB;
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
-               "bt{w}\t{$src2, $src1|$src1, $src2}",
-               [(X86bt (loadi16 addr:$src1), GR16:$src2),
-                (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
-def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-               "bt{l}\t{$src2, $src1|$src1, $src2}",
-               [(X86bt (loadi32 addr:$src1), GR32:$src2),
-                (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+//               "bt{w}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi16 addr:$src1), GR16:$src2),
+//                (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
+//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+//               "bt{l}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi32 addr:$src1), GR32:$src2),
+//                (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
  
  def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                  "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -2795,7 +2812,7 @@ def MOV32r0  : I<0x31, MRMInitReg,  (outs GR32:$dst), (ins),
  
  // Basic operations on GR16 / GR32 subclasses GR16_ and GR32_ which contains only
  // those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1 in {
  def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src),
                  "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
  def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src),
@@ -2845,6 +2862,11 @@ def TLS_tp : I<0x8B, Pseudo, (outs GR32:$dst), (ins),
                 "movl\t%gs:0, $dst",
                 [(set GR32:$dst, X86TLStp)]>, SegGS;
  
+let AddedComplexity = 5 in
+def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                   "movl\t%gs:$src, $dst",
+                   [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
+
  //===----------------------------------------------------------------------===//
  // DWARF Pseudo Instructions
  //
@@ -3590,6 +3612,17 @@ def : Pat<(parallel (X86smul_ovf (load addr:$src1), i32immSExt8:$src2),
                      (implicit EFLAGS)),
            (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
  
+// Optimize multiply with overflow by 2.
+let AddedComplexity = 2 in {
+def : Pat<(parallel (X86smul_ovf GR16:$src1, 2),
+                    (implicit EFLAGS)),
+          (ADD16rr GR16:$src1, GR16:$src1)>;
+
+def : Pat<(parallel (X86smul_ovf GR32:$src1, 2),
+                    (implicit EFLAGS)),
+          (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
  //===----------------------------------------------------------------------===//
  // Floating Point Stack Support
  //===----------------------------------------------------------------------===//