Factor out offset printing code into generic AsmPrinter.

[oota-llvm.git] / lib / Target / X86 / X86Instr64bit.td
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td

index 29066b504473f6754e73de2a045f7581a77cd4a8..125eefbf518bea5465f575b1cc18299b7f50c561 100644 (file)
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -46,28 +46,21 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
  def i64immSExt32  : PatLeaf<(i64 imm), [{
    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
-  return (int64_t)N->getValue() == (int32_t)N->getValue();
+  return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
  }]>;
  
  def i64immZExt32  : PatLeaf<(i64 imm), [{
    // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // unsigned (zero extended) field.
-  return (uint64_t)N->getValue() == (uint32_t)N->getValue();
+  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
  }]>;
  
  def i64immSExt8  : PatLeaf<(i64 imm), [{
    // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit
    // sign extended field.
-  return (int64_t)N->getValue() == (int8_t)N->getValue();
+  return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue();
  }]>;
  
-def i64immFFFFFFFF  : PatLeaf<(i64 imm), [{
-  // i64immFFFFFFFF - True if this is a specific constant we can't write in
-  // tblgen files.
-  return N->getValue() == 0x00000000FFFFFFFFULL;
-}]>;
-
-
  def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
  def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
  def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
@@ -86,16 +79,36 @@ def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
  // Instruction list...
  //
  
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+                           "#ADJCALLSTACKDOWN",
+                           [(X86callseq_start timm:$amt)]>,
+                          Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+                           "#ADJCALLSTACKUP",
+                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+                          Requires<[In64BitMode]>;
+}
+
  //===----------------------------------------------------------------------===//
  //  Call Instructions...
  //
  let isCall = 1 in
-  // All calls clobber the non-callee saved registers...
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
    let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
                MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
                XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS] in {
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
      def CALL64pcrel32 : I<0xE8, RawFrm, (outs), (ins i64imm:$dst, variable_ops),
                            "call\t${dst:call}", []>;
      def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
@@ -1303,8 +1316,22 @@ def : Pat<(i32 (anyext GR8:$src)),
  // Some peepholes
  //===----------------------------------------------------------------------===//
  
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR64:$src1, 128),
+          (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+          (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions: add 0x80000000 becomes sub of sign-extended -0x80000000.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
  // r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, i64immFFFFFFFF),
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
            (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
  // r & (2^16-1) ==> movz
  def : Pat<(and GR64:$src, 0xffff),
@@ -1377,6 +1404,22 @@ def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
                       (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
            (SHRD64mrCL addr:$dst, GR64:$src2)>;
  
+def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
+              (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+          (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+                     (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+                 addr:$dst),
+          (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+          (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
+                       GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+          (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
  // (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
  def : Pat<(or (shl GR64:$src1, CL:$amt),
                (srl GR64:$src2, (sub 64, CL:$amt))),
@@ -1386,6 +1429,22 @@ def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
                       (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
            (SHLD64mrCL addr:$dst, GR64:$src2)>;
  
+def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
+              (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+          (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+                     (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+                 addr:$dst),
+          (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+          (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
+                       GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+          (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
  // X86 specific add which produces a flag.
  def : Pat<(addc GR64:$src1, GR64:$src2),
            (ADD64rr GR64:$src1, GR64:$src2)>;
@@ -1441,7 +1500,7 @@ def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
  
  /// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
  multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
-  def rr : SS4AIi8<opc, MRMSrcReg, (outs GR64:$dst),
+  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr, 
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),