From: Dan Gohman <gohman@apple.com>
Date: Mon, 11 Jan 2010 17:37:57 +0000 (+0000)
Subject: Re-instate MOV64r0 and MOV16r0, with adjustments to work with the
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=71c25b7d7bdf9b49dd70965c7486ce930b846aac;p=oota-llvm.git

Re-instate MOV64r0 and MOV16r0, with adjustments to work with the
new AsmPrinter. This is perhaps less elegant than describing them
in terms of MOV32r0 and subreg operations, but it allows the
current register to rematerialize them.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93158 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b692473..a4939af1b8e 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -399,6 +399,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     OutMI.setOpcode(X86::MOVZX32rm16);
     lower_subreg32(&OutMI, 0);
     break;
+  case X86::MOV16r0:
+    OutMI.setOpcode(X86::MOV32r0);
+    lower_subreg32(&OutMI, 0);
+    break;
+  case X86::MOV64r0:
+    OutMI.setOpcode(X86::MOV32r0);
+    lower_subreg32(&OutMI, 0);
+    break;
   }
 }
 
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c795b62050e..e2a53d1118b 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1873,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
 
     unsigned LoReg, HiReg, ClrReg;
     unsigned ClrOpcode, SExtOpcode;
-    EVT ClrVT = NVT;
     switch (NVT.getSimpleVT().SimpleTy) {
     default: llvm_unreachable("Unsupported VT!");
     case MVT::i8:
@@ -1883,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       break;
     case MVT::i16:
       LoReg = X86::AX;  HiReg = X86::DX;
-      ClrOpcode  = X86::MOV32r0;  ClrReg = X86::EDX;  ClrVT = MVT::i32;
+      ClrOpcode  = X86::MOV16r0; ClrReg = X86::DX;
       SExtOpcode = X86::CWD;
       break;
     case MVT::i32:
@@ -1893,7 +1892,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       break;
     case MVT::i64:
       LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
-      ClrOpcode  = ~0U; // NOT USED.
+      ClrOpcode  = X86::MOV64r0;
       SExtOpcode = X86::CQO;
       break;
     }
@@ -1932,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
           SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
       } else {
         // Zero out the high part, effectively zero extending the input.
-        SDValue ClrNode;
-
-        if (NVT.getSimpleVT() == MVT::i64) {
-          ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
-                            0);
-          // We just did a 32-bit clear, insert it into a 64-bit register to
-          // clear the whole 64-bit reg.
-          SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
-          SDValue SubRegNo =
-            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
-          ClrNode =
-            SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
-                                           MVT::i64, Zero, ClrNode, SubRegNo),
-                    0);
-        } else {
-          ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
-        }
-
+        SDValue ClrNode =
+          SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
         InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                       ClrNode, InFlag).getValue(1);
       }
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index c6d32747c4e..1835c8feb62 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1598,17 +1598,20 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
 // Alias Instructions
 //===----------------------------------------------------------------------===//
 
-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
 // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
 // when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
-          (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
-
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+                 "",
+                 [(set GR64:$dst, 0)]>;
 
-// Materialize i64 constant where top 32-bits are zero.
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
 let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
                         "", [(set GR64:$dst, i64immZExt32:$src)]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9600cffa91f..d4582134383 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1019,12 +1019,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
   switch (Opc) {
   default: break;
   case X86::MOV8r0:
-  case X86::MOV32r0: {
+  case X86::MOV16r0:
+  case X86::MOV32r0:
+  case X86::MOV64r0: {
     if (!isSafeToClobberEFLAGS(MBB, I)) {
       switch (Opc) {
       default: break;
       case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
+      case X86::MOV16r0: Opc = X86::MOV16ri; break;
       case X86::MOV32r0: Opc = X86::MOV32ri; break;
+      case X86::MOV64r0: Opc = X86::MOV64ri; break;
       }
       Clone = false;
     }
@@ -2291,8 +2295,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
     isTwoAddrFold = true;
   } else if (i == 0) { // If operand 0
-    if (MI->getOpcode() == X86::MOV32r0)
+    if (MI->getOpcode() == X86::MOV64r0)
+      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV32r0)
       NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV16r0)
+      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
     else if (MI->getOpcode() == X86::MOV8r0)
       NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
     if (NewMI)
@@ -2560,7 +2568,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   } else if (OpNum == 0) { // If operand 0
     switch (Opc) {
     case X86::MOV8r0:
+    case X86::MOV16r0:
     case X86::MOV32r0:
+    case X86::MOV64r0:
       return true;
     default: break;
     }
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 3b8f6ec790e..341bfb2f747 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3718,18 +3718,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
                  "xor{b}\t$dst, $dst",
                  [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+                 "",
+                 [(set GR16:$dst, 0)]>, OpSize;
                  
 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
                  "xor{l}\t$dst, $dst",
                  [(set GR32:$dst, 0)]>;
 }
 
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
-          (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
 //
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
new file mode 100644
index 00000000000..c4f768ca529
--- /dev/null
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {xorl	%edi, %edi} | count 4
+
+; CodeGen should remat the zero instead of spilling it.
+
+declare void @foo(i64 %p)
+
+define void @bar() nounwind {
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  ret void
+}