Remove more dead LR-as-GPR PPC code
[oota-llvm.git] / lib / Target / PowerPC / PPCInstrInfo.cpp
index 8b868e69ecaa489d635831db7d410ce3843fff11..f2a9da5a8e16338ed32914e7da1db90f3fb55330 100644 (file)
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
+#include "PPCHazardRecognizers.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
 
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
 using namespace llvm;
 
 static cl::
-opt<bool> EnableCTRLoopAnal("enable-ppc-ctrloop-analysis", cl::Hidden,
-            cl::desc("Enable analysis for CTR loops (experimental)"));
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+            cl::desc("Disable analysis for CTR loops"));
 
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
@@ -54,12 +49,13 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
   const TargetMachine *TM,
   const ScheduleDAG *DAG) const {
   unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
-  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+      Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
     const InstrItineraryData *II = TM->getInstrItineraryData();
     return new PPCScoreboardHazardRecognizer(II, DAG);
   }
 
-  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
 }
 
 /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -70,7 +66,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
 
   // Most subtargets use a PPC970 recognizer.
-  if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+  if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+      Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
     const TargetInstrInfo *TII = TM.getInstrInfo();
     assert(TII && "No InstrInfo?");
 
@@ -79,6 +76,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
 
   return new PPCScoreboardHazardRecognizer(II, DAG);
 }
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                         unsigned &SrcReg, unsigned &DstReg,
+                                         unsigned &SubIdx) const {
+  switch (MI.getOpcode()) {
+  default: return false;
+  case PPC::EXTSW:
+  case PPC::EXTSW_32_64:
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = PPC::sub_32;
+    return true;
+  }
+}
+
 unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
   switch (MI->getOpcode()) {
@@ -123,7 +136,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 
   // Normal instructions can be commuted the obvious way.
   if (MI->getOpcode() != PPC::RLWIMI)
-    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
 
   // Cannot commute if it has a non-zero rotate count.
   if (MI->getOperand(3).getImm() != 0)
@@ -233,7 +246,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                LastInst->getOpcode() == PPC::BDNZ) {
       if (!LastInst->getOperand(0).isMBB())
         return true;
-      if (!EnableCTRLoopAnal)
+      if (DisableCTRLoopAnal)
         return true;
       TBB = LastInst->getOperand(0).getMBB();
       Cond.push_back(MachineOperand::CreateImm(1));
@@ -244,7 +257,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                LastInst->getOpcode() == PPC::BDZ) {
       if (!LastInst->getOperand(0).isMBB())
         return true;
-      if (!EnableCTRLoopAnal)
+      if (DisableCTRLoopAnal)
         return true;
       TBB = LastInst->getOperand(0).getMBB();
       Cond.push_back(MachineOperand::CreateImm(0));
@@ -282,7 +295,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     if (!SecondLastInst->getOperand(0).isMBB() ||
         !LastInst->getOperand(0).isMBB())
       return true;
-    if (!EnableCTRLoopAnal)
+    if (DisableCTRLoopAnal)
       return true;
     TBB = SecondLastInst->getOperand(0).getMBB();
     Cond.push_back(MachineOperand::CreateImm(1));
@@ -296,7 +309,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     if (!SecondLastInst->getOperand(0).isMBB() ||
         !LastInst->getOperand(0).isMBB())
       return true;
-    if (!EnableCTRLoopAnal)
+    if (DisableCTRLoopAnal)
       return true;
     TBB = SecondLastInst->getOperand(0).getMBB();
     Cond.push_back(MachineOperand::CreateImm(0));
@@ -426,40 +439,19 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                   unsigned SrcReg, bool isKill,
                                   int FrameIdx,
                                   const TargetRegisterClass *RC,
-                                  SmallVectorImpl<MachineInstr*> &NewMIs) const{
+                                  SmallVectorImpl<MachineInstr*> &NewMIs,
+                                  bool &NonRI, bool &SpillsVRS) const{
   DebugLoc DL;
   if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
-    if (SrcReg != PPC::LR) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
-                                         .addReg(SrcReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    } else {
-      // FIXME: this spills LR immediately to memory in one step.  To do this,
-      // we use R11, which we know cannot be used in the prolog/epilog.  This is
-      // a hack.
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
-                                         .addReg(PPC::R11,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
   } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
-    if (SrcReg != PPC::LR8) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
-                                         .addReg(SrcReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    } else {
-      // FIXME: this spills LR immediately to memory in one step.  To do this,
-      // we use X11, which we know cannot be used in the prolog/epilog.  This is
-      // a hack.
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
-                                         .addReg(PPC::X11,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
   } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
                                        .addReg(SrcReg,
@@ -471,47 +463,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                                getKillRegState(isKill)),
                                        FrameIdx));
   } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
-    if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
-        (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
-                                         .addReg(SrcReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-      return true;
-    } else {
-      // FIXME: We need a scatch reg here.  The trouble with using R0 is that
-      // it's possible for the stack frame to be so big the save location is
-      // out of range of immediate offsets, necessitating another register.
-      // We hack this on Darwin by reserving R2.  It's probably broken on Linux
-      // at the moment.
-
-      bool is64Bit = TM.getSubtargetImpl()->isPPC64();
-      // We need to store the CR in the low 4-bits of the saved value.  First,
-      // issue a MFCR to save all of the CRBits.
-      unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
-                              (is64Bit ? PPC::X2 : PPC::R2) :
-                              (is64Bit ? PPC::X0 : PPC::R0);
-      NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
-                                             PPC::MFCRpseud), ScratchReg)
-                               .addReg(SrcReg, getKillRegState(isKill)));
-
-      // If the saved register wasn't CR0, shift the bits left so that they are
-      // in CR0's slot.
-      if (SrcReg != PPC::CR0) {
-        unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
-        // rlwinm scratch, scratch, ShiftBits, 0, 31.
-        NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
-                           PPC::RLWINM), ScratchReg)
-                       .addReg(ScratchReg).addImm(ShiftBits)
-                       .addImm(0).addImm(31));
-      }
-
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
-                                           PPC::STW8 : PPC::STW))
-                                         .addReg(ScratchReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    return true;
   } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
     // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
     // backend currently only uses CR1EQ as an individual bit, this should
@@ -544,20 +500,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
       Reg = PPC::CR7;
 
     return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
-                               &PPC::CRRCRegClass, NewMIs);
+                               &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
 
   } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
-    // We don't have indexed addressing for vector loads.  Emit:
-    // R0 = ADDI FI#
-    // STVX VAL, 0, R0
-    //
-    // FIXME: We use R0 here, because it isn't available for RA.
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
-                                       FrameIdx, 0, 0));
-    NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
-                     .addReg(SrcReg, getKillRegState(isKill))
-                     .addReg(PPC::R0)
-                     .addReg(PPC::R0));
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+    assert(TM.getSubtargetImpl()->isDarwin() &&
+           "VRSAVE only needs spill/restore on Darwin");
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    SpillsVRS = true;
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -574,10 +532,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   SmallVector<MachineInstr*, 4> NewMIs;
 
-  if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
-    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setHasSpills();
+
+  bool NonRI = false, SpillsVRS = false;
+  if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+                          NonRI, SpillsVRS))
     FuncInfo->setSpillsCR();
-  }
+
+  if (SpillsVRS)
+    FuncInfo->setSpillsVRSAVE();
+
+  if (NonRI)
+    FuncInfo->setHasNonRISpills();
 
   for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
     MBB.insert(MI, NewMIs[i]);
@@ -595,25 +562,14 @@ bool
 PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                                    unsigned DestReg, int FrameIdx,
                                    const TargetRegisterClass *RC,
-                                   SmallVectorImpl<MachineInstr*> &NewMIs)const{
+                                   SmallVectorImpl<MachineInstr*> &NewMIs,
+                                   bool &NonRI, bool &SpillsVRS) const{
   if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
-    if (DestReg != PPC::LR) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
-                                                 DestReg), FrameIdx));
-    } else {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
-                                                 PPC::R11), FrameIdx));
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+                                               DestReg), FrameIdx));
   } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
-    if (DestReg != PPC::LR8) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
-                                         FrameIdx));
-    } else {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
-                                                 PPC::X11), FrameIdx));
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+                                       FrameIdx));
   } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
                                        FrameIdx));
@@ -621,37 +577,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
                                        FrameIdx));
   } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
-    if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
-        (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
-                                                 get(PPC::RESTORE_CR), DestReg)
-                                         , FrameIdx));
-      return true;
-    } else {
-      // FIXME: We need a scatch reg here.  The trouble with using R0 is that
-      // it's possible for the stack frame to be so big the save location is
-      // out of range of immediate offsets, necessitating another register.
-      // We hack this on Darwin by reserving R2.  It's probably broken on Linux
-      // at the moment.
-      unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
-                                                            PPC::R2 : PPC::R0;
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
-                                         ScratchReg), FrameIdx));
-  
-      // If the reloaded register isn't CR0, shift the bits right so that they are
-      // in the right CR's slot.
-      if (DestReg != PPC::CR0) {
-        unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
-        // rlwinm r11, r11, 32-ShiftBits, 0, 31.
-        NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
-                      .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
-                      .addImm(31));
-      }
-  
-      NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
-                         PPC::MTCRF8 : PPC::MTCRF), DestReg)
-                       .addReg(ScratchReg));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+                                               get(PPC::RESTORE_CR), DestReg),
+                                       FrameIdx));
+    return true;
   } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
 
     unsigned Reg = 0;
@@ -681,18 +610,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
       Reg = PPC::CR7;
 
     return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
-                                &PPC::CRRCRegClass, NewMIs);
+                                &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
 
   } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
-    // We don't have indexed addressing for vector loads.  Emit:
-    // R0 = ADDI FI#
-    // Dest = LVX 0, R0
-    //
-    // FIXME: We use R0 here, because it isn't available for RA.
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
-                                       FrameIdx, 0, 0));
-    NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
-                     .addReg(PPC::R0));
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+    assert(TM.getSubtargetImpl()->isDarwin() &&
+           "VRSAVE only needs spill/restore on Darwin");
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+                                               get(PPC::RESTORE_VRSAVE),
+                                               DestReg),
+                                       FrameIdx));
+    SpillsVRS = true;
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -710,10 +641,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   SmallVector<MachineInstr*, 4> NewMIs;
   DebugLoc DL;
   if (MI != MBB.end()) DL = MI->getDebugLoc();
-  if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
-    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setHasSpills();
+
+  bool NonRI = false, SpillsVRS = false;
+  if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+                           NonRI, SpillsVRS))
     FuncInfo->setSpillsCR();
-  }
+
+  if (SpillsVRS)
+    FuncInfo->setSpillsVRSAVE();
+
+  if (NonRI)
+    FuncInfo->setHasNonRISpills();
+
   for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
     MBB.insert(MI, NewMIs[i]);
 
@@ -762,8 +704,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case PPC::GC_LABEL:
   case PPC::DBG_VALUE:
     return 0;
-  case PPC::BL8_NOP_ELF:
-  case PPC::BLA8_NOP_ELF:
+  case PPC::BL8_NOP:
+  case PPC::BLA8_NOP:
     return 8;
   default:
     return 4; // PowerPC instructions are all 4 bytes