[SystemZ] Clean up warning

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index e17c4e675c69774289e3c448f87d0494a350115e..1258d96cf62fafdcf4cb9b7a0785b7a48c052541 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,9 +57,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
  // FIXME: Remove this once the bug has been fixed!
  extern cl::opt<bool> ANDIGlueBug;
  
-PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
-    : TargetLowering(TM),
-      Subtarget(*TM.getSubtargetImpl()) {
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
+                                     const PPCSubtarget &STI)
+    : TargetLowering(TM), Subtarget(STI) {
    // Use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
@@ -88,11 +88,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
    setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  
    if (Subtarget.useCRBits()) {
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -172,13 +176,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
  
    // If we're enabling GP optimizations, use hardware square root
    if (!Subtarget.hasFSQRT() &&
-      !(TM.Options.UnsafeFPMath &&
-        Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
+      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
+        Subtarget.hasFRE()))
      setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  
    if (!Subtarget.hasFSQRT() &&
-      !(TM.Options.UnsafeFPMath &&
-        Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
+      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
+        Subtarget.hasFRES()))
      setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  
    if (Subtarget.hasFCPSGN()) {
@@ -401,6 +405,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
        setOperationAction(ISD::ADD , VT, Legal);
        setOperationAction(ISD::SUB , VT, Legal);
  
+      // Vector instructions introduced in P8
+      if (Subtarget.hasP8Altivec()) {
+        setOperationAction(ISD::CTPOP, VT, Legal);
+        setOperationAction(ISD::CTLZ, VT, Legal);
+      }
+      else {
+        setOperationAction(ISD::CTPOP, VT, Expand);
+        setOperationAction(ISD::CTLZ, VT, Expand);
+      }
+
        // We promote all shuffles to v16i8.
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
        AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@@ -455,8 +469,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
        setOperationAction(ISD::FPOW, VT, Expand);
        setOperationAction(ISD::BSWAP, VT, Expand);
-      setOperationAction(ISD::CTPOP, VT, Expand);
-      setOperationAction(ISD::CTLZ, VT, Expand);
        setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
        setOperationAction(ISD::CTTZ, VT, Expand);
        setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -593,6 +605,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
  
        addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
      }
+
+    if (Subtarget.hasP8Altivec()) 
+      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
    }
  
    if (Subtarget.has64BitSupport())
@@ -672,8 +687,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
  
    // With 32 condition bits, we don't need to sink (and duplicate) compares
    // aggressively in CodeGenPrep.
-  if (Subtarget.useCRBits())
+  if (Subtarget.useCRBits()) {
      setHasMultipleConditionRegisters();
+    setJumpIsExpensive();
+  }
  
    setMinFunctionAlignment(2);
    if (Subtarget.isDarwin())
@@ -791,8 +808,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::SHL:             return "PPCISD::SHL";
    case PPCISD::CALL:            return "PPCISD::CALL";
    case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
-  case PPCISD::CALL_TLS:        return "PPCISD::CALL_TLS";
-  case PPCISD::CALL_NOP_TLS:    return "PPCISD::CALL_NOP_TLS";
    case PPCISD::MTCTR:           return "PPCISD::MTCTR";
    case PPCISD::BCTRL:           return "PPCISD::BCTRL";
    case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
@@ -826,8 +841,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
    case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
    case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
+  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
    case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
    case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
+  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
+  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
    case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
    case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
    case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
@@ -877,7 +896,7 @@ static bool isConstantOrUndef(int Op, int Val) {
  /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
  bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                                 SelectionDAG &DAG) {
-  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
    if (ShuffleKind == 0) {
      if (IsLE)
        return false;
@@ -908,7 +927,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
  /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
  bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                                 SelectionDAG &DAG) {
-  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
    if (ShuffleKind == 0) {
      if (IsLE)
        return false;
@@ -963,7 +982,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
  /// the input operands are swapped (see PPCInstrAltivec.td).
  bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                               unsigned ShuffleKind, SelectionDAG &DAG) {
-  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
      if (ShuffleKind == 1) // unary
        return isVMerge(N, UnitSize, 0, 0);
      else if (ShuffleKind == 2) // swapped
@@ -988,7 +1007,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
  /// the input operands are swapped (see PPCInstrAltivec.td).
  bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                               unsigned ShuffleKind, SelectionDAG &DAG) {
-  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
      if (ShuffleKind == 1) // unary
        return isVMerge(N, UnitSize, 8, 8);
      else if (ShuffleKind == 2) // swapped
@@ -1032,8 +1051,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
    if (ShiftAmt < i) return -1;
  
    ShiftAmt -= i;
-  bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
-    isLittleEndian();
+  bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  
    if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
      // Check the rest of the elements to see if they are consecutive.
@@ -1106,7 +1124,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                  SelectionDAG &DAG) {
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    assert(isSplatShuffleMask(SVOp, EltSize));
-  if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
+  if (DAG.getTarget().getDataLayout()->isLittleEndian())
      return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
    else
      return SVOp->getMaskElt(0) / EltSize;
@@ -1542,8 +1560,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
  
  /// GetLabelAccessInfo - Return true if we should reference labels using a
  /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
-static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
-                               unsigned &LoOpFlags,
+static bool GetLabelAccessInfo(const TargetMachine &TM,
+                               const PPCSubtarget &Subtarget,
+                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                                 const GlobalValue *GV = nullptr) {
    HiOpFlags = PPCII::MO_HA;
    LoOpFlags = PPCII::MO_LO;
@@ -1558,7 +1577,7 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
  
    // If this is a reference to a global value that requires a non-lazy-ptr, make
    // sure that instruction lowering adds it.
-  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+  if (GV && Subtarget.hasLazyResolverStub(GV)) {
      HiOpFlags |= PPCII::MO_NLP_FLAG;
      LoOpFlags |= PPCII::MO_NLP_FLAG;
  
@@ -1590,6 +1609,15 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
    return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
  }
  
+static void setUsesTOCBasePtr(MachineFunction &MF) {
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setUsesTOCBasePtr();
+}
+
+static void setUsesTOCBasePtr(SelectionDAG &DAG) {
+  setUsesTOCBasePtr(DAG.getMachineFunction());
+}
+
  SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
    EVT PtrVT = Op.getValueType();
@@ -1599,13 +1627,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
    // 64-bit SVR4 ABI code is always position-independent.
    // The actual address of the GlobalValue is stored in the TOC.
    if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    setUsesTOCBasePtr(DAG);
      SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
      return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
    unsigned MOHiFlag, MOLoFlag;
-  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  bool isPIC =
+      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
  
    if (isPIC && Subtarget.isSVR4ABI()) {
      SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
@@ -1629,13 +1659,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
    // 64-bit SVR4 ABI code is always position-independent.
    // The actual address of the GlobalValue is stored in the TOC.
    if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    setUsesTOCBasePtr(DAG);
      SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
      return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
    unsigned MOHiFlag, MOLoFlag;
-  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  bool isPIC =
+      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
  
    if (isPIC && Subtarget.isSVR4ABI()) {
      SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
@@ -1659,39 +1691,20 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
    // 64-bit SVR4 ABI code is always position-independent.
    // The actual BlockAddress is stored in the TOC.
    if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    setUsesTOCBasePtr(DAG);
      SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
      return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
    unsigned MOHiFlag, MOLoFlag;
-  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  bool isPIC =
+      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
    SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
    SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
    return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
  }
  
-// Generate a call to __tls_get_addr for the given GOT entry Op.
-std::pair<SDValue,SDValue>
-PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
-                                SelectionDAG &DAG) const {
-
-  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  Entry.Node = Op;
-  Entry.Ty = IntPtrTy;
-  Args.push_back(Entry);
-
-  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
-    .setCallee(CallingConv::C, IntPtrTy,
-               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
-               std::move(Args), 0);
-
-  return LowerCallTo(CLI);
-}
-
  SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  
@@ -1726,6 +1739,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                  PPCII::MO_TLS);
      SDValue GOTPtr;
      if (is64bit) {
+      setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                             PtrVT, GOTReg, TGA);
@@ -1737,10 +1751,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
    }
  
    if (Model == TLSModel::GeneralDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TLSGD);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
      SDValue GOTPtr;
      if (is64bit) {
+      setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                     GOTReg, TGA);
@@ -1750,17 +1764,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
-    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
-                                   GOTPtr, TGA);
-    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
-    return CallResult.first;
+    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+                       GOTPtr, TGA, TGA);
    }
  
    if (Model == TLSModel::LocalDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TLSLD);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
      SDValue GOTPtr;
      if (is64bit) {
+      setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                             GOTReg, TGA);
@@ -1770,13 +1782,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
-    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
-                                   GOTPtr, TGA);
-    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
-    SDValue TLSAddr = CallResult.first;
-    SDValue Chain = CallResult.second;
-    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
-                                      Chain, TLSAddr, TGA);
+    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+                                  PtrVT, GOTPtr, TGA, TGA);
+    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+                                      PtrVT, TLSAddr, TGA);
      return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
    }
  
@@ -1793,13 +1802,15 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
    // 64-bit SVR4 ABI code is always position-independent.
    // The actual address of the GlobalValue is stored in the TOC.
    if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    setUsesTOCBasePtr(DAG);
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
      return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
    unsigned MOHiFlag, MOLoFlag;
-  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+  bool isPIC =
+      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
  
    if (isPIC && Subtarget.isSVR4ABI()) {
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
@@ -2338,10 +2349,9 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
  
  /// EnsureStackAlignment - Round stack frame size up from NumBytes to
  /// ensure minimum alignment required for target.
-static unsigned EnsureStackAlignment(const TargetMachine &Target,
+static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                       unsigned NumBytes) {
-  unsigned TargetAlign =
-      Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
+  unsigned TargetAlign = Lowering->getStackAlignment();
    unsigned AlignMask = TargetAlign - 1;
    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
    return NumBytes;
@@ -2422,7 +2432,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
                   *DAG.getContext());
  
    // Reserve space for the linkage area on the stack.
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  
    CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2508,7 +2518,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
    // call optimized function's reserved stack space needs to be aligned so that
    // taking the difference between two stack areas will result in an aligned
    // stack.
-  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+  MinReservedArea =
+      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
    FuncInfo->setMinReservedArea(MinReservedArea);
  
    SmallVector<SDValue, 8> MemOps;
@@ -2623,14 +2634,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    MachineFrameInfo *MFI = MF.getFrameInfo();
    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  
+  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+         "fastcc not supported on varargs functions");
+
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Potential tail calls could cause overwriting of argument stack slots.
    bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                         (CallConv == CallingConv::Fast));
    unsigned PtrByteSize = 8;
-
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
-                                                          isELFv2ABI);
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  
    static const MCPhysReg GPR[] = {
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2674,7 +2686,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    // although the first ones are often in registers.
  
    unsigned ArgOffset = LinkageSize;
-  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
    SmallVector<SDValue, 8> MemOps;
    Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
    unsigned CurArgIdx = 0;
@@ -2686,22 +2698,37 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      unsigned ObjSize = ObjectVT.getStoreSize();
      unsigned ArgSize = ObjSize;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
-    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
-    CurArgIdx = Ins[ArgNo].OrigArgIndex;
+    if (Ins[ArgNo].isOrigArg()) {
+      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+    }
+    // We re-align the argument offset for each argument, except when using the
+    // fast calling convention, when we need to make sure we do that only when
+    // we'll actually use a stack slot.
+    unsigned CurArgOffset, Align;
+    auto ComputeArgOffset = [&]() {
+      /* Respect alignment of argument on the stack.  */
+      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+      CurArgOffset = ArgOffset;
+    };
  
-    /* Respect alignment of argument on the stack.  */
-    unsigned Align =
-      CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
-    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-    unsigned CurArgOffset = ArgOffset;
+    if (CallConv != CallingConv::Fast) {
+      ComputeArgOffset();
  
-    /* Compute GPR index associated with argument offset.  */
-    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
-    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+      /* Compute GPR index associated with argument offset.  */
+      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+    }
  
      // FIXME the codegen can be much improved in some cases.
      // We do not have to keep everything in memory.
      if (Flags.isByVal()) {
+      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
+      if (CallConv == CallingConv::Fast)
+        ComputeArgOffset();
+
        // ObjSize is the true size, ArgSize rounded up to multiple of registers.
        ObjSize = Flags.getByValSize();
        ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2745,7 +2772,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          InVals.push_back(Arg);
  
          if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store;
  
@@ -2807,7 +2834,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
        // passed directly.  Clang may use those instead of "byval" aggregate
        // types to avoid forcing arguments to memory unnecessarily.
        if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
          if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -2815,10 +2842,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
            // value to MVT::i64 and then truncate to the correct register size.
            ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
-      ArgOffset += 8;
+      if (CallConv != CallingConv::Fast || needsLoad)
+        ArgOffset += 8;
        break;
  
      case MVT::f32:
@@ -2832,17 +2863,20 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          if (ObjectVT == MVT::f32)
            VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
          else
-          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
-                                            &PPC::VSFRCRegClass :
-                                            &PPC::F8RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
+                                                ? &PPC::VSFRCRegClass
+                                                : &PPC::F8RCRegClass);
  
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++FPR_idx;
-      } else if (GPR_idx != Num_GPR_Regs) {
+      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
+        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+        // once we support fp <-> gpr moves.
+
          // This can only ever happen in the presence of f32 array types,
          // since otherwise we never run out of FPRs before running out
          // of GPRs.
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
          if (ObjectVT == MVT::f32) {
@@ -2854,16 +2888,21 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
  
          ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
          needsLoad = true;
        }
  
        // When passing an array of floats, the array occupies consecutive
        // space in the argument area; only round up to the next doubleword
        // at the end of the array.  Otherwise, each float takes 8 bytes.
-      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
-      ArgOffset += ArgSize;
-      if (Flags.isInConsecutiveRegsLast())
-        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      if (CallConv != CallingConv::Fast || needsLoad) {
+        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+        ArgOffset += ArgSize;
+        if (Flags.isInConsecutiveRegsLast())
+          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      }
        break;
      case MVT::v4f32:
      case MVT::v4i32:
@@ -2881,9 +2920,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
          needsLoad = true;
        }
-      ArgOffset += 16;
+      if (CallConv != CallingConv::Fast || needsLoad)
+        ArgOffset += 16;
        break;
      }
  
@@ -2912,7 +2955,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    // call optimized functions' reserved stack space needs to be aligned so that
    // taking the difference between two stack areas will result in an aligned
    // stack.
-  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+  MinReservedArea =
+      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
    FuncInfo->setMinReservedArea(MinReservedArea);
  
    // If the function takes variable number of arguments, make a frame index for
@@ -2966,9 +3010,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
    bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                         (CallConv == CallingConv::Fast));
    unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
-                                                          false);
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    unsigned ArgOffset = LinkageSize;
    // Area that is at least reserved in caller of this function.
    unsigned MinReservedArea = ArgOffset;
@@ -3062,9 +3104,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      unsigned ArgSize = ObjSize;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
-    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
-    CurArgIdx = Ins[ArgNo].OrigArgIndex;
-
+    if (Ins[ArgNo].isOrigArg()) {
+      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+    }
      unsigned CurArgOffset = ArgOffset;
  
      // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
@@ -3085,6 +3128,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
      // FIXME the codegen can be much improved in some cases.
      // We do not have to keep everything in memory.
      if (Flags.isByVal()) {
+      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
        // ObjSize is the true size, ArgSize rounded up to multiple of registers.
        ObjSize = Flags.getByValSize();
        ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -3273,7 +3318,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
    // call optimized functions' reserved stack space needs to be aligned so that
    // taking the difference between two stack areas will result in an aligned
    // stack.
-  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+  MinReservedArea =
+      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
    FuncInfo->setMinReservedArea(MinReservedArea);
  
    // If the function takes variable number of arguments, make a frame index for
@@ -3428,8 +3474,9 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
    if (SPDiff) {
      // Calculate the new stack slot for the return address.
      int SlotSize = isPPC64 ? 8 : 4;
-    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
-                                                                   isDarwinABI);
+    const PPCFrameLowering *FL =
+        MF.getSubtarget<PPCSubtarget>().getFrameLowering();
+    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
      int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                            NewRetAddrLoc, true);
      EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
@@ -3441,8 +3488,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
      // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
      // slot as the FP is never overwritten.
      if (isDarwinABI) {
-      int NewFPLoc =
-        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
        int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                            true);
        SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
@@ -3726,6 +3772,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                                     MPI.getWithOffset(8), false, false,
                                     LoadsInv, 8);
  
+      setUsesTOCBasePtr(DAG);
        SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                          InFlag);
        Chain = TOCVal.getValue(0);
@@ -3764,23 +3811,6 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
    if (Callee.getNode()) {
      Ops.push_back(Chain);
      Ops.push_back(Callee);
-
-    // If this is a call to __tls_get_addr, find the symbol whose address
-    // is to be taken and add it to the list.  This will be used to 
-    // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
-    // We find the symbol by walking the chain to the CopyFromReg, walking
-    // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
-    // pulling the symbol from that node.
-    if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
-      if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
-        assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
-        SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
-        SDValue TGTAddr = AddI->getOperand(1);
-        assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
-               "Didn't find target global TLS address where we expected one");
-        Ops.push_back(TGTAddr);
-        CallOpc = PPCISD::CALL_TLS;
-      }
    }
    // If this is a tail call add stack pointer delta.
    if (isTailCall)
@@ -3792,9 +3822,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                    RegsToPass[i].second.getValueType()));
  
-  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
-  if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
+  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+  // into the call.
+  if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
+    setUsesTOCBasePtr(DAG);
      Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+  }
  
    return CallOpc;
  }
@@ -3867,7 +3900,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                                SmallVectorImpl<SDValue> &InVals,
                                ImmutableCallSite *CS) const {
  
-  bool isELFv2ABI = Subtarget.isELFv2ABI();
    std::vector<EVT> NodeTys;
    SmallVector<SDValue, 8> Ops;
    unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
@@ -3886,8 +3918,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
       getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
  
    // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3932,7 +3963,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
  
        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
-      unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
        SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
        SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
  
@@ -3941,12 +3972,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
        Ops.insert(std::next(Ops.begin()), AddTOC);
      } else if ((CallOpc == PPCISD::CALL) &&
                 (!isLocalCall(Callee) ||
-                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
        // Otherwise insert NOP for non-local calls.
        CallOpc = PPCISD::CALL_NOP;
-    } else if (CallOpc == PPCISD::CALL_TLS)
-      // For 64-bit SVR4, TLS calls are always non-local.
-      CallOpc = PPCISD::CALL_NOP_TLS;
    }
  
    Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -4041,7 +4069,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                   *DAG.getContext());
  
    // Reserve space for the linkage area on the stack.
-  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                         PtrByteSize);
  
    if (isVarArg) {
@@ -4270,13 +4298,39 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        CallConv == CallingConv::Fast)
      MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
  
+  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+         "fastcc not supported on varargs functions");
+
    // Count how many bytes are to be pushed on the stack, including the linkage
    // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
    // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
    // area is 32 bytes reserved space for [SP][CR][LR][TOC].
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
-                                                          isELFv2ABI);
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    unsigned NumBytes = LinkageSize;
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  static const MCPhysReg GPR[] = {
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+  static const MCPhysReg *FPR = GetFPR();
+
+  static const MCPhysReg VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+  static const MCPhysReg VSRH[] = {
+    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+  };
+
+  const unsigned NumGPRs = array_lengthof(GPR);
+  const unsigned NumFPRs = 13;
+  const unsigned NumVRs  = array_lengthof(VR);
+
+  // When using the fast calling convention, we don't provide backing for
+  // arguments that will be in registers.
+  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
  
    // Add up all the space actually used.
    for (unsigned i = 0; i != NumOps; ++i) {
@@ -4284,6 +4338,35 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      EVT ArgVT = Outs[i].VT;
      EVT OrigVT = Outs[i].ArgVT;
  
+    if (CallConv == CallingConv::Fast) {
+      if (Flags.isByVal())
+        NumGPRsUsed += (Flags.getByValSize()+7)/8;
+      else
+        switch (ArgVT.getSimpleVT().SimpleTy) {
+        default: llvm_unreachable("Unexpected ValueType for argument!");
+        case MVT::i1:
+        case MVT::i32:
+        case MVT::i64:
+          if (++NumGPRsUsed <= NumGPRs)
+            continue;
+          break;
+        case MVT::f32:
+        case MVT::f64:
+          if (++NumFPRsUsed <= NumFPRs)
+            continue;
+          break;
+        case MVT::v4f32:
+        case MVT::v4i32:
+        case MVT::v8i16:
+        case MVT::v16i8:
+        case MVT::v2f64:
+        case MVT::v2i64:
+          if (++NumVRsUsed <= NumVRs)
+            continue;
+          break;
+        }
+    }
+
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
@@ -4307,7 +4390,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    // Tail call needs the stack to be aligned.
    if (getTargetMachine().Options.GuaranteedTailCallOpt &&
        CallConv == CallingConv::Fast)
-    NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
  
    // Calculate by how many bytes the stack has to be adjusted in case of tail
    // call optimization.
@@ -4340,26 +4423,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    // must be stored to our stack, and loaded into integer regs as well, if
    // any integer regs are available for argument passing.
    unsigned ArgOffset = LinkageSize;
-  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
-  static const MCPhysReg GPR[] = {
-    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
-    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
-  };
-  static const MCPhysReg *FPR = GetFPR();
-
-  static const MCPhysReg VR[] = {
-    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
-    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
-  };
-  static const MCPhysReg VSRH[] = {
-    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
-    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
-  };
-
-  const unsigned NumGPRs = array_lengthof(GPR);
-  const unsigned NumFPRs = 13;
-  const unsigned NumVRs  = array_lengthof(VR);
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4371,22 +4434,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      EVT ArgVT = Outs[i].VT;
      EVT OrigVT = Outs[i].ArgVT;
  
-    /* Respect alignment of argument on the stack.  */
-    unsigned Align =
-      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
-    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
-    /* Compute GPR index associated with argument offset.  */
-    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
-    GPR_idx = std::min(GPR_idx, NumGPRs);
-
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDValue PtrOff;
  
-    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+    // We re-align the argument offset for each argument, except when using the
+    // fast calling convention, where we must do so only if the argument will
+    // actually use a stack slot.
+    auto ComputePtrOff = [&]() {
+      /* Respect alignment of argument on the stack.  */
+      unsigned Align =
+        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  
-    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+    };
+
+    if (CallConv != CallingConv::Fast) {
+      ComputePtrOff();
+
+      /* Compute GPR index associated with argument offset.  */
+      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+      GPR_idx = std::min(GPR_idx, NumGPRs);
+    }
  
      // Promote integers to 64-bit values.
      if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
@@ -4411,6 +4483,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        if (Size == 0)
          continue;
  
+      if (CallConv == CallingConv::Fast)
+        ComputePtrOff();
+
        // All aggregates smaller than 8 bytes must be passed right-justified.
        if (Size==1 || Size==2 || Size==4) {
          EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4419,7 +4494,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                          MachinePointerInfo(), VT,
                                          false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
-          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
  
            ArgOffset += PtrByteSize;
            continue;
@@ -4481,7 +4556,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
  
          // Done with this argument.
          ArgOffset += PtrByteSize;
@@ -4517,13 +4592,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        // passed directly.  Clang may use those instead of "byval" aggregate
        // types to avoid forcing arguments to memory unnecessarily.
        if (GPR_idx != NumGPRs) {
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, false, MemOpChains,
                           TailCallArguments, dl);
+        if (CallConv == CallingConv::Fast)
+          ArgOffset += PtrByteSize;
        }
-      ArgOffset += PtrByteSize;
+      if (CallConv != CallingConv::Fast)
+        ArgOffset += PtrByteSize;
        break;
      case MVT::f32:
      case MVT::f64: {
@@ -4537,6 +4618,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        // then the parameter save area.  For now, put all arguments to vararg
        // routines always in both locations (FPR *and* GPR or stack slot).
        bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+      bool NeededLoad = false;
  
        // First load the argument into the next available FPR.
        if (FPR_idx != NumFPRs)
@@ -4545,7 +4627,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        // Next, load the argument into GPR or stack slot if needed.
        if (!NeedGPROrStack)
          ;
-      else if (GPR_idx != NumGPRs) {
+      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+        // once we support fp <-> gpr moves.
+
          // In the non-vararg case, this can only ever happen in the
          // presence of f32 array types, since otherwise we never run
          // out of FPRs before running out of GPRs.
@@ -4584,8 +4669,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            ArgVal = SDValue();
  
          if (ArgVal.getNode())
-          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
          // Single-precision floating-point values are mapped to the
          // second (rightmost) word of the stack doubleword.
          if (Arg.getValueType() == MVT::f32 &&
@@ -4597,14 +4685,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, false, MemOpChains,
                           TailCallArguments, dl);
+
+        NeededLoad = true;
        }
        // When passing an array of floats, the array occupies consecutive
        // space in the argument area; only round up to the next doubleword
        // at the end of the array.  Otherwise, each float takes 8 bytes.
-      ArgOffset += (Arg.getValueType() == MVT::f32 &&
-                    Flags.isInConsecutiveRegs()) ? 4 : 8;
-      if (Flags.isInConsecutiveRegsLast())
-        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      if (CallConv != CallingConv::Fast || NeededLoad) {
+        ArgOffset += (Arg.getValueType() == MVT::f32 &&
+                      Flags.isInConsecutiveRegs()) ? 4 : 8;
+        if (Flags.isInConsecutiveRegsLast())
+          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      }
        break;
      }
      case MVT::v4f32:
@@ -4663,11 +4755,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
  
          RegsToPass.push_back(std::make_pair(VReg, Arg));
        } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
+        if (CallConv == CallingConv::Fast)
+          ArgOffset += 16;
        }
-      ArgOffset += 16;
+
+      if (CallConv != CallingConv::Fast)
+        ArgOffset += 16;
        break;
      }
    }
@@ -4685,9 +4784,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        !isFunctionGlobalAddress(Callee) &&
        !isa<ExternalSymbolSDNode>(Callee)) {
      // Load r2 into a virtual register and store it to the TOC save area.
+    setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
-    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
@@ -4749,8 +4849,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    // Count how many bytes are to be pushed on the stack, including the linkage
    // area, and parameter passing area.  We start with 24/48 bytes, which is
    // prereserved space for [SP][CR][LR][3 x unused].
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
-                                                          false);
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    unsigned NumBytes = LinkageSize;
  
    // Add up all the space actually used.
@@ -4795,7 +4894,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    // Tail call needs the stack to be aligned.
    if (getTargetMachine().Options.GuaranteedTailCallOpt &&
        CallConv == CallingConv::Fast)
-    NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
  
    // Calculate by how many bytes the stack has to be adjusted in case of tail
    // call optimization.
@@ -5208,7 +5307,6 @@ SDValue
  PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
    MachineFunction &MF = DAG.getMachineFunction();
    bool isPPC64 = Subtarget.isPPC64();
-  bool isDarwinABI = Subtarget.isDarwinABI();
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Get current frame pointer save index.  The users of this index will be
@@ -5219,7 +5317,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
    // If the frame pointer save index hasn't been defined yet.
    if (!RASI) {
      // Find out what the fix offset of the frame pointer save area.
-    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
      // Allocate the frame index for frame pointer save area.
      RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
      // Save the result.
@@ -5232,7 +5330,6 @@ SDValue
  PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
    MachineFunction &MF = DAG.getMachineFunction();
    bool isPPC64 = Subtarget.isPPC64();
-  bool isDarwinABI = Subtarget.isDarwinABI();
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Get current frame pointer save index.  The users of this index will be
@@ -5243,9 +5340,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
    // If the frame pointer save index hasn't been defined yet.
    if (!FPSI) {
      // Find out what the fix offset of the frame pointer save area.
-    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
-                                                           isDarwinABI);
-
+    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
      // Allocate the frame index for frame pointer save area.
      FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
      // Save the result.
@@ -5451,10 +5546,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
    switch (Op.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
    case MVT::i32:
-    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
-                        (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
-                                                   PPCISD::FCTIDZ),
-                      dl, MVT::f64, Src);
+    Tmp = DAG.getNode(
+        Op.getOpcode() == ISD::FP_TO_SINT
+            ? PPCISD::FCTIWZ
+            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+        dl, MVT::f64, Src);
      break;
    case MVT::i64:
      assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
@@ -5594,13 +5690,14 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
  
    // If we have FCFIDS, then use it when converting to single-precision.
    // Otherwise, convert to double-precision and then round.
-  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
-                   (Op.getOpcode() == ISD::UINT_TO_FP ?
-                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
-                   (Op.getOpcode() == ISD::UINT_TO_FP ?
-                    PPCISD::FCFIDU : PPCISD::FCFID);
-  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
-                   MVT::f32 : MVT::f64;
+  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+                                                            : PPCISD::FCFIDS)
+                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+                                                            : PPCISD::FCFID);
+  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+                  ? MVT::f32
+                  : MVT::f64;
  
    if (Op.getOperand(0).getValueType() == MVT::i64) {
      SDValue SINT = Op.getOperand(0);
@@ -6706,7 +6803,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue>&Results,
                                             SelectionDAG &DAG) const {
-  const TargetMachine &TM = getTargetMachine();
    SDLoc dl(N);
    switch (N->getOpcode()) {
    default:
@@ -6737,8 +6833,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
      break;
    }
    case ISD::VAARG: {
-    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
-        || TM.getSubtarget<PPCSubtarget>().isPPC64())
+    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
        return;
  
      EVT VT = N->getValueType(0);
@@ -6820,8 +6915,7 @@ MachineBasicBlock *
  PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                      bool is64bit, unsigned BinOpcode) const {
    // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction *F = BB->getParent();
@@ -6882,8 +6976,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                              bool is8bit,    // operation
                                              unsigned BinOpcode) const {
    // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
    // In 64 bit mode we have to use 64 bits for addresses, even though the
    // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
    // registers without caring whether they're 32 or 64, but here we're
@@ -7010,8 +7103,7 @@ llvm::MachineBasicBlock*
  PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) const {
    DebugLoc DL = MI->getDebugLoc();
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  
    MachineFunction *MF = MBB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7084,6 +7176,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    unsigned BufReg = MI->getOperand(1).getReg();
  
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+    setUsesTOCBasePtr(*MBB->getParent());
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
@@ -7094,23 +7187,21 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    // Naked functions never have a base pointer, and so we use r1. For all
    // other functions, this decision must be delayed until during PEI.
    unsigned BaseReg;
-  if (MF->getFunction()->getAttributes().hasAttribute(
-          AttributeSet::FunctionIndex, Attribute::Naked))
+  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
      BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
    else
      BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
  
    MIB = BuildMI(*thisMBB, MI, DL,
                  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
-          .addReg(BaseReg)
-          .addImm(BPOffset)
-          .addReg(BufReg);
+            .addReg(BaseReg)
+            .addImm(BPOffset)
+            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  
    // Setup
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
-  const PPCRegisterInfo *TRI =
-      getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
    MIB.addRegMask(TRI->getNoPreservedMask());
  
    BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -7124,8 +7215,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
  
    // mainMBB:
    //  mainDstReg = 0
-  MIB = BuildMI(mainMBB, DL,
-    TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+  MIB =
+      BuildMI(mainMBB, DL,
+              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
  
    // Store IP
    if (Subtarget.isPPC64()) {
@@ -7159,8 +7251,7 @@ MachineBasicBlock *
  PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                       MachineBasicBlock *MBB) const {
    DebugLoc DL = MI->getDebugLoc();
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  
    MachineFunction *MF = MBB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7179,10 +7270,13 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    // Since FP is only updated here but NOT referenced, it's treated as GPR.
    unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
    unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
-  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 :
-                  (Subtarget.isSVR4ABI() &&
-                   MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
-                     PPC::R29 : PPC::R30);
+  unsigned BP =
+      (PVT == MVT::i64)
+          ? PPC::X30
+          : (Subtarget.isSVR4ABI() &&
+                     MF->getTarget().getRelocationModel() == Reloc::PIC_
+                 ? PPC::R29
+                 : PPC::R30);
  
    MachineInstrBuilder MIB;
  
@@ -7245,6 +7339,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
  
    // Reload TOC
    if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+    setUsesTOCBasePtr(*MBB->getParent());
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg);
@@ -7265,8 +7360,20 @@ MachineBasicBlock *
  PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
    if (MI->getOpcode() == TargetOpcode::STACKMAP ||
-      MI->getOpcode() == TargetOpcode::PATCHPOINT)
+      MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+        MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+      // Call lowering should have added an r2 operand to indicate a dependence
+      // on the TOC base pointer value. It can't, however, because there is no
+      // way to mark the dependence as implicit there, and so the stackmap code
+      // will confuse it with a regular operand. Instead, add the dependence
+      // here.
+      setUsesTOCBasePtr(*BB->getParent());
+      MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
+    }
+
      return emitPatchPoint(MI, BB);
+  }
  
    if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
        MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
@@ -7276,8 +7383,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      return emitEHSjLjLongJmp(MI, BB);
    }
  
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  
    // To "insert" these instructions we actually have to insert their
    // control-flow patterns.
@@ -7288,9 +7394,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    MachineFunction *F = BB->getParent();
  
    if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
-                                 MI->getOpcode() == PPC::SELECT_CC_I8 ||
-                                 MI->getOpcode() == PPC::SELECT_I4 ||
-                                 MI->getOpcode() == PPC::SELECT_I8)) {
+                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
+                              MI->getOpcode() == PPC::SELECT_I4 ||
+                              MI->getOpcode() == PPC::SELECT_I8)) {
      SmallVector<MachineOperand, 2> Cond;
      if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8)
@@ -7300,8 +7406,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      Cond.push_back(MI->getOperand(1));
  
      DebugLoc dl = MI->getDebugLoc();
-    const TargetInstrInfo *TII =
-        getTargetMachine().getSubtargetImpl()->getInstrInfo();
      TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                        Cond, MI->getOperand(2).getReg(),
                        MI->getOperand(3).getReg());
@@ -7762,7 +7866,7 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                              bool &UseOneConstNR) const {
    EVT VT = Operand.getValueType();
    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
        (VT == MVT::v2f64 && Subtarget.hasVSX())) {
      // Convergence is quadratic, so we essentially double the number of digits
@@ -7783,7 +7887,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
                                              unsigned &RefinementSteps) const {
    EVT VT = Operand.getValueType();
    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::f64 && Subtarget.hasFRE()) ||
        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
        (VT == MVT::v2f64 && Subtarget.hasVSX())) {
      // Convergence is quadratic, so we essentially double the number of digits
@@ -7995,8 +8099,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
    SelectionDAG &DAG = DCI.DAG;
    SDLoc dl(N);
  
-  assert(Subtarget.useCRBits() &&
-         "Expecting to be tracking CR bits");
+  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
    // If we're tracking CR bits, we need to be careful that we don't have:
    //   trunc(binary-ops(zext(x), zext(y)))
    // or
@@ -8292,10 +8395,8 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
        N->getValueType(0) != MVT::i64)
      return SDValue();
  
-  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
-        Subtarget.useCRBits()) ||
-       (N->getOperand(0).getValueType() == MVT::i32 &&
-        Subtarget.isPPC64())))
+  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
+        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
      return SDValue();
  
    if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -8580,13 +8681,14 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
  
    // If we have FCFIDS, then use it when converting to single-precision.
    // Otherwise, convert to double-precision and then round.
-  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
-                   (Op.getOpcode() == ISD::UINT_TO_FP ?
-                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
-                   (Op.getOpcode() == ISD::UINT_TO_FP ?
-                    PPCISD::FCFIDU : PPCISD::FCFID);
-  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
-                   MVT::f32 : MVT::f64;
+  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+                                                            : PPCISD::FCFIDS)
+                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+                                                            : PPCISD::FCFID);
+  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+                  ? MVT::f32
+                  : MVT::f64;
  
    // If we're converting from a float, to an int, and back to a float again,
    // then we don't need the store/load pair at all.
@@ -8719,7 +8821,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
  
  SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
-  const TargetMachine &TM = getTargetMachine();
    SelectionDAG &DAG = DCI.DAG;
    SDLoc dl(N);
    switch (N->getOpcode()) {
@@ -8756,8 +8857,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      return combineFPToIntToFP(N, DCI);
    case ISD::STORE: {
      // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
-    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
-        !cast<StoreSDNode>(N)->isTruncatingStore() &&
+    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
          N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
          N->getOperand(1).getValueType() == MVT::i32 &&
          N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
@@ -8788,8 +8888,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          N->getOperand(1).getNode()->hasOneUse() &&
          (N->getOperand(1).getValueType() == MVT::i32 ||
           N->getOperand(1).getValueType() == MVT::i16 ||
-         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
-          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
            N->getOperand(1).getValueType() == MVT::i64))) {
        SDValue BSwapOp = N->getOperand(1).getOperand(0);
        // Do an any-extend to 32-bits if this is a half-word input.
@@ -8810,8 +8909,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      EVT VT = N->getOperand(1).getValueType();
      if (VT.isSimple()) {
        MVT StoreVT = VT.getSimpleVT();
-      if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
-          TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
            (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
             StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
          return expandVSXStoreForLE(N, DCI);
@@ -8825,8 +8923,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      // For little endian, VSX loads require generating lxvd2x/xxswapd.
      if (VT.isSimple()) {
        MVT LoadVT = VT.getSimpleVT();
-      if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
-          TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
            (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
             LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
          return expandVSXLoadForLE(N, DCI);
@@ -8834,12 +8931,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  
      Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
      unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
-    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
-        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+    if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() &&
          // P8 and later hardware should just use LOAD.
-        !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
-        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
-         VT == MVT::v4i32 || VT == MVT::v4f32) &&
+        !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+                                     VT == MVT::v4i32 || VT == MVT::v4f32) &&
          LD->getAlignment() < ABIAlignment) {
        // This is a type-legal unaligned Altivec load.
        SDValue Chain = LD->getChain();
@@ -8950,40 +9045,41 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      }
      }
      break;
-  case ISD::INTRINSIC_WO_CHAIN: {
-    bool isLittleEndian = Subtarget.isLittleEndian();
-    Intrinsic::ID Intr = (isLittleEndian ?
-                          Intrinsic::ppc_altivec_lvsr :
-                          Intrinsic::ppc_altivec_lvsl);
-    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
-        N->getOperand(1)->getOpcode() == ISD::ADD) {
-      SDValue Add = N->getOperand(1);
-
-      if (DAG.MaskedValueIsZero(Add->getOperand(1),
-            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
-              Add.getValueType().getScalarType().getSizeInBits()))) {
-        SDNode *BasePtr = Add->getOperand(0).getNode();
-        for (SDNode::use_iterator UI = BasePtr->use_begin(),
-             UE = BasePtr->use_end(); UI != UE; ++UI) {
-          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
-                Intr) {
-            // We've found another LVSL/LVSR, and this address is an aligned
-            // multiple of that one. The results will be the same, so use the
-            // one we've just found instead.
-
-            return SDValue(*UI, 0);
+    case ISD::INTRINSIC_WO_CHAIN: {
+      bool isLittleEndian = Subtarget.isLittleEndian();
+      Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+                                           : Intrinsic::ppc_altivec_lvsl);
+      if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
+          N->getOperand(1)->getOpcode() == ISD::ADD) {
+        SDValue Add = N->getOperand(1);
+
+        if (DAG.MaskedValueIsZero(
+                Add->getOperand(1),
+                APInt::getAllOnesValue(4 /* 16 byte alignment */)
+                    .zext(
+                        Add.getValueType().getScalarType().getSizeInBits()))) {
+          SDNode *BasePtr = Add->getOperand(0).getNode();
+          for (SDNode::use_iterator UI = BasePtr->use_begin(),
+                                    UE = BasePtr->use_end();
+               UI != UE; ++UI) {
+            if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+                cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+                    Intr) {
+              // We've found another LVSL/LVSR, and this address is an aligned
+              // multiple of that one. The results will be the same, so use the
+              // one we've just found instead.
+
+              return SDValue(*UI, 0);
+            }
            }
          }
        }
      }
-    }
  
      break;
    case ISD::INTRINSIC_W_CHAIN: {
      // For little endian, VSX loads require generating lxvd2x/xxswapd.
-    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
-        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
        switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
        default:
          break;
@@ -8996,8 +9092,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
    }
    case ISD::INTRINSIC_VOID: {
      // For little endian, VSX stores require generating xxswapd/stxvd2x.
-    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
-        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
        switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
        default:
          break;
@@ -9013,8 +9108,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
          N->getOperand(0).hasOneUse() &&
          (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
-         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
-          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
            N->getValueType(0) == MVT::i64))) {
        SDValue Load = N->getOperand(0);
        LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -9305,9 +9399,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
      if (!ML)
        break;
  
-    const PPCInstrInfo *TII =
-      static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
-                                          getInstrInfo());
+    const PPCInstrInfo *TII = Subtarget.getInstrInfo();
  
      // For small loops (between 5 and 8 instructions), align to a 32-byte
      // boundary so that the entire loop fits in one instruction-cache line.
@@ -9457,8 +9549,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
    // the AsmName field from *RegisterInfo.td, then this would not be necessary.
    if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
        PPC::GPRCRegClass.contains(R.first)) {
-    const TargetRegisterInfo *TRI =
-        getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
      return std::make_pair(TRI->getMatchingSuperReg(R.first,
                              PPC::sub_32, &PPC::G8RCRegClass),
                            &PPC::G8RCRegClass);
@@ -9602,14 +9693,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
    FuncInfo->setLRStoreRequired();
    bool isPPC64 = Subtarget.isPPC64();
-  bool isDarwinABI = Subtarget.isDarwinABI();
  
    if (Depth > 0) {
      SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
      SDValue Offset =
-
-      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
-                      isPPC64? MVT::i64 : MVT::i32);
+        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
+                        isPPC64 ? MVT::i64 : MVT::i32);
      return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                         DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                     FrameAddr, Offset),
@@ -9637,8 +9726,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
    // Naked functions never have a frame pointer, and so we use r1. For all
    // other functions, this decision must be delayed until during PEI.
    unsigned FrameReg;
-  if (MF.getFunction()->getAttributes().hasAttribute(
-        AttributeSet::FunctionIndex, Attribute::Naked))
+  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
      FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
    else
      FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
@@ -9666,7 +9754,7 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
    bool is64Bit = isPPC64 && VT == MVT::i64;
    unsigned Reg = StringSwitch<unsigned>(RegName)
                     .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
-                   .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                     .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                    (is64Bit ? PPC::X13 : PPC::R13))
                     .Default(0);
@@ -9910,16 +9998,8 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
    // site. Hence we include LR in the scratch registers, which are in turn added
    // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
    // to CTR, which is used by any indirect call.
-  if (Subtarget.isELFv2ABI()) {
-    static const MCPhysReg ScratchRegs[] = {
-      PPC::X12, PPC::LR8, PPC::CTR8, 0
-    };
-
-    return ScratchRegs;
-  }
-
    static const MCPhysReg ScratchRegs[] = {
-    PPC::X12, PPC::X11, PPC::LR8, PPC::CTR8, 0
+    PPC::X12, PPC::LR8, PPC::CTR8, 0
    };
  
    return ScratchRegs;