Mark most PPC register classes to avoid write-after-write.

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 8f623b859b55451c3fa395a8fcd6f573896f47ff..48feb98be9bd61d9a99e26f7de298691ddf67d00 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14,28 +14,26 @@
  #include "PPCISelLowering.h"
  #include "PPCMachineFunctionInfo.h"
  #include "PPCPerfectShuffle.h"
-#include "PPCPredicates.h"
  #include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
  #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/VectorExtras.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
  #include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
  using namespace llvm;
  
  static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -53,9 +51,11 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State);
  
-static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
-cl::desc("enable preincrement load/store generation on PPC (experimental)"),
-                                     cl::Hidden);
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
  
  static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
    if (TM.getSubtargetImpl()->isDarwin())
@@ -78,9 +78,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
  
    // Set up the register classes.
-  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
-  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
-  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  
    // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -104,6 +104,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    // from FP_ROUND:  that rounds to nearest, this rounds to zero.
    setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
  
+  // We do not currently implment this libm ops for PowerPC.
+  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
+  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
+  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
    // PowerPC has no SREM/UREM instructions
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -125,10 +132,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FMA  , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);
    setOperationAction(ISD::FPOW , MVT::f32, Expand);
+  setOperationAction(ISD::FMA  , MVT::f32, Expand);
  
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  
@@ -145,9 +154,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
    setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
  
    // PowerPC does not have ROTR
    setOperationAction(ISD::ROTR, MVT::i32   , Expand);
@@ -209,16 +222,30 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::TRAP, MVT::Other, Legal);
  
    // TRAMPOLINE is custom lowered.
-  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
  
    // VASTART needs to be custom lowered to use the VarArgsFrameIndex
    setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  
-  // VAARG is custom lowered with the 32-bit SVR4 ABI.
-  if (    TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
-      && !TM.getSubtarget<PPCSubtarget>().isPPC64())
-    setOperationAction(ISD::VAARG, MVT::Other, Custom);
-  else
+  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+    if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+      // VAARG always uses double-word chunks, so promote anything smaller.
+      setOperationAction(ISD::VAARG, MVT::i1, Promote);
+      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+      setOperationAction(ISD::VAARG, MVT::i8, Promote);
+      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+      setOperationAction(ISD::VAARG, MVT::i16, Promote);
+      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+      setOperationAction(ISD::VAARG, MVT::i32, Promote);
+      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+      setOperationAction(ISD::VAARG, MVT::Other, Expand);
+    } else {
+      // VAARG is custom lowered with the 32-bit SVR4 ABI.
+      setOperationAction(ISD::VAARG, MVT::Other, Custom);
+      setOperationAction(ISD::VAARG, MVT::i64, Custom);
+    }
+  } else
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
  
    // Use the default implementation.
@@ -267,7 +294,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  
    if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
      // 64-bit PowerPC implementations can support i64 types directly
-    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
      // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
      setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
      // 64-bit PowerPC wants to expand i128 shifts itself.
@@ -329,7 +356,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
        setOperationAction(ISD::FPOW, VT, Expand);
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
        setOperationAction(ISD::CTTZ, VT, Expand);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      }
  
      // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -343,10 +372,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
      setOperationAction(ISD::STORE , MVT::v4i32, Legal);
  
-    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
-    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
-    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
-    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
  
      setOperationAction(ISD::MUL, MVT::v4f32, Legal);
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);
@@ -362,8 +391,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    }
  
-  setShiftAmountType(MVT::i32);
+  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
+  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
+  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
    setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
  
    if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
      setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -395,17 +430,34 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
    }
  
+  setMinFunctionAlignment(2);
+  if (PPCSubTarget.isDarwin())
+    setPrefFunctionAlignment(4);
+
+  setInsertFencesForAtomic(true);
+
+  setSchedulingPreference(Sched::Hybrid);
+
    computeRegisterProperties();
  }
  
  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
    const TargetMachine &TM = getTargetMachine();
    // Darwin passes everything on 4 byte boundary.
    if (TM.getSubtarget<PPCSubtarget>().isDarwin())
      return 4;
-  // FIXME SVR4 TBD
+
+  // 16byte and wider vectors are passed on 16byte boundary.
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+    if (VTy->getBitWidth() >= 128)
+      return 16;
+
+  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
+   if (PPCSubTarget.isPPC64())
+     return 8;
+
    return 4;
  }
  
@@ -434,6 +486,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
    case PPCISD::STD_32:          return "PPCISD::STD_32";
    case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
+  case PPCISD::CALL_NOP_SVR4:   return "PPCISD::CALL_NOP_SVR4";
    case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
    case PPCISD::NOP:             return "PPCISD::NOP";
    case PPCISD::MTCTR:           return "PPCISD::MTCTR";
@@ -457,18 +510,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    }
  }
  
-MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
    return MVT::i32;
  }
  
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const {
-  if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin())
-    return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4;
-  else
-    return 2;
-}
-
  //===----------------------------------------------------------------------===//
  // Node matching predicates, for use by the tblgen matching code.
  //===----------------------------------------------------------------------===//
@@ -817,14 +862,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
      APInt LHSKnownZero, LHSKnownOne;
      APInt RHSKnownZero, RHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
-                          APInt::getAllOnesValue(N.getOperand(0)
-                            .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
  
      if (LHSKnownZero.getBoolValue()) {
        DAG.ComputeMaskedBits(N.getOperand(1),
-                            APInt::getAllOnesValue(N.getOperand(1)
-                              .getValueSizeInBits()),
                              RHSKnownZero, RHSKnownOne);
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
@@ -863,10 +904,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
        return true; // [r+i]
      } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
        // Match LOAD (ADD (X, Lo(G))).
-     assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
               && "Cannot handle constant offsets yet!");
        Disp = N.getOperand(1).getOperand(0);  // The global address.
        assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
               Disp.getOpcode() == ISD::TargetConstantPool ||
               Disp.getOpcode() == ISD::TargetJumpTable);
        Base = N.getOperand(0);
@@ -879,10 +921,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
        // (for better address arithmetic) if the LHS and RHS of the OR are
        // provably disjoint.
        APInt LHSKnownZero, LHSKnownOne;
-      DAG.ComputeMaskedBits(N.getOperand(0),
-                            APInt::getAllOnesValue(N.getOperand(0)
-                                                   .getValueSizeInBits()),
-                            LHSKnownZero, LHSKnownOne);
+      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
  
        if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
          // If all of the bits are known zero on the LHS or RHS, the add won't
@@ -900,7 +939,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
-      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+                             CN->getValueType(0));
        return true;
      }
  
@@ -948,7 +988,8 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
    }
  
    // Otherwise, do it the hard way, using R0 as the base register.
-  Base = DAG.getRegister(PPC::R0, N.getValueType());
+  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+                         N.getValueType());
    Index = N;
    return true;
  }
@@ -968,7 +1009,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
    if (N.getOpcode() == ISD::ADD) {
      short imm = 0;
      if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
-      Disp =  DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        } else {
@@ -977,7 +1018,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
        return true; // [r+i]
      } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
        // Match LOAD (ADD (X, Lo(G))).
-     assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
               && "Cannot handle constant offsets yet!");
        Disp = N.getOperand(1).getOperand(0);  // The global address.
        assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
@@ -993,10 +1034,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
        // (for better address arithmetic) if the LHS and RHS of the OR are
        // provably disjoint.
        APInt LHSKnownZero, LHSKnownOne;
-      DAG.ComputeMaskedBits(N.getOperand(0),
-                            APInt::getAllOnesValue(N.getOperand(0)
-                                                   .getValueSizeInBits()),
-                            LHSKnownZero, LHSKnownOne);
+      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
        if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
          // If all of the bits are known zero on the LHS or RHS, the add won't
          // carry.
@@ -1013,7 +1051,8 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
        short Imm;
        if (isIntS16Immediate(CN, Imm)) {
          Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
-        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+                               CN->getValueType(0));
          return true;
        }
  
@@ -1048,8 +1087,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
-  // Disabled by default for now.
-  if (!EnablePPCPreinc) return false;
+  if (DisablePPCPreinc) return false;
  
    SDValue Ptr;
    EVT VT;
@@ -1067,7 +1105,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
    if (VT.isVector())
      return false;
  
-  // TODO: Check reg+reg first.
+  if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+    if (isa<StoreSDNode>(N)) {
+      AM = ISD::PRE_INC;
+      return true;
+    }
+
+    // FIXME: reg+reg preinc loads
+    return false;
+  }
  
    // LDU/STU use reg+imm*4, others use reg+imm.
    if (VT != MVT::i64) {
@@ -1186,6 +1232,30 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
    return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
  }
  
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                              SelectionDAG &DAG) const {
+
+  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  DebugLoc dl = GA->getDebugLoc();
+  const GlobalValue *GV = GA->getGlobal();
+  EVT PtrVT = getPointerTy();
+  bool is64bit = PPCSubTarget.isPPC64();
+
+  TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TPREL16_HA);
+  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TPREL16_LO);
+
+  if (model != TLSModel::LocalExec)
+    llvm_unreachable("only local-exec TLS mode supported");
+  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+                                   is64bit ? MVT::i64 : MVT::i32);
+  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+}
+
  SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
    EVT PtrVT = Op.getValueType();
@@ -1215,7 +1285,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
    // extra load to get the address of the global.
    if (MOHiFlag & PPCII::MO_NLP_FLAG)
      Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
-                      false, false, 0);
+                      false, false, false, 0);
    return Ptr;
  }
  
@@ -1264,13 +1334,119 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                        const PPCSubtarget &Subtarget) const {
+  SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0);
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  DebugLoc dl = Node->getDebugLoc();
+
+  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+  // gpr_index
+  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
+                                    false, false, 0);
+  InChain = GprIndex.getValue(1);
+
+  if (VT == MVT::i64) {
+    // Check if GprIndex is even
+    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+                                 DAG.getConstant(1, MVT::i32));
+    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
+    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+                                          DAG.getConstant(1, MVT::i32));
+    // Align GprIndex to be even if it isn't
+    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+                           GprIndex);
+  }
  
-  llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
-  return SDValue(); // Not reached
+  // fpr index is 1 byte after gpr
+  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                               DAG.getConstant(1, MVT::i32));
+
+  // fpr
+  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
+                                    false, false, 0);
+  InChain = FprIndex.getValue(1);
+
+  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                                       DAG.getConstant(8, MVT::i32));
+
+  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                                        DAG.getConstant(4, MVT::i32));
+
+  // areas
+  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+                                     MachinePointerInfo(), false, false,
+                                     false, 0);
+  InChain = OverflowArea.getValue(1);
+
+  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+                                    MachinePointerInfo(), false, false,
+                                    false, 0);
+  InChain = RegSaveArea.getValue(1);
+
+  // select overflow_area if index > 8
+  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+  // adjustment constant gpr_index * 4/8
+  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+                                    VT.isInteger() ? GprIndex : FprIndex,
+                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
+                                                    MVT::i32));
+
+  // OurReg = RegSaveArea + RegConstant
+  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+                               RegConstant);
+
+  // Floating types are 32 bytes into RegSaveArea
+  if (VT.isFloatingPoint())
+    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+                         DAG.getConstant(32, MVT::i32));
+
+  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   VT.isInteger() ? GprIndex : FprIndex,
+                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+                                                   MVT::i32));
+
+  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+                              VT.isInteger() ? VAListPtr : FprPtr,
+                              MachinePointerInfo(SV),
+                              MVT::i8, false, false, 0);
+
+  // determine if we should load from reg_save_area or overflow_area
+  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+  // increase overflow_area by 4/8 if gpr/fpr > 8
+  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
+                                          MVT::i32));
+
+  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+                             OverflowAreaPlusN);
+
+  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+                              OverflowAreaPtr,
+                              MachinePointerInfo(),
+                              MVT::i32, false, false, 0);
+
+  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 
+                     false, false, false, 0);
  }
  
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
-                                           SelectionDAG &DAG) const {
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+                                                SelectionDAG &DAG) const {
    SDValue Chain = Op.getOperand(0);
    SDValue Trmp = Op.getOperand(1); // trampoline
    SDValue FPtr = Op.getOperand(2); // nested function
@@ -1279,7 +1455,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    bool isPPC64 = (PtrVT == MVT::i64);
-  const Type *IntPtrTy =
+  Type *IntPtrTy =
      DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
                                                               *DAG.getContext());
  
@@ -1298,17 +1474,18 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
    Entry.Node = Nest; Args.push_back(Entry);
  
    // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
-  std::pair<SDValue, SDValue> CallResult =
-    LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
-                false, false, false, false, 0, CallingConv::C, false,
-                /*isReturnValueUsed=*/true,
+  TargetLowering::CallLoweringInfo CLI(Chain,
+                                       Type::getVoidTy(*DAG.getContext()),
+                                       false, false, false, false, 0,
+                                       CallingConv::C,
+                /*isTailCall=*/false,
+                                       /*doesNotRet=*/false,
+                                       /*isReturnValueUsed=*/true,
                  DAG.getExternalSymbol("__trampoline_setup", PtrVT),
                  Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  
-  SDValue Ops[] =
-    { CallResult.first, CallResult.second };
-
-  return DAG.getMergeValues(Ops, 2, dl);
+  return CallResult.second;
  }
  
  SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
@@ -1422,7 +1599,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State) {
-  static const unsigned ArgRegs[] = {
+  static const uint16_t ArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
@@ -1449,7 +1626,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                                CCValAssign::LocInfo &LocInfo,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State) {
-  static const unsigned ArgRegs[] = {
+  static const uint16_t ArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
@@ -1473,8 +1650,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
  
  /// GetFPR - Get the set of FP registers that should be allocated for arguments,
  /// on Darwin.
-static const unsigned *GetFPR() {
-  static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+  static const uint16_t FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
@@ -1555,13 +1732,14 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Potential tail calls could cause overwriting of argument stack slots.
-  bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                       (CallConv == CallingConv::Fast));
    unsigned PtrByteSize = 4;
  
    // Assign locations to all of the incoming arguments.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
-                 *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
  
    // Reserve space for the linkage area on the stack.
    CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -1573,31 +1751,31 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
  
      // Arguments stored in registers.
      if (VA.isRegLoc()) {
-      TargetRegisterClass *RC;
+      const TargetRegisterClass *RC;
        EVT ValVT = VA.getValVT();
  
        switch (ValVT.getSimpleVT().SimpleTy) {
          default:
            llvm_unreachable("ValVT not supported by formal arguments Lowering");
          case MVT::i32:
-          RC = PPC::GPRCRegisterClass;
+          RC = &PPC::GPRCRegClass;
            break;
          case MVT::f32:
-          RC = PPC::F4RCRegisterClass;
+          RC = &PPC::F4RCRegClass;
            break;
          case MVT::f64:
-          RC = PPC::F8RCRegisterClass;
+          RC = &PPC::F8RCRegClass;
            break;
          case MVT::v16i8:
          case MVT::v8i16:
          case MVT::v4i32:
          case MVT::v4f32:
-          RC = PPC::VRRCRegisterClass;
+          RC = &PPC::VRRCRegClass;
            break;
        }
  
        // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
  
        InVals.push_back(ArgValue);
@@ -1613,7 +1791,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                     MachinePointerInfo(),
-                                   false, false, 0));
+                                   false, false, false, 0));
      }
    }
  
@@ -1621,8 +1799,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
    // Aggregates passed by value are stored in the local variable space of the
    // caller's stack frame, right above the parameter list area.
    SmallVector<CCValAssign, 16> ByValArgLocs;
-  CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(),
-                      ByValArgLocs, *DAG.getContext());
+  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
  
    // Reserve stack space for the allocations in CCInfo.
    CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -1654,13 +1832,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
    // If the function takes variable number of arguments, make a frame index for
    // the start of the first vararg value... for expansion of llvm.va_start.
    if (isVarArg) {
-    static const unsigned GPArgRegs[] = {
+    static const uint16_t GPArgRegs[] = {
        PPC::R3, PPC::R4, PPC::R5, PPC::R6,
        PPC::R7, PPC::R8, PPC::R9, PPC::R10,
      };
      const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
  
-    static const unsigned FPArgRegs[] = {
+    static const uint16_t FPArgRegs[] = {
        PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
        PPC::F8
      };
@@ -1689,7 +1867,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
        // Get an existing live-in vreg, or add a new one.
        unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
        if (!VReg)
-        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
  
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1708,7 +1886,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
        // Get an existing live-in vreg, or add a new one.
        unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
        if (!VReg)
-        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
  
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1745,25 +1923,26 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    bool isPPC64 = PtrVT == MVT::i64;
    // Potential tail calls could cause overwriting of argument stack slots.
-  bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                       (CallConv == CallingConv::Fast));
    unsigned PtrByteSize = isPPC64 ? 8 : 4;
  
    unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
    // Area that is at least reserved in caller of this function.
    unsigned MinReservedArea = ArgOffset;
  
-  static const unsigned GPR_32[] = {           // 32-bit registers.
+  static const uint16_t GPR_32[] = {           // 32-bit registers.
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
-  static const unsigned GPR_64[] = {           // 64-bit registers.
+  static const uint16_t GPR_64[] = {           // 64-bit registers.
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
  
-  static const unsigned *FPR = GetFPR();
+  static const uint16_t *FPR = GetFPR();
  
-  static const unsigned VR[] = {
+  static const uint16_t VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
@@ -1774,7 +1953,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
-  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
  
    // In 32-bit non-varargs functions, the stack space for vectors is after the
    // stack space for non-vectors.  We do not use this space unless we have
@@ -1788,12 +1967,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
      for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
           ++ArgNo) {
        EVT ObjectVT = Ins[ArgNo].VT;
-      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
        ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
  
        if (Flags.isByVal()) {
          // ObjSize is the true size, ArgSize rounded up to multiple of regs.
-        ObjSize = Flags.getByValSize();
+        unsigned ObjSize = Flags.getByValSize();
          unsigned ArgSize =
                  ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
          VecArgOffset += ArgSize;
@@ -1872,7 +2050,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
        InVals.push_back(FIN);
        if (ObjSize==1 || ObjSize==2) {
          if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                              MachinePointerInfo(),
@@ -1891,7 +2073,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          // to memory.  ArgVal will be address of the beginning of
          // the object.
          if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -1914,7 +2100,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
      case MVT::i32:
        if (!isPPC64) {
          if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
            ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
            ++GPR_idx;
          } else {
@@ -1928,7 +2114,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
        // FALLTHROUGH
      case MVT::i64:  // PPC64
        if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
          if (ObjectVT == MVT::i32) {
@@ -1966,9 +2152,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          unsigned VReg;
  
          if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
          else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
  
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++FPR_idx;
@@ -1986,7 +2172,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
        // Note that vector arguments in registers don't reserve stack space,
        // except in varargs functions.
        if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          if (isVarArg) {
            while ((ArgOffset % 16) != 0) {
@@ -2022,7 +2208,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                                        isImmutable);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
-                           false, false, 0);
+                           false, false, false, 0);
      }
  
      InVals.push_back(ArgVal);
@@ -2064,9 +2250,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
        unsigned VReg;
  
        if (isPPC64)
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        else
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
  
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2143,9 +2329,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                        PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
  
    // Tail call needs the stack to be aligned.
-  if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
-    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
-      getStackAlignment();
+  if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+    unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+      getFrameLowering()->getStackAlignment();
      unsigned AlignMask = TargetAlign-1;
      NumBytes = (NumBytes + AlignMask) & ~AlignMask;
    }
@@ -2154,7 +2340,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
  }
  
  /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
-/// adjusted to accomodate the arguments for the tailcall.
+/// adjusted to accommodate the arguments for the tailcall.
  static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                     unsigned ParamSize) {
  
@@ -2179,7 +2365,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                       bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
                                                       SelectionDAG& DAG) const {
-  if (!GuaranteedTailCallOpt)
+  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
      return false;
  
    // Variable argument functions are not supported.
@@ -2327,7 +2513,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
      EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
      LROpOut = getReturnAddrFrameIndex(DAG);
      LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
-                          false, false, 0);
+                          false, false, false, 0);
      Chain = SDValue(LROpOut.getNode(), 1);
  
      // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
@@ -2335,7 +2521,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
      if (isDarwinABI) {
        FPOpOut = getFramePointerFrameIndex(DAG);
        FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
-                            false, false, 0);
+                            false, false, false, 0);
        Chain = SDValue(FPOpOut.getNode(), 1);
      }
    }
@@ -2395,7 +2581,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
    // Emit a sequence of copyto/copyfrom virtual registers for arguments that
    // might overwrite each other in case of tail call optimization.
    SmallVector<SDValue, 8> MemOpChains2;
-  // Do not flag preceeding copytoreg stuff together with the following stuff.
+  // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                      MemOpChains2, dl);
@@ -2443,7 +2629,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
        unsigned OpFlags = 0;
        if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-          PPCSubTarget.getDarwinVers() < 9 &&
+          (PPCSubTarget.getTargetTriple().isMacOSX() &&
+           PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
            (G->getGlobal()->isDeclaration() ||
             G->getGlobal()->isWeakForLinker())) {
          // PC-relative references to external symbols should go through $stub,
@@ -2466,7 +2653,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      unsigned char OpFlags = 0;
  
      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-        PPCSubTarget.getDarwinVers() < 9) {
+        (PPCSubTarget.getTargetTriple().isMacOSX() &&
+         PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the leopard linker or later, which
        // automatically synthesizes these stubs.
@@ -2562,7 +2750,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      Callee.setNode(0);
      // Add CTR register as callee so a bctr can be emitted later.
      if (isTailCall)
-      Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT));
+      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
    }
  
    // If this is a direct call, pass the chain and the callee.
@@ -2591,8 +2779,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                     SmallVectorImpl<SDValue> &InVals) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
-                    RVLocs, *DAG.getContext());
+  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                    getTargetMachine(), RVLocs, *DAG.getContext());
    CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
  
    // Copy all of the result registers out of their specified physreg.
@@ -2630,7 +2818,14 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
    // the stack. Account for this here so these bytes can be pushed back on in
    // PPCRegisterInfo::eliminateCallFramePseudoInstr.
    int BytesCalleePops =
-    (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+    (CallConv == CallingConv::Fast &&
+     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
  
    if (InFlag.getNode())
      Ops.push_back(InFlag);
@@ -2641,8 +2836,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
      // to the liveout set for the function.
      if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
        SmallVector<CCValAssign, 16> RVLocs;
-      CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
-                     *DAG.getContext());
+      CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                     getTargetMachine(), RVLocs, *DAG.getContext());
        CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
        for (unsigned i = 0; i != RVLocs.size(); ++i)
          DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
@@ -2658,9 +2853,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
      return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
    }
  
-  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
-  InFlag = Chain.getValue(1);
-
    // Add a NOP immediately after the branch instruction when using the 64-bit
    // SVR4 ABI. At link time, if caller and callee are in a different module and
    // thus have a different TOC, the call will be replaced with a call to a stub
@@ -2669,8 +2861,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
    // which restores the TOC of the caller from the TOC save slot of the current
    // stack frame. If caller and callee belong to the same module (and have the
    // same TOC), the NOP will remain unchanged.
+
+  bool needsTOCRestore = false;
    if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
-    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      if (CallOpc == PPCISD::BCTRL_SVR4) {
        // This is a call through a function pointer.
        // Restore the caller TOC from the save area into R2.
@@ -2681,14 +2874,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
        // since r2 is a reserved register (which prevents the register allocator
        // from allocating it), resulting in an additional register being
        // allocated and an unnecessary move instruction being generated.
-      Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
-      InFlag = Chain.getValue(1);
-    } else {
+      needsTOCRestore = true;
+    } else if (CallOpc == PPCISD::CALL_SVR4) {
        // Otherwise insert NOP.
-      InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+      CallOpc = PPCISD::CALL_NOP_SVR4;
      }
    }
  
+  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  if (needsTOCRestore) {
+    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+    Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(BytesCalleePops, true),
                               InFlag);
@@ -2700,14 +2901,19 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
  }
  
  SDValue
-PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
-                             CallingConv::ID CallConv, bool isVarArg,
-                             bool &isTailCall,
-                             const SmallVectorImpl<ISD::OutputArg> &Outs,
-                             const SmallVectorImpl<SDValue> &OutVals,
-                             const SmallVectorImpl<ISD::InputArg> &Ins,
-                             DebugLoc dl, SelectionDAG &DAG,
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG                     = CLI.DAG;
+  DebugLoc &dl                          = CLI.DL;
+  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
+  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
+  SDValue Chain                         = CLI.Chain;
+  SDValue Callee                        = CLI.Callee;
+  bool &isTailCall                      = CLI.IsTailCall;
+  CallingConv::ID CallConv              = CLI.CallConv;
+  bool isVarArg                         = CLI.IsVarArg;
+
    if (isTailCall)
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
@@ -2746,7 +2952,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
    // and restoring the callers stack pointer in this functions epilog. This is
    // done because by tail calling the called function might overwrite the value
    // in this function's (MF) stack pointer stack slot 0(SP).
-  if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv == CallingConv::Fast)
      MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
  
    // Count how many bytes are to be pushed on the stack, including the linkage
@@ -2755,8 +2962,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
  
    // Assign locations to all of the outgoing arguments.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
-                 ArgLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
  
    // Reserve space for the linkage area on the stack.
    CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -2795,8 +3002,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
  
    // Assign locations to all of the outgoing aggregate by value arguments.
    SmallVector<CCValAssign, 16> ByValArgLocs;
-  CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
-                      *DAG.getContext());
+  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
  
    // Reserve stack space for the allocations in CCInfo.
    CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2832,6 +3039,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
    SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
    SmallVector<SDValue, 8> MemOpChains;
  
+  bool seenFloatArg = false;
    // Walk the register/memloc assignments, inserting copies/loads.
    for (unsigned i = 0, j = 0, e = ArgLocs.size();
         i != e;
@@ -2876,6 +3084,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
      }
  
      if (VA.isRegLoc()) {
+      seenFloatArg |= VA.getLocVT().isFloatingPoint();
        // Put argument in a physical register.
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
@@ -2902,6 +3111,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOpChains[0], MemOpChains.size());
  
+  // Set CR6 to true if this is a vararg call with floating args passed in
+  // registers.
+  if (isVarArg) {
+    SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
+                                     dl, MVT::i32), 0);
+    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+  }
+
    // Build a sequence of copy-to-reg nodes chained together with token chain
    // and flag operands which copy the outgoing args into the appropriate regs.
    SDValue InFlag;
@@ -2911,13 +3128,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
      InFlag = Chain.getValue(1);
    }
  
-  // Set CR6 to true if this is a vararg call.
-  if (isVarArg) {
-    SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
    if (isTailCall)
      PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                      false, TailCallArguments);
@@ -2950,7 +3160,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    // and restoring the callers stack pointer in this functions epilog. This is
    // done because by tail calling the called function might overwrite the value
    // in this function's (MF) stack pointer stack slot 0(SP).
-  if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv == CallingConv::Fast)
      MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
  
    unsigned nAltivecParamsAtEnd = 0;
@@ -2999,17 +3210,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
-  static const unsigned GPR_32[] = {           // 32-bit registers.
+  static const uint16_t GPR_32[] = {           // 32-bit registers.
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
-  static const unsigned GPR_64[] = {           // 64-bit registers.
+  static const uint16_t GPR_64[] = {           // 64-bit registers.
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
-  static const unsigned *FPR = GetFPR();
+  static const uint16_t *FPR = GetFPR();
  
-  static const unsigned VR[] = {
+  static const uint16_t VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
@@ -3017,7 +3228,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    const unsigned NumFPRs = 13;
    const unsigned NumVRs  = array_lengthof(VR);
  
-  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -3091,7 +3302,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                       MachinePointerInfo(),
-                                     false, false, 0);
+                                     false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
            ArgOffset += PtrByteSize;
@@ -3129,7 +3340,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
            // Float varargs are always shadowed in available integer registers
            if (GPR_idx != NumGPRs) {
              SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
-                                       MachinePointerInfo(), false, false, 0);
+                                       MachinePointerInfo(), false, false,
+                                       false, 0);
              MemOpChains.push_back(Load.getValue(1));
              RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
            }
@@ -3138,7 +3350,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
              PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
              SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                         MachinePointerInfo(),
-                                       false, false, 0);
+                                       false, false, false, 0);
              MemOpChains.push_back(Load.getValue(1));
              RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
            }
@@ -3187,7 +3399,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
          if (VR_idx != NumVRs) {
            SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                       MachinePointerInfo(),
-                                     false, false, 0);
+                                     false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
          }
@@ -3198,7 +3410,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
            SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                    DAG.getConstant(i, PtrVT));
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
-                                     false, false, 0);
+                                     false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
@@ -3295,6 +3507,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                      Ins, InVals);
  }
  
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+                                  MachineFunction &MF, bool isVarArg,
+                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                  LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+                 RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
  SDValue
  PPCTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
@@ -3303,8 +3526,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
                                 DebugLoc dl, SelectionDAG &DAG) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
-                 RVLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
  
    // If this is the first return lowered for this function, add the regs to the
@@ -3351,7 +3574,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
    // Load the old link SP.
    SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                     MachinePointerInfo(),
-                                   false, false, 0);
+                                   false, false, false, 0);
  
    // Restore the stack pointer.
    Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
@@ -3542,7 +3765,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
      FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                          DAG.getConstant(4, FIPtr.getValueType()));
    return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
-                     false, false, 0);
+                     false, false, false, 0);
  }
  
  SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -3586,7 +3809,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
                              Ops, 4, MVT::i64, MMO);
    // Load the value as a double.
    SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
-                           false, false, 0);
+                           false, false, false, 0);
  
    // FCFID it and return it.
    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3638,7 +3861,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
    SDValue Four = DAG.getConstant(4, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
-                            false, false, 0);
+                            false, false, false, 0);
  
    // Transform as necessary
    SDValue CWD1 =
@@ -4104,8 +4327,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
  
    // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
    // perfect shuffle table to emit an optimal matching sequence.
-  SmallVector<int, 16> PermMask;
-  SVOp->getMask(PermMask);
+  ArrayRef<int> PermMask = SVOp->getMask();
  
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
@@ -4309,7 +4531,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                 false, false, 0);
    // Load it out.
    return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
-                     false, false, 0);
+                     false, false, false, 0);
  }
  
  SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -4379,10 +4601,11 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
    case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
    case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
-  case ISD::GlobalTLSAddress:   llvm_unreachable("TLS not implemented for PPC");
+  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
    case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
    case ISD::SETCC:              return LowerSETCC(Op, DAG);
-  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
+  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
+  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
    case ISD::VASTART:
      return LowerVASTART(Op, DAG, PPCSubTarget);
  
@@ -4416,17 +4639,31 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
    }
-  return SDValue();
  }
  
  void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue>&Results,
                                             SelectionDAG &DAG) const {
+  const TargetMachine &TM = getTargetMachine();
    DebugLoc dl = N->getDebugLoc();
    switch (N->getOpcode()) {
    default:
-    assert(false && "Do not know how to custom type legalize this operation!");
+    llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case ISD::VAARG: {
+    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+        || TM.getSubtarget<PPCSubtarget>().isPPC64())
+      return;
+
+    EVT VT = N->getValueType(0);
+
+    if (VT == MVT::i64) {
+      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+      Results.push_back(NewNode);
+      Results.push_back(NewNode.getValue(1));
+    }
      return;
+  }
    case ISD::FP_ROUND_INREG: {
      assert(N->getValueType(0) == MVT::ppcf128);
      assert(N->getOperand(0).getValueType() == MVT::ppcf128);
@@ -4572,6 +4809,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
    // registers without caring whether they're 32 or 64, but here we're
    // doing actual arithmetic on the addresses.
    bool is64bit = PPCSubTarget.isPPC64();
+  unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
  
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction *F = BB->getParent();
@@ -4635,8 +4873,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
    //   bne- loopMBB
    //   fallthrough --> exitMBB
    //   srw dest, tmpDest, shift
-
-  if (ptrA!=PPC::R0) {
+  if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
@@ -4666,7 +4903,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
  
    BB = loopMBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
-    .addReg(PPC::R0).addReg(PtrReg);
+    .addReg(ZeroReg).addReg(PtrReg);
    if (BinOpcode)
      BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg).addReg(TmpDestReg);
@@ -4676,8 +4913,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
      .addReg(TmpReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
      .addReg(Tmp3Reg).addReg(Tmp2Reg);
-  BuildMI(BB, dl, TII->get(PPC::STWCX))
-    .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
+  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
    BB->addSuccessor(loopMBB);
@@ -4686,7 +4923,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
    //  exitMBB:
    //   ...
    BB = exitMBB;
-  BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
+  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+    .addReg(ShiftReg);
    return BB;
  }
  
@@ -4934,6 +5172,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
      unsigned Ptr1Reg;
      unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+    unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
      //  thisMBB:
      //   ...
      //   fallthrough --> loopMBB
@@ -4966,7 +5205,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      //   stwcx. tmpDest, ptr
      // exitBB:
      //   srw dest, tmpDest, shift
-    if (ptrA!=PPC::R0) {
+    if (ptrA != ZeroReg) {
        Ptr1Reg = RegInfo.createVirtualRegister(RC);
        BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA).addReg(ptrB);
@@ -5003,7 +5242,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
      BB = loop1MBB;
      BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
-        .addReg(PPC::R0).addReg(PtrReg);
+        .addReg(ZeroReg).addReg(PtrReg);
      BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
          .addReg(TmpDestReg).addReg(MaskReg);
      BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
@@ -5019,7 +5258,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
          .addReg(Tmp2Reg).addReg(NewVal3Reg);
      BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
-        .addReg(PPC::R0).addReg(PtrReg);
+        .addReg(ZeroReg).addReg(PtrReg);
      BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
      BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
@@ -5028,13 +5267,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
      BB = midMBB;
      BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
-      .addReg(PPC::R0).addReg(PtrReg);
+      .addReg(ZeroReg).addReg(PtrReg);
      BB->addSuccessor(exitMBB);
  
      //  exitMBB:
      //   ...
      BB = exitMBB;
-    BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
+    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+      .addReg(ShiftReg);
    } else {
      llvm_unreachable("Unexpected instr type to insert");
    }
@@ -5309,12 +5549,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  //===----------------------------------------------------------------------===//
  
  void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
-                                                       const APInt &Mask,
                                                         APInt &KnownZero,
                                                         APInt &KnownOne,
                                                         const SelectionDAG &DAG,
                                                         unsigned Depth) const {
-  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
    switch (Op.getOpcode()) {
    default: break;
    case PPCISD::LBRX: {
@@ -5377,7 +5616,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
      // but allow it at the lowest weight.
    if (CallOperandVal == NULL)
      return CW_Default;
-  const Type *type = CallOperandVal->getType();
+  Type *type = CallOperandVal->getType();
    // Look at the constraint type.
    switch (*constraint) {
    default:
@@ -5415,18 +5654,18 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
      case 'b':   // R1-R31
      case 'r':   // R0-R31
        if (VT == MVT::i64 && PPCSubTarget.isPPC64())
-        return std::make_pair(0U, PPC::G8RCRegisterClass);
-      return std::make_pair(0U, PPC::GPRCRegisterClass);
+        return std::make_pair(0U, &PPC::G8RCRegClass);
+      return std::make_pair(0U, &PPC::GPRCRegClass);
      case 'f':
        if (VT == MVT::f32)
-        return std::make_pair(0U, PPC::F4RCRegisterClass);
-      else if (VT == MVT::f64)
-        return std::make_pair(0U, PPC::F8RCRegisterClass);
+        return std::make_pair(0U, &PPC::F4RCRegClass);
+      if (VT == MVT::f64)
+        return std::make_pair(0U, &PPC::F8RCRegClass);
        break;
      case 'v':
-      return std::make_pair(0U, PPC::VRRCRegisterClass);
+      return std::make_pair(0U, &PPC::VRRCRegClass);
      case 'y':   // crrc
-      return std::make_pair(0U, PPC::CRRCRegisterClass);
+      return std::make_pair(0U, &PPC::CRRCRegClass);
      }
    }
  
@@ -5436,10 +5675,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  
  /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
  /// vector.  If it is invalid, don't add anything to Ops.
-void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                     std::string &Constraint,
                                                       std::vector<SDValue>&Ops,
                                                       SelectionDAG &DAG) const {
    SDValue Result(0,0);
+
+  // Only support length 1 constraints.
+  if (Constraint.length() > 1) return;
+
+  char Letter = Constraint[0];
    switch (Letter) {
    default: break;
    case 'I':
@@ -5495,13 +5740,13 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
    }
  
    // Handle standard constraint letters.
-  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
+  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
  }
  
  // isLegalAddressingMode - Return true if the addressing mode represented
  // by AM is legal for this target, for a load/store of the specified type.
  bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                              const Type *Ty) const {
+                                              Type *Ty) const {
    // FIXME: PPC does not allow r+i addressing modes for vectors!
  
    // PPC allows a sign-extended 16-bit immediate field.
@@ -5537,12 +5782,12 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
  /// isLegalAddressImmediate - Return true if the integer value can be used
  /// as the offset of the target addressing mode for load / store of the
  /// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
    // PPC allows a sign-extended 16-bit immediate field.
    return (V > -(1 << 16) && V < (1 << 16)-1);
  }
  
-bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
    return false;
  }
  
@@ -5571,13 +5816,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
      return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                         DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                     FrameAddr, Offset),
-                       MachinePointerInfo(), false, false, 0);
+                       MachinePointerInfo(), false, false, false, 0);
    }
  
    // Just load the return address off the stack.
    SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, MachinePointerInfo(), false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
  }
  
  SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5591,7 +5836,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MFI->setFrameAddressIsTaken(true);
-  bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+  bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+               MFI->hasVarSizedObjects()) &&
                    MFI->getStackSize() &&
                    !MF.getFunction()->hasFnAttr(Attribute::Naked);
    unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
@@ -5600,7 +5846,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                           PtrVT);
    while (Depth--)
      FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
-                            FrameAddr, MachinePointerInfo(), false, false, 0);
+                            FrameAddr, MachinePointerInfo(), false, false,
+                            false, 0);
    return FrameAddr;
  }
  
@@ -5616,7 +5863,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
  /// means there isn't a need to check it against alignment requirement,
  /// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
  /// non-scalar-integer type, e.g. empty string source, constant, or loaded
  /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
  /// constant so it does not need to be loaded.
@@ -5624,7 +5871,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  /// target-independent logic.
  EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                             unsigned DstAlign, unsigned SrcAlign,
-                                           bool NonScalarIntSafe,
+                                           bool IsZeroVal,
                                             bool MemcpyStrSrc,
                                             MachineFunction &MF) const {
    if (this->PPCSubTarget.isPPC64()) {
@@ -5633,3 +5880,11 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
      return MVT::i32;
    }
  }
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+  if (DisableILPPref)
+    return TargetLowering::getSchedulingPreference(N);
+
+  return Sched::ILP;
+}
+