X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCISelLowering.cpp;h=d196c10fae772d8a83a299261e43376973703088;hb=2823b3e70ee7a5ed7482c45c503659a16a879a61;hp=beeb0f8c42a676104d2d82044cd0226ed01652fd;hpb=8a2d3ca7dff8f37ee0f1fc0042f47c194045183d;p=oota-llvm.git

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index beeb0f8c42a..d196c10fae7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- PPC32ISelLowering.cpp - PPC32 DAG Lowering Implementation ---------===//
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,30 +7,35 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the PPC32ISelLowering class.
+// This file implements the PPCISelLowering class.
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC32ISelLowering.h"
-#include "PPC32TargetMachine.h"
+#include "PPCISelLowering.h"
+#include "PPCTargetMachine.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 using namespace llvm;
 
-PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
+PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
   : TargetLowering(TM) {
     
   // Fold away setcc operations if possible.
   setSetCCIsExpensive();
+  setPow2DivIsCheap();
   
+  // Use _setjmp/_longjmp instead of setjmp/longjmp.
+  setUseUnderscoreSetJmpLongJmp(true);
+    
   // Set up the register classes.
-  addRegisterClass(MVT::i32, PPC32::GPRCRegisterClass);
-  addRegisterClass(MVT::f32, PPC32::FPRCRegisterClass);
-  addRegisterClass(MVT::f64, PPC32::FPRCRegisterClass);
+  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
   
   // PowerPC has no intrinsics for these particular operations
   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
@@ -48,13 +53,13 @@ PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
   // We don't support sin/cos/sqrt/fmod
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
-  setOperationAction(ISD::SREM , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
-  setOperationAction(ISD::SREM , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
   
   // If we're enabling GP optimizations, use hardware square root
-  if (!TM.getSubtarget<PPCSubtarget>().isGigaProcessor()) {
+  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
   }
@@ -71,18 +76,44 @@ PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
   // PowerPC wants to turn select_cc of FP into fsel when possible.
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
+  
   // PowerPC does not have BRCOND* which requires SetCC
   setOperationAction(ISD::BRCOND,       MVT::Other, Expand);
   setOperationAction(ISD::BRCONDTWOWAY, MVT::Other, Expand);
   
-  // PowerPC does not have FP_TO_UINT
-  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
-  
+  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
   // PowerPC does not have [U|S]INT_TO_FP
   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 
+  // PowerPC does not have truncstore for i1.
+  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
+  
+  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
+    // 64-bit PowerPC also has instructions for converting between i64 and fp.
+    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+  } else {
+    // PowerPC does not have FP_TO_UINT on 32 bit implementations.
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+  }
+
+  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
+    // 64 bit PowerPC implementations can support i64 types directly
+    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+  } else {
+    // 32 bit PowerPC wants to expand i64 shifts itself.
+    setOperationAction(ISD::SHL, MVT::i64, Custom);
+    setOperationAction(ISD::SRL, MVT::i64, Custom);
+    setOperationAction(ISD::SRA, MVT::i64, Custom);
+  }
+  
   setSetCCResultContents(ZeroOrOneSetCCResult);
   
   computeRegisterProperties();
@@ -103,78 +134,215 @@ static bool isFloatingPointZero(SDOperand Op) {
 
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
-SDOperand PPC32TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   switch (Op.getOpcode()) {
   default: assert(0 && "Wasn't expecting to be able to lower this!"); 
-  case ISD::SELECT_CC:
+  case ISD::FP_TO_SINT: {  // FP -> signed int: FCTI[WD]Z, then store/reload.
+    assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
+    SDOperand Src = Op.getOperand(0);
+    if (Src.getValueType() == MVT::f32)   // Widen f32 sources to f64 first.
+      Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
+    // Pick the conversion node by result type; the node itself is typed f64.
+    switch (Op.getValueType()) {
+    default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+    case MVT::i32:
+      Op = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
+      break;
+    case MVT::i64:
+      Op = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
+      break;
+    }
+    // Store the converted value to a new stack object, then reload it as int.
+    int FrameIdx =
+      DAG.getMachineFunction().getFrameInfo()->CreateStackObject(8, 8);
+    SDOperand FI = DAG.getFrameIndex(FrameIdx, MVT::i32);
+    SDOperand ST = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
+                               Op, FI, DAG.getSrcValue(0));
+    if (Op.getOpcode() == PPCISD::FCTIDZ) {   // i64: reload from the slot base.
+      Op = DAG.getLoad(MVT::i64, ST, FI, DAG.getSrcValue(0));
+    } else {   // i32: reload from 4 bytes past the slot base.
+      FI = DAG.getNode(ISD::ADD, MVT::i32, FI, DAG.getConstant(4, MVT::i32));
+      Op = DAG.getLoad(MVT::i32, ST, FI, DAG.getSrcValue(0));
+    }
+    return Op;
+  }
+  case ISD::SINT_TO_FP: {  // i64 -> FP: store/reload as f64, then FCFID.
+    assert(MVT::i64 == Op.getOperand(0).getValueType() && 
+           "Unhandled SINT_TO_FP type in custom expander!");
+    int FrameIdx =   // Stack object the i64 is stored to and reloaded from.
+      DAG.getMachineFunction().getFrameInfo()->CreateStackObject(8, 8);
+    SDOperand FI = DAG.getFrameIndex(FrameIdx, MVT::i32);
+    SDOperand ST = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
+                               Op.getOperand(0), FI, DAG.getSrcValue(0));
+    SDOperand LD = DAG.getLoad(MVT::f64, ST, FI, DAG.getSrcValue(0));
+    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, LD);
+    if (MVT::f32 == Op.getValueType())   // FCFID node is f64; round for f32.
+      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+    return FP;
+  }
+  case ISD::SELECT_CC: {
     // Turn FP only select_cc's into fsel instructions.
-    if (MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
-        MVT::isFloatingPoint(Op.getOperand(2).getValueType())) {
-      ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-      MVT::ValueType ResVT = Op.getValueType();
-      MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
-      SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
-      SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
+    if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
+        !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
+      break;
+    
+    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+    
+    // Cannot handle SETEQ/SETNE.
+    if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
+    
+    MVT::ValueType ResVT = Op.getValueType();
+    MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
+    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+    SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
 
-      // If the RHS of the comparison is a 0.0, we don't need to do the
-      // subtraction at all.
-      if (isFloatingPointZero(RHS))
-        switch (CC) {
-        default: assert(0 && "Invalid FSEL condition"); abort();
-        case ISD::SETULT:
-        case ISD::SETLT:
-          std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
-        case ISD::SETUGE:
-        case ISD::SETGE:
-          return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
-        case ISD::SETUGT:
-        case ISD::SETGT:
-          std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
-        case ISD::SETULE:
-        case ISD::SETLE:
-          return DAG.getNode(PPCISD::FSEL, ResVT,
-                             DAG.getNode(ISD::FNEG, ResVT, LHS), TV, FV);
-        }
-      
+    // If the RHS of the comparison is a 0.0, we don't need to do the
+    // subtraction at all.
+    if (isFloatingPointZero(RHS))
       switch (CC) {
       default: assert(0 && "Invalid FSEL condition"); abort();
       case ISD::SETULT:
       case ISD::SETLT:
-        return DAG.getNode(PPCISD::FSEL, ResVT,
-                           DAG.getNode(ISD::SUB, CmpVT, LHS, RHS), FV, TV);
+        std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
       case ISD::SETUGE:
       case ISD::SETGE:
-        return DAG.getNode(PPCISD::FSEL, ResVT,
-                           DAG.getNode(ISD::SUB, CmpVT, LHS, RHS), TV, FV);
+        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
+        return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
       case ISD::SETUGT:
       case ISD::SETGT:
-        return DAG.getNode(PPCISD::FSEL, ResVT,
-                           DAG.getNode(ISD::SUB, CmpVT, RHS, LHS), FV, TV);
+        std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
       case ISD::SETULE:
       case ISD::SETLE:
+        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
         return DAG.getNode(PPCISD::FSEL, ResVT,
-                           DAG.getNode(ISD::SUB, CmpVT, RHS, LHS), TV, FV);
+                           DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
       }
+    
+    SDOperand Cmp;
+    switch (CC) {
+    default: assert(0 && "Invalid FSEL condition"); abort();
+    case ISD::SETULT:
+    case ISD::SETLT:
+      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+    case ISD::SETUGE:
+    case ISD::SETGE:
+      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
+    case ISD::SETUGT:
+    case ISD::SETGT:
+      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+    case ISD::SETULE:
+    case ISD::SETLE:
+      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
     }
-    break;    
+    break;
+  }
+  case ISD::SHL: {  // Variable-amount i64 shift left built from i32 halves.
+    assert(Op.getValueType() == MVT::i64 &&
+           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
+    // The generic code does a fine job expanding shift by a constant.
+    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
+    // Otherwise compute, on the two i32 halves of the input:
+    //   OutHi = (Hi << Amt) | (Lo >> (32 - Amt)) | (Lo << (Amt - 32))
+    //   OutLo = Lo << Amt
+    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(0, MVT::i32));
+    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(1, MVT::i32));
+    SDOperand Amt = Op.getOperand(1);
+    // NOTE: these ops depend on the PPC behavior for oversized shift amounts.
+    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+                                 DAG.getConstant(32, MVT::i32), Amt);
+    SDOperand Tmp2 = DAG.getNode(ISD::SHL, MVT::i32, Hi, Amt);
+    SDOperand Tmp3 = DAG.getNode(ISD::SRL, MVT::i32, Lo, Tmp1);
+    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+                                 DAG.getConstant(-32U, MVT::i32));  // Amt - 32
+    SDOperand Tmp6 = DAG.getNode(ISD::SHL, MVT::i32, Lo, Tmp5);
+    SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+    SDOperand OutLo = DAG.getNode(ISD::SHL, MVT::i32, Lo, Amt);
+    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+  }
+  case ISD::SRL: {  // Variable-amount i64 logical shift right from i32 halves.
+    assert(Op.getValueType() == MVT::i64 &&
+           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
+    // The generic code does a fine job expanding shift by a constant.
+    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
+    // Otherwise compute, on the two i32 halves of the input:
+    //   OutLo = (Lo >> Amt) | (Hi << (32 - Amt)) | (Hi >> (Amt - 32))
+    //   OutHi = Hi >> Amt
+    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(0, MVT::i32));
+    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(1, MVT::i32));
+    SDOperand Amt = Op.getOperand(1);
+    // NOTE: these ops depend on the PPC behavior for oversized shift amounts.
+    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+                                 DAG.getConstant(32, MVT::i32), Amt);
+    SDOperand Tmp2 = DAG.getNode(ISD::SRL, MVT::i32, Lo, Amt);
+    SDOperand Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, Hi, Tmp1);
+    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+                                 DAG.getConstant(-32U, MVT::i32));  // Amt - 32
+    SDOperand Tmp6 = DAG.getNode(ISD::SRL, MVT::i32, Hi, Tmp5);
+    SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+    SDOperand OutHi = DAG.getNode(ISD::SRL, MVT::i32, Hi, Amt);
+    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+  }
+  case ISD::SRA: {  // Variable-amount i64 arithmetic shift right, i32 halves.
+    assert(Op.getValueType() == MVT::i64 &&
+           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
+    // The generic code does a fine job expanding shift by a constant.
+    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
+    // Otherwise, expand into a bunch of logical ops, followed by a select_cc:
+    //   OutLo = select_cc(Amt - 32, 0, Tmp4, Tmp6, SETLE);  OutHi = Hi sra Amt
+    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(0, MVT::i32));
+    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+                               DAG.getConstant(1, MVT::i32));
+    SDOperand Amt = Op.getOperand(1);
+    // Tmp4 = (Lo >> Amt) | (Hi << (32 - Amt));  Tmp6 = Hi sra (Amt - 32).
+    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+                                 DAG.getConstant(32, MVT::i32), Amt);
+    SDOperand Tmp2 = DAG.getNode(ISD::SRL, MVT::i32, Lo, Amt);
+    SDOperand Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, Hi, Tmp1);
+    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+                                 DAG.getConstant(-32U, MVT::i32));  // Amt - 32
+    SDOperand Tmp6 = DAG.getNode(ISD::SRA, MVT::i32, Hi, Tmp5);
+    SDOperand OutHi = DAG.getNode(ISD::SRA, MVT::i32, Hi, Amt);
+    SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
+                                      Tmp4, Tmp6, ISD::SETLE);
+    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+  }
   }
   return SDOperand();
 }
 
 std::vector<SDOperand>
-PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
   //
   // add beautiful description of PPC stack frame format, or at least some docs
   //
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineBasicBlock& BB = MF.front();
+  SSARegMap *RegMap = MF.getSSARegMap();
   std::vector<SDOperand> ArgValues;
   
-  // Due to the rather complicated nature of the PowerPC ABI, rather than a
-  // fixed size array of physical args, for the sake of simplicity let the STL
-  // handle tracking them for us.
-  std::vector<unsigned> argVR, argPR, argOp;
   unsigned ArgOffset = 24;
   unsigned GPR_remaining = 8;
   unsigned FPR_remaining = 13;
@@ -199,63 +367,74 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
     MVT::ValueType ObjectVT = getValueType(I->getType());
     
     switch (ObjectVT) {
-      default: assert(0 && "Unhandled argument type!");
-      case MVT::i1:
-      case MVT::i8:
-      case MVT::i16:
-      case MVT::i32:
-        ObjSize = 4;
-        if (!ArgLive) break;
-          if (GPR_remaining > 0) {
-            MF.addLiveIn(GPR[GPR_idx]);
-            argt = newroot = DAG.getCopyFromReg(DAG.getRoot(),
-                                                GPR[GPR_idx], MVT::i32);
-            if (ObjectVT != MVT::i32)
-              argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, newroot);
-          } else {
-            needsLoad = true;
-          }
-            break;
-      case MVT::i64: ObjSize = 8;
-        if (!ArgLive) break;
-          if (GPR_remaining > 0) {
-            SDOperand argHi, argLo;
-            MF.addLiveIn(GPR[GPR_idx]);
-            argHi = DAG.getCopyFromReg(DAG.getRoot(), GPR[GPR_idx], MVT::i32);
-            // If we have two or more remaining argument registers, then both halves
-            // of the i64 can be sourced from there.  Otherwise, the lower half will
-            // have to come off the stack.  This can happen when an i64 is preceded
-            // by 28 bytes of arguments.
-            if (GPR_remaining > 1) {
-              MF.addLiveIn(GPR[GPR_idx+1]);
-              argLo = DAG.getCopyFromReg(argHi, GPR[GPR_idx+1], MVT::i32);
-            } else {
-              int FI = MFI->CreateFixedObject(4, ArgOffset+4);
-              SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
-              argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
-                                  DAG.getSrcValue(NULL));
-            }
-            // Build the outgoing arg thingy
-            argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
-            newroot = argLo;
-          } else {
-            needsLoad = true;
-          }
-            break;
-      case MVT::f32:
-      case MVT::f64:
-        ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
-        if (!ArgLive) break;
-          if (FPR_remaining > 0) {
-            MF.addLiveIn(FPR[FPR_idx]);
-            argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), 
-                                                FPR[FPR_idx], ObjectVT);
-            --FPR_remaining;
-            ++FPR_idx;
-          } else {
-            needsLoad = true;
-          }
-            break;
+    default: assert(0 && "Unhandled argument type!");
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      ObjSize = 4;
+      if (!ArgLive) break;
+      if (GPR_remaining > 0) {
+        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+        MF.addLiveIn(GPR[GPR_idx], VReg);
+        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
+        if (ObjectVT != MVT::i32) {
+          unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext 
+                                                       : ISD::AssertZext;
+          argt = DAG.getNode(AssertOp, MVT::i32, argt, 
+                             DAG.getValueType(ObjectVT));
+          argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
+        }
+      } else {
+        needsLoad = true;
+      }
+      break;
+    case MVT::i64: ObjSize = 8;
+      if (!ArgLive) break;
+      if (GPR_remaining > 0) {
+        SDOperand argHi, argLo;
+        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+        MF.addLiveIn(GPR[GPR_idx], VReg);
+        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
+        // If we have two or more remaining argument registers, then both halves
+        // of the i64 can be sourced from there.  Otherwise, the lower half will
+        // have to come off the stack.  This can happen when an i64 is preceded
+        // by 28 bytes of arguments.
+        if (GPR_remaining > 1) {
+          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+          MF.addLiveIn(GPR[GPR_idx+1], VReg);
+          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
+        } else {
+          int FI = MFI->CreateFixedObject(4, ArgOffset+4);
+          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
+          argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
+                              DAG.getSrcValue(NULL));
+        }
+        // Build the outgoing arg thingy
+        argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
+        newroot = argLo;
+      } else {
+        needsLoad = true;
+      }
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
+      if (!ArgLive) break;
+      if (FPR_remaining > 0) {
+        unsigned VReg;
+        if (ObjectVT == MVT::f32)
+          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
+        else
+          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
+        MF.addLiveIn(FPR[FPR_idx], VReg);
+        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
+        --FPR_remaining;
+        ++FPR_idx;
+      } else {
+        needsLoad = true;
+      }
+      break;
     }
     
     // We need to load the argument to a virtual register if we determined above
@@ -296,8 +475,9 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
     // result of va_next.
     std::vector<SDOperand> MemOps;
     for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
-      MF.addLiveIn(GPR[GPR_idx]);
-      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), GPR[GPR_idx], MVT::i32);
+      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+      MF.addLiveIn(GPR[GPR_idx], VReg);
+      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
       SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                     Val, FIN, DAG.getSrcValue(NULL));
       MemOps.push_back(Store);
@@ -332,11 +512,11 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
 }
 
 std::pair<SDOperand, SDOperand>
-PPC32TargetLowering::LowerCallTo(SDOperand Chain,
-                                 const Type *RetTy, bool isVarArg,
-                                 unsigned CallingConv, bool isTailCall,
-                                 SDOperand Callee, ArgListTy &Args,
-                                 SelectionDAG &DAG) {
+PPCTargetLowering::LowerCallTo(SDOperand Chain,
+                               const Type *RetTy, bool isVarArg,
+                               unsigned CallingConv, bool isTailCall,
+                               SDOperand Callee, ArgListTy &Args,
+                               SelectionDAG &DAG) {
   // args_to_use will accumulate outgoing args for the ISD::CALL case in
   // SelectExpr to use to put the arguments in the appropriate registers.
   std::vector<SDOperand> args_to_use;
@@ -349,26 +529,27 @@ PPC32TargetLowering::LowerCallTo(SDOperand Chain,
     Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
                         DAG.getConstant(NumBytes, getPointerTy()));
   } else {
-    for (unsigned i = 0, e = Args.size(); i != e; ++i)
+    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
       switch (getValueType(Args[i].second)) {
-        default: assert(0 && "Unknown value type!");
-        case MVT::i1:
-        case MVT::i8:
-        case MVT::i16:
-        case MVT::i32:
-        case MVT::f32:
-          NumBytes += 4;
-          break;
-        case MVT::i64:
-        case MVT::f64:
-          NumBytes += 8;
-          break;
+      default: assert(0 && "Unknown value type!");
+      case MVT::i1:
+      case MVT::i8:
+      case MVT::i16:
+      case MVT::i32:
+      case MVT::f32:
+        NumBytes += 4;
+        break;
+      case MVT::i64:
+      case MVT::f64:
+        NumBytes += 8;
+        break;
       }
+    }
         
-        // Just to be safe, we'll always reserve the full 24 bytes of linkage area
-        // plus 32 bytes of argument space in case any called code gets funky on us.
-        // (Required by ABI to support var arg)
-        if (NumBytes < 56) NumBytes = 56;
+    // Just to be safe, we'll always reserve the full 24 bytes of linkage area
+    // plus 32 bytes of argument space in case any called code gets funky on us.
+    // (Required by ABI to support var arg)
+    if (NumBytes < 56) NumBytes = 56;
     
     // Adjust the stack pointer for the new arguments...
     // These operations are automatically eliminated by the prolog/epilog pass
@@ -398,102 +579,102 @@ PPC32TargetLowering::LowerCallTo(SDOperand Chain,
       MVT::ValueType ArgVT = getValueType(Args[i].second);
       
       switch (ArgVT) {
-        default: assert(0 && "Unexpected ValueType for argument!");
-        case MVT::i1:
-        case MVT::i8:
-        case MVT::i16:
-          // Promote the integer to 32 bits.  If the input type is signed use a
-          // sign extend, otherwise use a zero extend.
-          if (Args[i].second->isSigned())
-            Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
-          else
-            Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
-          // FALL THROUGH
-        case MVT::i32:
+      default: assert(0 && "Unexpected ValueType for argument!");
+      case MVT::i1:
+      case MVT::i8:
+      case MVT::i16:
+        // Promote the integer to 32 bits.  If the input type is signed use a
+        // sign extend, otherwise use a zero extend.
+        if (Args[i].second->isSigned())
+          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
+        else
+          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
+        // FALL THROUGH
+      case MVT::i32:
+        if (GPR_remaining > 0) {
+          args_to_use.push_back(Args[i].first);
+          --GPR_remaining;
+        } else {
+          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                                       Args[i].first, PtrOff,
+                                       DAG.getSrcValue(NULL)));
+        }
+        ArgOffset += 4;
+        break;
+      case MVT::i64:
+        // If we have one free GPR left, we can place the upper half of the i64
+        // in it, and store the other half to the stack.  If we have two or more
+        // free GPRs, then we can pass both halves of the i64 in registers.
+        if (GPR_remaining > 0) {
+          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+                                     Args[i].first, DAG.getConstant(1, MVT::i32));
+          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+                                     Args[i].first, DAG.getConstant(0, MVT::i32));
+          args_to_use.push_back(Hi);
+          --GPR_remaining;
           if (GPR_remaining > 0) {
-            args_to_use.push_back(Args[i].first);
+            args_to_use.push_back(Lo);
             --GPR_remaining;
           } else {
+            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
+            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
             MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
-                                         Args[i].first, PtrOff,
-                                         DAG.getSrcValue(NULL)));
+                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
           }
-          ArgOffset += 4;
-          break;
-        case MVT::i64:
-          // If we have one free GPR left, we can place the upper half of the i64
-          // in it, and store the other half to the stack.  If we have two or more
-          // free GPRs, then we can pass both halves of the i64 in registers.
-          if (GPR_remaining > 0) {
-            SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
-                                       Args[i].first, DAG.getConstant(1, MVT::i32));
-            SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
-                                       Args[i].first, DAG.getConstant(0, MVT::i32));
-            args_to_use.push_back(Hi);
-            --GPR_remaining;
+        } else {
+          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                                       Args[i].first, PtrOff,
+                                       DAG.getSrcValue(NULL)));
+        }
+        ArgOffset += 8;
+        break;
+      case MVT::f32:
+      case MVT::f64:
+        if (FPR_remaining > 0) {
+          args_to_use.push_back(Args[i].first);
+          --FPR_remaining;
+          if (isVarArg) {
+            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                                          Args[i].first, PtrOff,
+                                          DAG.getSrcValue(NULL));
+            MemOps.push_back(Store);
+            // Float varargs are always shadowed in available integer registers
             if (GPR_remaining > 0) {
-              args_to_use.push_back(Lo);
+              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
+                                           DAG.getSrcValue(NULL));
+              MemOps.push_back(Load);
+              args_to_use.push_back(Load);
               --GPR_remaining;
-            } else {
+            }
+            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
               SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
               PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
-              MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
-                                           Lo, PtrOff, DAG.getSrcValue(NULL)));
+              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
+                                           DAG.getSrcValue(NULL));
+              MemOps.push_back(Load);
+              args_to_use.push_back(Load);
+              --GPR_remaining;
             }
           } else {
-            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
-                                         Args[i].first, PtrOff,
-                                         DAG.getSrcValue(NULL)));
-          }
-          ArgOffset += 8;
-          break;
-        case MVT::f32:
-        case MVT::f64:
-          if (FPR_remaining > 0) {
-            args_to_use.push_back(Args[i].first);
-            --FPR_remaining;
-            if (isVarArg) {
-              SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
-                                            Args[i].first, PtrOff,
-                                            DAG.getSrcValue(NULL));
-              MemOps.push_back(Store);
-              // Float varargs are always shadowed in available integer registers
-              if (GPR_remaining > 0) {
-                SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
-                                             DAG.getSrcValue(NULL));
-                MemOps.push_back(Load);
-                args_to_use.push_back(Load);
-                --GPR_remaining;
-              }
-              if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
-                SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
-                PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
-                SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
-                                             DAG.getSrcValue(NULL));
-                MemOps.push_back(Load);
-                args_to_use.push_back(Load);
-                --GPR_remaining;
-              }
-            } else {
-              // If we have any FPRs remaining, we may also have GPRs remaining.
-              // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
-              // GPRs.
-              if (GPR_remaining > 0) {
-                args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
-                --GPR_remaining;
-              }
-              if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
-                args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
-                --GPR_remaining;
-              }
+            // If we have any FPRs remaining, we may also have GPRs remaining.
+            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+            // GPRs.
+            if (GPR_remaining > 0) {
+              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+              --GPR_remaining;
+            }
+            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
+              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+              --GPR_remaining;
             }
-          } else {
-            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
-                                         Args[i].first, PtrOff,
-                                         DAG.getSrcValue(NULL)));
           }
-          ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
-          break;
+        } else {
+          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                                       Args[i].first, PtrOff,
+                                       DAG.getSrcValue(NULL)));
+        }
+        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
+        break;
       }
     }
     if (!MemOps.empty())
@@ -502,20 +683,52 @@ PPC32TargetLowering::LowerCallTo(SDOperand Chain,
   
   std::vector<MVT::ValueType> RetVals;
   MVT::ValueType RetTyVT = getValueType(RetTy);
+  MVT::ValueType ActualRetTyVT = RetTyVT;
+  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
+    ActualRetTyVT = MVT::i32;   // Promote result to i32.
+    
   if (RetTyVT != MVT::isVoid)
-    RetVals.push_back(RetTyVT);
+    RetVals.push_back(ActualRetTyVT);
   RetVals.push_back(MVT::Other);
   
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+  
   SDOperand TheCall = SDOperand(DAG.getCall(RetVals,
                                             Chain, Callee, args_to_use), 0);
   Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                       DAG.getConstant(NumBytes, getPointerTy()));
-  return std::make_pair(TheCall, Chain);
+  SDOperand RetVal = TheCall;
+  
+  // If the result is a small value, add a note so that we keep track of the
+  // information about whether it is sign or zero extended.
+  if (RetTyVT != ActualRetTyVT) {
+    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
+                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
+    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
+  }
+  
+  return std::make_pair(RetVal, Chain);
+}
+
+SDOperand PPCTargetLowering::LowerReturnTo(SDOperand Chain, SDOperand Op,
+                                           SelectionDAG &DAG) {  // Builds the ISD::RET node that returns Op on Chain.
+  if (Op.getValueType() == MVT::i64) {  // i64 is returned as two i32 values.
+    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op, 
+                               DAG.getConstant(1, MVT::i32));  // element 1 -> Hi
+    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op,
+                               DAG.getConstant(0, MVT::i32));  // element 0 -> Lo
+    return DAG.getNode(ISD::RET, MVT::Other, Chain, Lo, Hi);  // RET operands: Lo, then Hi.
+  } else {
+    return DAG.getNode(ISD::RET, MVT::Other, Chain, Op);  // Any other type: return Op as a single operand.
+  }
 }
 
-SDOperand PPC32TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
-                                            Value *VAListV, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                          Value *VAListV, SelectionDAG &DAG) {
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
@@ -524,9 +737,9 @@ SDOperand PPC32TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
 }
 
 std::pair<SDOperand,SDOperand>
-PPC32TargetLowering::LowerVAArg(SDOperand Chain,
-                                SDOperand VAListP, Value *VAListV,
-                                const Type *ArgTy, SelectionDAG &DAG) {
+PPCTargetLowering::LowerVAArg(SDOperand Chain,
+                              SDOperand VAListP, Value *VAListV,
+                              const Type *ArgTy, SelectionDAG &DAG) {
   MVT::ValueType ArgVT = getValueType(ArgTy);
   
   SDOperand VAList =
@@ -548,7 +761,7 @@ PPC32TargetLowering::LowerVAArg(SDOperand Chain,
 }
 
 
-std::pair<SDOperand, SDOperand> PPC32TargetLowering::
+std::pair<SDOperand, SDOperand> PPCTargetLowering::
 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                         SelectionDAG &DAG) {
   assert(0 && "LowerFrameReturnAddress unimplemented");
@@ -556,10 +769,11 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
 }
 
 MachineBasicBlock *
-PPC32TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
-                                             MachineBasicBlock *BB) {
+PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                           MachineBasicBlock *BB) {
   assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
-          MI->getOpcode() == PPC::SELECT_CC_FP) &&
+          MI->getOpcode() == PPC::SELECT_CC_F4 ||
+          MI->getOpcode() == PPC::SELECT_CC_F8) &&
          "Unexpected instr type to insert");
   
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond