Back out the previous change to SelectBranchCC, since there are cases it

[oota-llvm.git] / lib / Target / PowerPC / PPC64ISelSimple.cpp
diff --git a/lib/Target/PowerPC/PPC64ISelSimple.cpp b/lib/Target/PowerPC/PPC64ISelSimple.cpp

index 72593a23f5d0f1a3d807c1792ef72fc8dbba5420..30531b6eeefd5a8740aa1717f0ff10ce336dbea5 100644 (file)
--- a/lib/Target/PowerPC/PPC64ISelSimple.cpp
+++ b/lib/Target/PowerPC/PPC64ISelSimple.cpp
@@ -26,8 +26,8 @@
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/InstVisitor.h"
-#include "Support/Debug.h"
-#include "Support/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
  #include <vector>
  using namespace llvm;
  
@@ -73,7 +73,7 @@ static inline TypeClass getClassB(const Type *Ty) {
  }
  
  namespace {
-  struct ISel : public FunctionPass, InstVisitor<ISel> {
+  struct PPC64ISel : public FunctionPass, InstVisitor<PPC64ISel> {
      PPC64TargetMachine &TM;
      MachineFunction *F;                 // The function we are compiling into
      MachineBasicBlock *BB;              // The current MBB we are compiling
@@ -82,9 +82,8 @@ namespace {
      std::map<Value*, unsigned> RegMap;  // Mapping between Values and SSA Regs
  
      // External functions used in the Module
-    Function *fmodfFn, *fmodFn, *__cmpdi2Fn, *__moddi3Fn, *__divdi3Fn, 
-      *__umoddi3Fn,  *__udivdi3Fn, *__fixsfdiFn, *__fixdfdiFn, *__fixunssfdiFn,
-      *__fixunsdfdiFn, *__floatdisfFn, *__floatdidfFn, *mallocFn, *freeFn;
+    Function *fmodfFn, *fmodFn, *__cmpdi2Fn, *__fixsfdiFn, *__fixdfdiFn, 
+      *__fixunssfdiFn, *__fixunsdfdiFn, *mallocFn, *freeFn;
  
      // MBBMap - Mapping between LLVM BB -> Machine BB
      std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
@@ -93,8 +92,11 @@ namespace {
      // FrameIndex for the alloca.
      std::map<AllocaInst*, unsigned> AllocaMap;
  
-    ISel(TargetMachine &tm) : TM(reinterpret_cast<PPC64TargetMachine&>(tm)), 
-      F(0), BB(0) {}
+    // Target configuration data
+    const unsigned ParameterSaveAreaOffset, MaxArgumentStackSpace;
+
+    PPC64ISel(TargetMachine &tm):TM(reinterpret_cast<PPC64TargetMachine&>(tm)), 
+      F(0), BB(0), ParameterSaveAreaOffset(24), MaxArgumentStackSpace(32) {}
  
      bool doInitialization(Module &M) {
        // Add external functions that we may call
@@ -110,14 +112,6 @@ namespace {
        fmodFn = M.getOrInsertFunction("fmod", d, d, d, 0);
        // int __cmpdi2(long, long);
        __cmpdi2Fn = M.getOrInsertFunction("__cmpdi2", i, l, l, 0);
-      // long __moddi3(long, long);
-      __moddi3Fn = M.getOrInsertFunction("__moddi3", l, l, l, 0);
-      // long __divdi3(long, long);
-      __divdi3Fn = M.getOrInsertFunction("__divdi3", l, l, l, 0);
-      // unsigned long __umoddi3(unsigned long, unsigned long);
-      __umoddi3Fn = M.getOrInsertFunction("__umoddi3", ul, ul, ul, 0);
-      // unsigned long __udivdi3(unsigned long, unsigned long);
-      __udivdi3Fn = M.getOrInsertFunction("__udivdi3", ul, ul, ul, 0);
        // long __fixsfdi(float)
        __fixsfdiFn = M.getOrInsertFunction("__fixsfdi", l, f, 0);
        // long __fixdfdi(double)
@@ -126,10 +120,6 @@ namespace {
        __fixunssfdiFn = M.getOrInsertFunction("__fixunssfdi", ul, f, 0);
        // unsigned long __fixunsdfdi(double)
        __fixunsdfdiFn = M.getOrInsertFunction("__fixunsdfdi", ul, d, 0);
-      // float __floatdisf(long)
-      __floatdisfFn = M.getOrInsertFunction("__floatdisf", f, l, 0);
-      // double __floatdidf(long)
-      __floatdidfFn = M.getOrInsertFunction("__floatdidf", d, l, 0);
        // void* malloc(size_t)
        mallocFn = M.getOrInsertFunction("malloc", voidPtr, Type::UIntTy, 0);
        // void free(void*)
@@ -407,8 +397,8 @@ static AllocaInst *dyn_castFixedAlloca(Value *V) {
  
  /// getReg - This method turns an LLVM value into a register number.
  ///
-unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB,
-                      MachineBasicBlock::iterator IPt) {
+unsigned PPC64ISel::getReg(Value *V, MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator IPt) {
    if (Constant *C = dyn_cast<Constant>(V)) {
      unsigned Reg = makeAnotherReg(V->getType());
      copyConstantToRegister(MBB, IPt, C, Reg);
@@ -433,7 +423,7 @@ unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB,
  /// is okay to use as an immediate argument to a certain binary operator.
  ///
  /// Operator is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for Xor.
-bool ISel::canUseAsImmediateForOpcode(ConstantInt *CI, unsigned Operator) {
+bool PPC64ISel::canUseAsImmediateForOpcode(ConstantInt *CI, unsigned Operator) {
    ConstantSInt *Op1Cs;
    ConstantUInt *Op1Cu;
        
@@ -474,7 +464,7 @@ bool ISel::canUseAsImmediateForOpcode(ConstantInt *CI, unsigned Operator) {
  /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
  /// that is to be statically allocated with the initial stack frame
  /// adjustment.
-unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
+unsigned PPC64ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
    // Already computed this?
    std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI);
    if (I != AllocaMap.end() && I->first == AI) return I->second;
@@ -495,9 +485,9 @@ unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
  /// copyConstantToRegister - Output the instructions required to put the
  /// specified constant into the specified register.
  ///
-void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
-                                  MachineBasicBlock::iterator IP,
-                                  Constant *C, unsigned R) {
+void PPC64ISel::copyConstantToRegister(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator IP,
+                                       Constant *C, unsigned R) {
    if (C->getType()->isIntegral()) {
      unsigned Class = getClassB(C->getType());
  
@@ -584,8 +574,8 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
  
  /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
  /// the stack into virtual registers.
-void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
-  unsigned ArgOffset = 24;
+void PPC64ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
+  unsigned ArgOffset = ParameterSaveAreaOffset;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;
    unsigned GPR_idx = 0, FPR_idx = 0;
@@ -600,7 +590,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
      
    MachineFrameInfo *MFI = F->getFrameInfo();
   
-  for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
+  for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); I != E; ++I) {
      bool ArgLive = !I->use_empty();
      unsigned Reg = ArgLive ? getReg(*I) : 0;
      int FI;          // Frame object index
@@ -713,7 +703,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
  /// because we have to generate our sources into the source basic blocks, not
  /// the current one.
  ///
-void ISel::SelectPHINodes() {
+void PPC64ISel::SelectPHINodes() {
    const TargetInstrInfo &TII = *TM.getInstrInfo();
    const Function &LF = *F->getFunction();  // The LLVM function...
    for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
@@ -871,17 +861,17 @@ static unsigned getPPCOpcodeForSetCCNumber(unsigned Opcode) {
  }
  
  /// emitUCOM - emits an unordered FP compare.
-void ISel::emitUCOM(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
-                    unsigned LHS, unsigned RHS) {
+void PPC64ISel::emitUCOM(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                         unsigned LHS, unsigned RHS) {
      BuildMI(*MBB, IP, PPC::FCMPU, 2, PPC::CR0).addReg(LHS).addReg(RHS);
  }
  
  /// EmitComparison - emits a comparison of the two operands, returning the
  /// extended setcc code to use.  The result is in CR0.
  ///
-unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
-                              MachineBasicBlock *MBB,
-                              MachineBasicBlock::iterator IP) {
+unsigned PPC64ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
+                                   MachineBasicBlock *MBB,
+                                   MachineBasicBlock::iterator IP) {
    // The arguments are already supposed to be of the same type.
    const Type *CompTy = Op0->getType();
    unsigned Class = getClassB(CompTy);
@@ -953,7 +943,7 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
  /// visitSetCondInst - emit code to calculate the condition via
  /// EmitComparison(), and possibly store a 0 or 1 to a register as a result
  ///
-void ISel::visitSetCondInst(SetCondInst &I) {
+void PPC64ISel::visitSetCondInst(SetCondInst &I) {
    if (canFoldSetCCIntoBranchOrSelect(&I))
      return;
  
@@ -1017,7 +1007,7 @@ void ISel::visitSetCondInst(SetCondInst &I) {
      .addMBB(copy0MBB).addReg(TrueValue).addMBB(copy1MBB);
  }
  
-void ISel::visitSelectInst(SelectInst &SI) {
+void PPC64ISel::visitSelectInst(SelectInst &SI) {
    unsigned DestReg = getReg(SI);
    MachineBasicBlock::iterator MII = BB->end();
    emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(),
@@ -1028,10 +1018,10 @@ void ISel::visitSelectInst(SelectInst &SI) {
  /// expression support.
  /// FIXME: this is most likely broken in one or more ways.  Namely, PowerPC has
  /// no select instruction.  FSEL only works for comparisons against zero.
-void ISel::emitSelectOperation(MachineBasicBlock *MBB,
-                               MachineBasicBlock::iterator IP,
-                               Value *Cond, Value *TrueVal, Value *FalseVal,
-                               unsigned DestReg) {
+void PPC64ISel::emitSelectOperation(MachineBasicBlock *MBB,
+                                    MachineBasicBlock::iterator IP,
+                                    Value *Cond, Value *TrueVal,
+                                    Value *FalseVal, unsigned DestReg) {
    unsigned SelectClass = getClassB(TrueVal->getType());
    unsigned Opcode;
  
@@ -1105,7 +1095,7 @@ void ISel::emitSelectOperation(MachineBasicBlock *MBB,
  /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
  /// operand, in the specified target register.
  ///
-void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
+void PPC64ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
    bool isUnsigned = VR.Ty->isUnsigned() || VR.Ty == Type::BoolTy;
  
    Value *Val = VR.Val;
@@ -1166,7 +1156,7 @@ void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
  
  /// visitReturnInst - implemented with BLR
  ///
-void ISel::visitReturnInst(ReturnInst &I) {
+void PPC64ISel::visitReturnInst(ReturnInst &I) {
    // Only do the processing if this is a non-void return
    if (I.getNumOperands() > 0) {
      Value *RetVal = I.getOperand(0);
@@ -1202,7 +1192,7 @@ static inline BasicBlock *getBlockAfter(BasicBlock *BB) {
  /// jump to a block that is the immediate successor of the current block, we can
  /// just make a fall-through (but we don't currently).
  ///
-void ISel::visitBranchInst(BranchInst &BI) {
+void PPC64ISel::visitBranchInst(BranchInst &BI) {
    // Update machine-CFG edges
    BB->addSuccessor(MBBMap[BI.getSuccessor(0)]);
    if (BI.isConditional())
@@ -1265,12 +1255,12 @@ void ISel::visitBranchInst(BranchInst &BI) {
  /// and the return value as appropriate.  For the actual function call itself,
  /// it inserts the specified CallMI instruction into the stream.
  ///
-void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
-                  const std::vector<ValueRecord> &Args, bool isVarArg) {
+void PPC64ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
+                       const std::vector<ValueRecord> &Args, bool isVarArg) {
    // Count how many bytes are to be pushed on the stack, including the linkage
    // area, and parameter passing area.
-  unsigned NumBytes = 24;
-  unsigned ArgOffset = 24;
+  unsigned NumBytes = ParameterSaveAreaOffset;
+  unsigned ArgOffset = ParameterSaveAreaOffset;
  
    if (!Args.empty()) {
      for (unsigned i = 0, e = Args.size(); i != e; ++i)
@@ -1287,16 +1277,16 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
        default: assert(0 && "Unknown class!");
        }
  
-    // Just to be safe, we'll always reserve the full 32 bytes worth of
-    // argument passing space in case any called code gets funky on us.
-    if (NumBytes < 24 + 32) NumBytes = 24 + 32;
+    // Just to be safe, we'll always reserve the full argument passing space in
+    // case any called code gets funky on us.
+    if (NumBytes < ParameterSaveAreaOffset + MaxArgumentStackSpace) 
+      NumBytes = ParameterSaveAreaOffset + MaxArgumentStackSpace;
  
      // Adjust the stack pointer for the new arguments...
      // These functions are automatically eliminated by the prolog/epilog pass
      BuildMI(BB, PPC::ADJCALLSTACKDOWN, 1).addImm(NumBytes);
  
      // Arguments go on the stack in reverse order, as specified by the ABI.
-    // Offset to the paramater area on the stack is 24.
      int GPR_remaining = 8, FPR_remaining = 13;
      unsigned GPR_idx = 0, FPR_idx = 0;
      static const unsigned GPR[] = { 
@@ -1451,7 +1441,7 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
  
  
  /// visitCallInst - Push args on stack and do a procedure call instruction.
-void ISel::visitCallInst(CallInst &CI) {
+void PPC64ISel::visitCallInst(CallInst &CI) {
    MachineInstr *TheCall;
    Function *F = CI.getCalledFunction();
    if (F) {
@@ -1508,7 +1498,7 @@ static bool isOnlyUsedByUnorderedComparisons(Value *V) {
  /// function, lowering any calls to unknown intrinsic functions into the
  /// equivalent LLVM code.
  ///
-void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
+void PPC64ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
        if (CallInst *CI = dyn_cast<CallInst>(I++))
@@ -1529,7 +1519,6 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
            case Intrinsic::readio: {
              // On PPC, memory operations are in-order.  Lower this intrinsic
              // into a volatile load.
-            Instruction *Before = CI->getPrev();
              LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI);
              CI->replaceAllUsesWith(LI);
              BB->getInstList().erase(CI);
@@ -1538,7 +1527,6 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
            case Intrinsic::writeio: {
              // On PPC, memory operations are in-order.  Lower this intrinsic
              // into a volatile store.
-            Instruction *Before = CI->getPrev();
              StoreInst *SI = new StoreInst(CI->getOperand(1),
                                            CI->getOperand(2), true, CI);
              CI->replaceAllUsesWith(SI);
@@ -1557,7 +1545,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
            }
  }
  
-void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
+void PPC64ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
    unsigned TmpReg1, TmpReg2, TmpReg3;
    switch (ID) {
    case Intrinsic::vastart:
@@ -1620,7 +1608,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
  /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
  /// Xor.
  ///
-void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
+void PPC64ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
    unsigned DestReg = getReg(B);
    MachineBasicBlock::iterator MI = BB->end();
    Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
@@ -1631,10 +1619,10 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
  
  /// emitBinaryFPOperation - This method handles emission of floating point
  /// Add (0), Sub (1), Mul (2), and Div (3) operations.
-void ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
-                                 MachineBasicBlock::iterator IP,
-                                 Value *Op0, Value *Op1,
-                                 unsigned OperatorClass, unsigned DestReg) {
+void PPC64ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
+                                      MachineBasicBlock::iterator IP,
+                                      Value *Op0, Value *Op1,
+                                      unsigned OperatorClass, unsigned DestReg){
  
    static const unsigned OpcodeTab[][4] = {
      { PPC::FADDS, PPC::FSUBS, PPC::FMULS, PPC::FDIVS },  // Float
@@ -1663,18 +1651,20 @@ void ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
  /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
  /// and constant expression support.
  ///
-void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
-                                     MachineBasicBlock::iterator IP,
-                                     Value *Op0, Value *Op1,
-                                     unsigned OperatorClass, unsigned DestReg) {
+void PPC64ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
+                                          MachineBasicBlock::iterator IP,
+                                          Value *Op0, Value *Op1,
+                                          unsigned OperatorClass, 
+                                          unsigned DestReg) {
    unsigned Class = getClassB(Op0->getType());
  
    // Arithmetic and Bitwise operators
    static const unsigned OpcodeTab[] = {
      PPC::ADD, PPC::SUB, PPC::AND, PPC::OR, PPC::XOR
    };
+  // FIXME: Convert this to the version from PPC32ISel
    static const unsigned ImmOpcodeTab[] = {
-    PPC::ADDI, PPC::SUBI, PPC::ANDIo, PPC::ORI, PPC::XORI
+    PPC::ADDI, PPC::ADDI, PPC::ANDIo, PPC::ORI, PPC::XORI
    };
    static const unsigned RImmOpcodeTab[] = {
      PPC::ADDI, PPC::SUBFIC, PPC::ANDIo, PPC::ORI, PPC::XORI
@@ -1778,9 +1768,9 @@ static unsigned ExactLog2(unsigned Val) {
  /// doMultiply - Emit appropriate instructions to multiply together the
  /// Values Op0 and Op1, and put the result in DestReg.
  ///
-void ISel::doMultiply(MachineBasicBlock *MBB,
-                      MachineBasicBlock::iterator IP,
-                      unsigned DestReg, Value *Op0, Value *Op1) {
+void PPC64ISel::doMultiply(MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator IP,
+                           unsigned DestReg, Value *Op0, Value *Op1) {
    unsigned Class0 = getClass(Op0->getType());
    unsigned Class1 = getClass(Op1->getType());
    
@@ -1811,9 +1801,9 @@ void ISel::doMultiply(MachineBasicBlock *MBB,
  
  /// doMultiplyConst - This method will multiply the value in Op0 by the
  /// value of the ContantInt *CI
-void ISel::doMultiplyConst(MachineBasicBlock *MBB,
-                           MachineBasicBlock::iterator IP,
-                           unsigned DestReg, Value *Op0, ConstantInt *CI) {
+void PPC64ISel::doMultiplyConst(MachineBasicBlock *MBB,
+                                MachineBasicBlock::iterator IP,
+                                unsigned DestReg, Value *Op0, ConstantInt *CI) {
    unsigned Class = getClass(Op0->getType());
  
    // Mul op0, 0 ==> 0
@@ -1849,7 +1839,7 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
    doMultiply(MBB, IP, DestReg, Op0, CI);
  }
  
-void ISel::visitMul(BinaryOperator &I) {
+void PPC64ISel::visitMul(BinaryOperator &I) {
    unsigned ResultReg = getReg(I);
  
    Value *Op0 = I.getOperand(0);
@@ -1859,8 +1849,9 @@ void ISel::visitMul(BinaryOperator &I) {
    emitMultiply(BB, IP, Op0, Op1, ResultReg);
  }
  
-void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
-                        Value *Op0, Value *Op1, unsigned DestReg) {
+void PPC64ISel::emitMultiply(MachineBasicBlock *MBB, 
+                             MachineBasicBlock::iterator IP,
+                             Value *Op0, Value *Op1, unsigned DestReg) {
    TypeClass Class = getClass(Op0->getType());
  
    switch (Class) {
@@ -1888,7 +1879,7 @@ void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
  /// select the result from a different register.  Note that both of these
  /// instructions work differently for signed and unsigned operands.
  ///
-void ISel::visitDivRem(BinaryOperator &I) {
+void PPC64ISel::visitDivRem(BinaryOperator &I) {
    unsigned ResultReg = getReg(I);
    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
@@ -1897,10 +1888,10 @@ void ISel::visitDivRem(BinaryOperator &I) {
                        ResultReg);
  }
  
-void ISel::emitDivRemOperation(MachineBasicBlock *BB,
-                               MachineBasicBlock::iterator IP,
-                               Value *Op0, Value *Op1, bool isDiv,
-                               unsigned ResultReg) {
+void PPC64ISel::emitDivRemOperation(MachineBasicBlock *BB,
+                                    MachineBasicBlock::iterator IP,
+                                    Value *Op0, Value *Op1, bool isDiv,
+                                    unsigned ResultReg) {
    const Type *Ty = Op0->getType();
    unsigned Class = getClass(Ty);
    switch (Class) {
@@ -1938,22 +1929,7 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
        doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args, false);
      }
      return;
-  case cLong: {
-    static Function* const Funcs[] =
-      { __moddi3Fn, __divdi3Fn, __umoddi3Fn, __udivdi3Fn };
-    unsigned Op0Reg = getReg(Op0, BB, IP);
-    unsigned Op1Reg = getReg(Op1, BB, IP);
-    unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
-    MachineInstr *TheCall =
-      BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(Funcs[NameIdx], true);
-
-    std::vector<ValueRecord> Args;
-    Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
-    Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
-    doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args, false);
-    return;
-  }
-  case cByte: case cShort: case cInt:
+  case cLong: case cByte: case cShort: case cInt:
      break;          // Small integrals, handled below...
    default: assert(0 && "Unknown class!");
    }
@@ -1980,25 +1956,30 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
        if (log2V != 0 && Ty->isSigned()) {
          unsigned Op0Reg = getReg(Op0, BB, IP);
          unsigned TmpReg = makeAnotherReg(Op0->getType());
+        unsigned Opcode = Class == cLong ? PPC::SRADI : PPC::SRAWI;
          
-        BuildMI(*BB, IP, PPC::SRAWI, 2, TmpReg).addReg(Op0Reg).addImm(log2V);
+        BuildMI(*BB, IP, Opcode, 2, TmpReg).addReg(Op0Reg).addImm(log2V);
          BuildMI(*BB, IP, PPC::ADDZE, 1, ResultReg).addReg(TmpReg);
          return;
        }
      }
  
+  static const unsigned DivOpcodes[] = 
+    { PPC::DIVWU, PPC::DIVW, PPC::DIVDU, PPC::DIVD };
+
    unsigned Op0Reg = getReg(Op0, BB, IP);
    unsigned Op1Reg = getReg(Op1, BB, IP);
-  unsigned Opcode = Ty->isSigned() ? PPC::DIVW : PPC::DIVWU;
+  unsigned Opcode = DivOpcodes[2*(Class == cLong) + Ty->isSigned()];
    
    if (isDiv) {
      BuildMI(*BB, IP, Opcode, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
    } else { // Remainder
      unsigned TmpReg1 = makeAnotherReg(Op0->getType());
      unsigned TmpReg2 = makeAnotherReg(Op0->getType());
+    unsigned MulOpcode = Class == cLong ? PPC::MULLD : PPC::MULLW;
      
      BuildMI(*BB, IP, Opcode, 2, TmpReg1).addReg(Op0Reg).addReg(Op1Reg);
-    BuildMI(*BB, IP, PPC::MULLW, 2, TmpReg2).addReg(TmpReg1).addReg(Op1Reg);
+    BuildMI(*BB, IP, MulOpcode, 2, TmpReg2).addReg(TmpReg1).addReg(Op1Reg);
      BuildMI(*BB, IP, PPC::SUBF, 2, ResultReg).addReg(TmpReg2).addReg(Op0Reg);
    }
  }
@@ -2009,7 +1990,7 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
  /// shift values equal to 1. Even the general case is sort of special,
  /// because the shift amount has to be in CL, not just any old register.
  ///
-void ISel::visitShiftInst(ShiftInst &I) {
+void PPC64ISel::visitShiftInst(ShiftInst &I) {
    MachineBasicBlock::iterator IP = BB->end();
    emitShiftOperation(BB, IP, I.getOperand(0), I.getOperand(1),
                       I.getOpcode() == Instruction::Shl, I.getType(),
@@ -2019,10 +2000,11 @@ void ISel::visitShiftInst(ShiftInst &I) {
  /// emitShiftOperation - Common code shared between visitShiftInst and
  /// constant expression support.
  ///
-void ISel::emitShiftOperation(MachineBasicBlock *MBB,
-                              MachineBasicBlock::iterator IP,
-                              Value *Op, Value *ShiftAmount, bool isLeftShift,
-                              const Type *ResultTy, unsigned DestReg) {
+void PPC64ISel::emitShiftOperation(MachineBasicBlock *MBB,
+                                   MachineBasicBlock::iterator IP,
+                                   Value *Op, Value *ShiftAmount, 
+                                   bool isLeftShift, const Type *ResultTy, 
+                                   unsigned DestReg) {
    unsigned SrcReg = getReg (Op, MBB, IP);
    bool isSigned = ResultTy->isSigned ();
    unsigned Class = getClass (ResultTy);
@@ -2094,7 +2076,7 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
  /// mapping of LLVM classes to PPC load instructions, with the exception of
  /// signed byte loads, which need a sign extension following them.
  ///
-void ISel::visitLoadInst(LoadInst &I) {
+void PPC64ISel::visitLoadInst(LoadInst &I) {
    // Immediate opcodes, for reg+imm addressing
    static const unsigned ImmOpcodes[] = { 
      PPC::LBZ, PPC::LHZ, PPC::LWZ, 
@@ -2182,7 +2164,7 @@ void ISel::visitLoadInst(LoadInst &I) {
  
  /// visitStoreInst - Implement LLVM store instructions
  ///
-void ISel::visitStoreInst(StoreInst &I) {
+void PPC64ISel::visitStoreInst(StoreInst &I) {
    // Immediate opcodes, for reg+imm addressing
    static const unsigned ImmOpcodes[] = {
      PPC::STB, PPC::STH, PPC::STW, 
@@ -2237,7 +2219,7 @@ void ISel::visitStoreInst(StoreInst &I) {
  /// visitCastInst - Here we have various kinds of copying with or without sign
  /// extension going on.
  ///
-void ISel::visitCastInst(CastInst &CI) {
+void PPC64ISel::visitCastInst(CastInst &CI) {
    Value *Op = CI.getOperand(0);
  
    unsigned SrcClass = getClassB(Op->getType());
@@ -2266,10 +2248,10 @@ void ISel::visitCastInst(CastInst &CI) {
  /// emitCastOperation - Common code shared between visitCastInst and constant
  /// expression cast support.
  ///
-void ISel::emitCastOperation(MachineBasicBlock *MBB,
-                             MachineBasicBlock::iterator IP,
-                             Value *Src, const Type *DestTy,
-                             unsigned DestReg) {
+void PPC64ISel::emitCastOperation(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator IP,
+                                  Value *Src, const Type *DestTy,
+                                  unsigned DestReg) {
    const Type *SrcTy = Src->getType();
    unsigned SrcClass = getClassB(SrcTy);
    unsigned DestClass = getClassB(DestTy);
@@ -2312,68 +2294,60 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
    // Handle casts from integer to floating point now...
    if (DestClass == cFP32 || DestClass == cFP64) {
  
-    // Emit a library call for long to float conversion
-    if (SrcClass == cLong) {
-      std::vector<ValueRecord> Args;
-      Args.push_back(ValueRecord(SrcReg, SrcTy));
-      Function *floatFn = (DestClass == cFP32) ? __floatdisfFn : __floatdidfFn;
-      MachineInstr *TheCall =
-        BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true);
-      doCall(ValueRecord(DestReg, DestTy), TheCall, Args, false);
-      return;
-    }
-    
-    // Make sure we're dealing with a full 32 bits
-    unsigned TmpReg = makeAnotherReg(Type::IntTy);
-    promote32(TmpReg, ValueRecord(SrcReg, SrcTy));
-
-    SrcReg = TmpReg;
-    
      // Spill the integer to memory and reload it from there.
-    // Also spill room for a special conversion constant
-    int ConstantFrameIndex = 
-      F->getFrameInfo()->CreateStackObject(Type::DoubleTy, TM.getTargetData());
+    unsigned TmpReg = makeAnotherReg(Type::DoubleTy);
      int ValueFrameIdx =
        F->getFrameInfo()->CreateStackObject(Type::DoubleTy, TM.getTargetData());
  
-    unsigned constantHi = makeAnotherReg(Type::IntTy);
-    unsigned constantLo = makeAnotherReg(Type::IntTy);
-    unsigned ConstF = makeAnotherReg(Type::DoubleTy);
-    unsigned TempF = makeAnotherReg(Type::DoubleTy);
-    
-    if (!SrcTy->isSigned()) {
-      BuildMI(*BB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330);
-      BuildMI(*BB, IP, PPC::LI, 1, constantLo).addSImm(0);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), 
-                        ConstantFrameIndex);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantLo), 
-                        ConstantFrameIndex, 4);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), 
-                        ValueFrameIdx);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(SrcReg), 
-                        ValueFrameIdx, 4);
-      addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, ConstF), 
-                        ConstantFrameIndex);
-      addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, TempF), ValueFrameIdx);
-      BuildMI(*BB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF);
-    } else {
-      unsigned TempLo = makeAnotherReg(Type::IntTy);
-      BuildMI(*BB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330);
-      BuildMI(*BB, IP, PPC::LIS, 1, constantLo).addSImm(0x8000);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), 
-                        ConstantFrameIndex);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantLo), 
-                        ConstantFrameIndex, 4);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), 
-                        ValueFrameIdx);
-      BuildMI(*BB, IP, PPC::XORIS, 2, TempLo).addReg(SrcReg).addImm(0x8000);
-      addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(TempLo), 
-                        ValueFrameIdx, 4);
-      addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, ConstF), 
-                        ConstantFrameIndex);
-      addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, TempF), ValueFrameIdx);
-      BuildMI(*BB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF);
+    if (SrcClass == cLong) {
+      if (SrcTy->isSigned()) {
+        addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(SrcReg), 
+                          ValueFrameIdx);
+        addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 
+                          ValueFrameIdx);
+        BuildMI(*MBB, IP, PPC::FCFID, 1, DestReg).addReg(TmpReg);
+      } else {
+        unsigned Scale = getReg(ConstantFP::get(Type::DoubleTy, 0x1p32));
+        unsigned TmpHi = makeAnotherReg(Type::IntTy);
+        unsigned TmpLo = makeAnotherReg(Type::IntTy);
+        unsigned FPLow = makeAnotherReg(Type::DoubleTy);
+        unsigned FPTmpHi = makeAnotherReg(Type::DoubleTy);
+        unsigned FPTmpLo = makeAnotherReg(Type::DoubleTy);
+        int OtherFrameIdx = F->getFrameInfo()->CreateStackObject(Type::DoubleTy, 
+                                                            TM.getTargetData());
+        BuildMI(*MBB, IP, PPC::RLDICL, 3, TmpHi).addReg(SrcReg).addImm(32)
+          .addImm(32);
+        BuildMI(*MBB, IP, PPC::RLDICL, 3, TmpLo).addReg(SrcReg).addImm(0)
+          .addImm(32);
+        addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(TmpHi), 
+                          ValueFrameIdx);
+        addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(TmpLo), 
+                          OtherFrameIdx);
+        addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 
+                          ValueFrameIdx);
+        addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, FPLow), 
+                          OtherFrameIdx);
+        BuildMI(*MBB, IP, PPC::FCFID, 1, FPTmpHi).addReg(TmpReg);
+        BuildMI(*MBB, IP, PPC::FCFID, 1, FPTmpLo).addReg(FPLow);
+        BuildMI(*MBB, IP, PPC::FMADD, 3, DestReg).addReg(Scale).addReg(FPTmpHi)
+          .addReg(FPTmpLo);
+      }
+      return;
      }
+    
+    // FIXME: really want a promote64
+    unsigned IntTmp = makeAnotherReg(Type::IntTy);
+
+    if (SrcTy->isSigned())
+      BuildMI(*MBB, IP, PPC::EXTSW, 1, IntTmp).addReg(SrcReg);
+    else
+      BuildMI(*MBB, IP, PPC::RLDICL, 3, IntTmp).addReg(SrcReg).addImm(0)
+        .addImm(32);
+    addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(IntTmp), 
+                      ValueFrameIdx);
+    addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 
+                      ValueFrameIdx);
+    BuildMI(*MBB, IP, PPC::FCFID, 1, DestReg).addReg(TmpReg);
      return;
    }
  
@@ -2651,7 +2625,7 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
  
  /// visitVANextInst - Implement the va_next instruction...
  ///
-void ISel::visitVANextInst(VANextInst &I) {
+void PPC64ISel::visitVANextInst(VANextInst &I) {
    unsigned VAList = getReg(I.getOperand(0));
    unsigned DestReg = getReg(I);
  
@@ -2677,7 +2651,7 @@ void ISel::visitVANextInst(VANextInst &I) {
    BuildMI(BB, PPC::ADDI, 2, DestReg).addReg(VAList).addSImm(Size);
  }
  
-void ISel::visitVAArgInst(VAArgInst &I) {
+void PPC64ISel::visitVAArgInst(VAArgInst &I) {
    unsigned VAList = getReg(I.getOperand(0));
    unsigned DestReg = getReg(I);
  
@@ -2706,7 +2680,7 @@ void ISel::visitVAArgInst(VAArgInst &I) {
  
  /// visitGetElementPtrInst - instruction-select GEP instructions
  ///
-void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+void PPC64ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
    if (canFoldGEPIntoLoadOrStore(&I))
      return;
  
@@ -2718,12 +2692,12 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
  /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
  /// constant expression GEP support.
  ///
-void ISel::emitGEPOperation(MachineBasicBlock *MBB,
-                            MachineBasicBlock::iterator IP,
-                            Value *Src, User::op_iterator IdxBegin,
-                            User::op_iterator IdxEnd, unsigned TargetReg,
-                            bool GEPIsFolded, ConstantSInt **RemainderPtr,
-                            unsigned *PendingAddReg) {
+void PPC64ISel::emitGEPOperation(MachineBasicBlock *MBB,
+                                 MachineBasicBlock::iterator IP,
+                                 Value *Src, User::op_iterator IdxBegin,
+                                 User::op_iterator IdxEnd, unsigned TargetReg,
+                                 bool GEPIsFolded, ConstantSInt **RemainderPtr,
+                                 unsigned *PendingAddReg) {
    const TargetData &TD = TM.getTargetData();
    const Type *Ty = Src->getType();
    unsigned basePtrReg = getReg(Src, MBB, IP);
@@ -2889,7 +2863,7 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
  /// visitAllocaInst - If this is a fixed size alloca, allocate space from the
  /// frame manager, otherwise do it the hard way.
  ///
-void ISel::visitAllocaInst(AllocaInst &I) {
+void PPC64ISel::visitAllocaInst(AllocaInst &I) {
    // If this is a fixed size alloca in the entry block for the function, we
    // statically stack allocate the space, so we don't need to do anything here.
    //
@@ -2932,7 +2906,7 @@ void ISel::visitAllocaInst(AllocaInst &I) {
  /// visitMallocInst - Malloc instructions are code generated into direct calls
  /// to the library malloc.
  ///
-void ISel::visitMallocInst(MallocInst &I) {
+void PPC64ISel::visitMallocInst(MallocInst &I) {
    unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType());
    unsigned Arg;
  
@@ -2956,7 +2930,7 @@ void ISel::visitMallocInst(MallocInst &I) {
  /// visitFreeInst - Free instructions are code gen'd to call the free libc
  /// function.
  ///
-void ISel::visitFreeInst(FreeInst &I) {
+void PPC64ISel::visitFreeInst(FreeInst &I) {
    std::vector<ValueRecord> Args;
    Args.push_back(ValueRecord(I.getOperand(0)));
    MachineInstr *TheCall = 
@@ -2968,5 +2942,5 @@ void ISel::visitFreeInst(FreeInst &I) {
  /// code representation is a very simple peep-hole fashion.
  ///
  FunctionPass *llvm::createPPC64ISelSimple(TargetMachine &TM) {
-  return new ISel(TM);
+  return new PPC64ISel(TM);  // returns a new PPC64ISel (formerly ISel) constructed from TM
  }