X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCFastISel.cpp;h=2e524d604789853449a1d1816d31632d156157e6;hb=1bb9c8155ad075e5f78665470a1e7636d3b27d52;hp=a308adebb51479e8739a643d2db1164c7acaf334;hpb=7248968fa529726b44d41bd25403d50c74db4bc4;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index a308adebb51..2e524d60478 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,12 +13,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcfastisel" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCISelLowering.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" @@ -28,17 +27,38 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +//===----------------------------------------------------------------------===// +// +// TBD: +// FastLowerArguments: Handle simple cases. +// PPCMaterializeGV: Handle TLS. +// SelectCall: Handle function pointers. +// SelectCall: Handle multi-register return values. +// SelectCall: Optimize away nops for local calls. +// processCallArgs: Handle bit-converted arguments. +// finishCall: Handle multi-register return values. +// PPCComputeAddress: Handle parameter references as FrameIndex's. +// PPCEmitCmp: Handle immediate as operand 1. +// SelectCall: Handle small byval arguments. +// SelectIntrinsicCall: Implement. +// SelectSelect: Implement. +// Consider factoring isTypeLegal into the base class. +// Implement switches and jump tables. +// +//===----------------------------------------------------------------------===// using namespace llvm; +#define DEBUG_TYPE "ppcfastisel" + namespace { typedef struct Address { @@ -61,12 +81,12 @@ typedef struct Address { } } Address; -class PPCFastISel : public FastISel { +class PPCFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const PPCSubtarget &PPCSubTarget; + const PPCSubtarget *PPCSubTarget; LLVMContext *Context; public: @@ -76,31 +96,29 @@ class PPCFastISel : public FastISel { TM(FuncInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - PPCSubTarget( - *((static_cast(&TM))->getSubtargetImpl()) - ), + PPCSubTarget(&TM.getSubtarget()), Context(&FuncInfo.Fn->getContext()) { } // Backend specific FastISel code. private: - virtual bool TargetSelectInstruction(const Instruction *I); - virtual unsigned TargetMaterializeConstant(const Constant *C); - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); - virtual bool FastLowerArguments(); - virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); + bool TargetSelectInstruction(const Instruction *I) override; + unsigned TargetMaterializeConstant(const Constant *C) override; + unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool FastLowerArguments() override; + unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); // Instruction selection routines. private: @@ -108,8 +126,14 @@ class PPCFastISel : public FastISel { bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectIToFP(const Instruction *I, bool IsSigned); + bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); + bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); // Utility routines. @@ -134,9 +158,23 @@ class PPCFastISel : public FastISel { const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned); + unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); // Call handling routines. private: + bool processCallArgs(SmallVectorImpl &Args, + SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, + SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg); + void finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -243,7 +281,7 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Given a value Obj, create an Address object Addr that represents its // address. Return false if we can't handle it. bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -285,24 +323,19 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { II != IE; ++II, ++GTI) { const Value *Op = *II; if (StructType *STy = dyn_cast(*GTI)) { - const StructLayout *SL = TD.getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; break; } - if (isa(Op) && - (!isa(Op) || - FuncInfo.MBBMap[cast(Op)->getParent()] - == FuncInfo.MBB) && - isa(cast(Op)->getOperand(1))) { - // An add (in the same block) with a constant operand. Fold the - // constant. + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; @@ -372,7 +405,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, // register and continue. This should almost never happen. if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); Addr.Base.Reg = ResultReg; Addr.BaseType = Address::RegBase; @@ -464,13 +497,13 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); // Base reg with offset in range. } else if (UseOffset) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. @@ -494,7 +527,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LFS: Opc = PPC::LFSX; break; case PPC::LFD: Opc = PPC::LFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -522,7 +555,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { // to constrain RA from using R0/X0 when this is not legal. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) @@ -580,18 +613,19 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg) - .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg) + .addImm(Addr.Offset) + .addFrameIndex(Addr.Base.FI) + .addMemOperand(MMO); // Base reg with offset in range. - } else if (UseOffset) { - if (Addr.Offset == 0 && Opc == PPC::STW8) - dbgs() << "Possible problem here.\n"; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + } else if (UseOffset) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. - } else { + else { // Get the RR opcode corresponding to the RI one. FIXME: It would be // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it // is hard to get at. @@ -607,7 +641,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STFS: Opc = PPC::STFSX; break; case PPC::STFD: Opc = PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -671,9 +705,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { CondReg)) return false; - BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC)) + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -681,7 +715,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DL); + FastEmitBranch(Target, DbgLoc); return true; } @@ -704,6 +738,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) + return false; + // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are @@ -778,15 +815,275 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, } if (!UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addReg(SrcReg2); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addImm(Imm); return true; } +// Attempt to fast-select a floating-point extend instruction. +bool PPCFastISel::SelectFPExt(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f32 || DestVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // No code is generated for a FP extend. + UpdateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select a floating-point truncate instruction. +bool PPCFastISel::SelectFPTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f64 || DestVT != MVT::f32) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // Round the result to single precision. + unsigned DestReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) + .addReg(SrcReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move an i32 or i64 value in a GPR to an f64 value in an FPR. +// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte +// stack slot and 4-byte store/load sequence. Or just sext the 4-byte +// case to 8 bytes which produces tighter code but wastes stack space. +unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, + bool IsSigned) { + + // If necessary, extend 32-bit int to 64-bit. + if (SrcVT == MVT::i32) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return 0; + SrcReg = TmpReg; + } + + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the GPR. + if (!PPCEmitStore(MVT::i64, SrcReg, Addr)) + return 0; + + // Load the integer value into an FPR. The kind of load used depends + // on a number of conditions. + unsigned LoadOpc = PPC::LFD; + + if (SrcVT == MVT::i32) { + if (!IsSigned) { + LoadOpc = PPC::LFIWZX; + Addr.Offset = 4; + } else if (PPCSubTarget->hasLFIWAX()) { + LoadOpc = PPC::LFIWAX; + Addr.Offset = 4; + } + } + + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned ResultReg = 0; + if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select an integer-to-floating-point conversion. +bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { + MVT DstVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::f32 && DstVT != MVT::f64) + return false; + + Value *Src = I->getOperand(0); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && + SrcVT != MVT::i32 && SrcVT != MVT::i64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // We can only lower an unsigned convert if we have the newer + // floating-point conversion operations. + if (!IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + + // FIXME: For now we require the newer floating-point conversion operations + // (which are present only on P7 and A2 server models) when converting + // to single-precision float. Otherwise we have to generate a lot of + // fiddly code to avoid double rounding. If necessary, the fiddly code + // can be found in PPCTargetLowering::LowerINT_TO_FP(). + if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT()) + return false; + + // Extend the input if necessary. + if (SrcVT == MVT::i8 || SrcVT == MVT::i16) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return false; + SrcVT = MVT::i64; + SrcReg = TmpReg; + } + + // Move the integer value to an FPR. + unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned); + if (FPReg == 0) + return false; + + // Determine the opcode for the conversion. + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); + unsigned Opc; + + if (DstVT == MVT::f32) + Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS; + else + Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(FPReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move the floating-point value in SrcReg into an integer destination +// register, and return the register (or zero if we can't handle it). +// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned) { + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + // Note that if have STFIWX available, we could use a 4-byte stack + // slot for i32, but this being fast-isel we'll just go with the + // easiest code gen possible. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the FPR. + if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) + return 0; + + // Reload it into a GPR. If we want an i32, modify the address + // to have a 4-byte offset so we load from the right place. + if (VT == MVT::i32) + Addr.Offset = 4; + + // Look at the currently assigned register for this instruction + // to determine the required register class. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; + + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select a floating-point-to-integer conversion. +bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { + MVT DstVT, SrcVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::i32 && DstVT != MVT::i64) + return false; + + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + if (!isTypeLegal(SrcTy, SrcVT)) + return false; + + if (SrcVT != MVT::f32 && SrcVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // Convert f32 to f64 if necessary. This is just a meaningless copy + // to get the register class right. COPY_TO_REGCLASS is needed since + // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream. + const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); + if (InRC == &PPC::F4RCRegClass) { + unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) + .addReg(SrcReg).addImm(PPC::F8RCRegClassID); + SrcReg = TmpReg; + } + + // Determine the opcode for the conversion, which takes place + // entirely within FPRs. + unsigned DestReg = createResultReg(&PPC::F8RCRegClass); + unsigned Opc; + + if (DstVT == MVT::i32) + if (IsSigned) + Opc = PPC::FCTIWZ; + else + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + else + Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Now move the integer value from a float register to an integer register. + unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + if (IntReg == 0) + return false; + + UpdateValueMap(I, IntReg); + return true; +} + // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { @@ -868,8 +1165,10 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } if (UseImm) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) - .addReg(SrcReg1).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg) + .addReg(SrcReg1) + .addImm(Imm); UpdateValueMap(I, ResultReg); return true; } @@ -884,12 +1183,342 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { if (ISDOpcode == ISD::SUB) std::swap(SrcReg1, SrcReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); UpdateValueMap(I, ResultReg); return true; } +// Handle arguments to a call that we're attempting to fast-select. +// Return false if the arguments are too complex for us at the moment. +bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, + SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, + SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg) { + SmallVector ArgLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. + bool isELFv2ABI = PPCSubTarget->isELFv2ABI(); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); + CCInfo.AllocateStack(LinkageSize, 8); + + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); + + // Bail out if we can't handle any of the arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Skip vector arguments for now, as well as long double and + // uint128_t, and anything that isn't passed in a register. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || + !VA.isRegLoc() || VA.needsCustom()) + return false; + + // Skip bit-converted arguments for now. + if (VA.getLocInfo() == CCValAssign::BCvt) + return false; + } + + // Get a count of how many bytes are to be pushed onto the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. + NumBytes = std::max(NumBytes, LinkageSize + 64); + + // Issue CALLSEQ_START. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TII.getCallFrameSetupOpcode())) + .addImm(NumBytes); + + // Prepare to assign register arguments. Every argument uses up a + // GPR protocol register even if it's passed in a floating-point + // register. + unsigned NextGPR = PPC::X3; + unsigned NextFPR = PPC::F1; + + // Process arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Handle argument promotion and bitcasts. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) + llvm_unreachable("Failed to emit a sext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::AExt: + case CCValAssign::ZExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) + llvm_unreachable("Failed to emit a zext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::BCvt: { + // FIXME: Not yet handled. + llvm_unreachable("Should have bailed before getting here!"); + break; + } + } + + // Copy this argument to the appropriate register. + unsigned ArgReg; + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { + ArgReg = NextFPR++; + ++NextGPR; + } else + ArgReg = NextGPR++; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); + RegArgs.push_back(ArgReg); + } + + return true; +} + +// For a call that we've determined we can fast-select, finish the +// call sequence and generate a copy to obtain the return value (if any). +void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg) { + // Issue CallSEQ_END. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TII.getCallFrameDestroyOpcode())) + .addImm(NumBytes).addImm(0); + + // Next, generate a copy to obtain the return value. + // FIXME: No multi-register return values yet, though I don't foresee + // any real difficulties there. + if (RetVT != MVT::isVoid) { + SmallVector RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCValAssign &VA = RVLocs[0]; + assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); + assert(VA.isRegLoc() && "Can only return in registers!"); + + MVT DestVT = VA.getValVT(); + MVT CopyVT = DestVT; + + // Ints smaller than a register still arrive in a full 64-bit + // register, so make sure we recognize this. + if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) + CopyVT = MVT::i64; + + unsigned SourcePhysReg = VA.getLocReg(); + unsigned ResultReg = 0; + + if (RetVT == CopyVT) { + const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); + ResultReg = createResultReg(CpyRC); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + + // If necessary, round the floating result to single precision. + } else if (CopyVT == MVT::f64) { + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), + ResultReg).addReg(SourcePhysReg); + + // If only the low half of a general register is needed, generate + // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be + // used along the fast-isel path (not lowered), and downstream logic + // also doesn't like a direct subreg copy on a physical reg.) + } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { + ResultReg = createResultReg(&PPC::GPRCRegClass); + // Convert physical register from G8RC to GPRC. + SourcePhysReg -= PPC::X0 - PPC::R0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + } + + assert(ResultReg && "ResultReg unset!"); + UsedRegs.push_back(SourcePhysReg); + UpdateValueMap(I, ResultReg); + } +} + +// Attempt to fast-select a call instruction. +bool PPCFastISel::SelectCall(const Instruction *I) { + const CallInst *CI = cast(I); + const Value *Callee = CI->getCalledValue(); + + // Can't handle inline asm. + if (isa(Callee)) + return false; + + // Allow SelectionDAG isel to handle tail calls. + if (CI->isTailCall()) + return false; + + // Obtain calling convention. + ImmutableCallSite CS(CI); + CallingConv::ID CC = CS.getCallingConv(); + + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FTy = cast(PT->getElementType()); + bool IsVarArg = FTy->isVarArg(); + + // Not ready for varargs yet. + if (IsVarArg) + return false; + + // Handle simple calls for now, with legal return types and + // those that can be extended. + Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8) + return false; + + // FIXME: No multi-register return values yet. + if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && + RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && + RetVT != MVT::f64) { + SmallVector RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + if (RVLocs.size() > 1) + return false; + } + + // Bail early if more than 8 arguments, as we only currently + // handle arguments passed in registers. + unsigned NumArgs = CS.arg_size(); + if (NumArgs > 8) + return false; + + // Set up the argument vectors. + SmallVector Args; + SmallVector ArgRegs; + SmallVector ArgVTs; + SmallVector ArgFlags; + + Args.reserve(NumArgs); + ArgRegs.reserve(NumArgs); + ArgVTs.reserve(NumArgs); + ArgFlags.reserve(NumArgs); + + for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); + II != IE; ++II) { + // FIXME: ARM does something for intrinsic calls here, check into that. + + unsigned AttrIdx = II - CS.arg_begin() + 1; + + // Only handle easy calls for now. It would be reasonably easy + // to handle <= 8-byte structures passed ByVal in registers, but we + // have to ensure they are right-justified in the register. + if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || + CS.paramHasAttr(AttrIdx, Attribute::StructRet) || + CS.paramHasAttr(AttrIdx, Attribute::Nest) || + CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + return false; + + ISD::ArgFlagsTy Flags; + if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) + Flags.setZExt(); + + Type *ArgTy = (*II)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) + return false; + + if (ArgVT.isVector()) + return false; + + unsigned Arg = getRegForValue(*II); + if (Arg == 0) + return false; + + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(*II); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Process the arguments. + SmallVector RegArgs; + unsigned NumBytes; + + if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, IsVarArg)) + return false; + + // FIXME: No handling for function pointers yet. This requires + // implementing the function descriptor (OPD) setup. + const GlobalValue *GV = dyn_cast(Callee); + if (!GV) + return false; + + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + + // Add implicit physical register uses to the call. + for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) + MIB.addReg(RegArgs[II], RegState::Implicit); + + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (PPCSubTarget->isELFv2ABI()) + MIB.addReg(PPC::X2, RegState::Implicit); + + // Add a register mask with the call-preserved registers. Proper + // defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + + // Finish off the call including any return values. + SmallVector UsedRegs; + finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); + + // Set all unused physregs defs as dead. + static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; +} + // Attempt to fast-select a return instruction. bool PPCFastISel::SelectRet(const Instruction *I) { @@ -924,8 +1553,8 @@ bool PPCFastISel::SelectRet(const Instruction *I) { const Constant *C = cast(RV); unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); unsigned RetReg = ValLocs[0].getLocReg(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - RetReg).addReg(SrcReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); RetRegs.push_back(RetReg); } else { @@ -980,14 +1609,14 @@ bool PPCFastISel::SelectRet(const Instruction *I) { } } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetRegs[i]) .addReg(SrcReg); } } } - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BLR)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) @@ -1017,7 +1646,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); Opc = PPC::EXTSW_32_64; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Unsigned 32-bit extensions use RLWINM. @@ -1029,7 +1658,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); MB = 16; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); @@ -1042,7 +1671,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, MB = 48; else MB = 32; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICL_32_64), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); } @@ -1056,9 +1685,9 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) .addReg(AddrReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) @@ -1067,6 +1696,35 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { return true; } +// Attempt to fast-select an integer truncate instruction. +bool PPCFastISel::SelectTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) + return false; + + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // The only interesting case is when we need to switch register classes. + if (SrcVT == MVT::i64) { + unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), + ResultReg).addReg(SrcReg, 0, PPC::sub_32); + SrcReg = ResultReg; + } + + UpdateValueMap(I, SrcReg); + return true; +} + // Attempt to fast-select an integer extend instruction. bool PPCFastISel::SelectIntExt(const Instruction *I) { Type *DestTy = I->getType(); @@ -1119,14 +1777,32 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { return SelectBranch(I); case Instruction::IndirectBr: return SelectIndirectBr(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectIToFP(I, /*IsSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*IsSigned*/ false); + case Instruction::FPToSI: + return SelectFPToI(I, /*IsSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*IsSigned*/ false); case Instruction::Add: return SelectBinaryIntOp(I, ISD::ADD); case Instruction::Or: return SelectBinaryIntOp(I, ISD::OR); case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); + case Instruction::Call: + if (dyn_cast(I)) + return false; + return SelectCall(I); case Instruction::Ret: return SelectRet(I); + case Instruction::Trunc: + return SelectTrunc(I); case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); @@ -1147,7 +1823,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { return 0; // All FP constants are loaded from the constant pool. - unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); assert(Align > 0 && "Unexpectedly missing alignment information!"); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -1163,19 +1839,28 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), TmpReg) .addConstantPoolIndex(Idx).addReg(PPC::X2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) - .addReg(TmpReg) - .addMemOperand(MMO); + // But for large code model, we must generate a LDtocL followed + // by the LF[SD]. + if (CModel == CodeModel::Large) { + unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), + TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg2); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) + .addReg(TmpReg) + .addMemOperand(MMO); } return DestReg; @@ -1196,26 +1881,20 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // FIXME: Jump tables are not yet required because fast-isel doesn't // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias, use the aliasee for determining thread-locality. - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); - assert((GVar || isa(GV)) && "Unexpected GV subclass!"); - } // FIXME: We don't yet handle the complexity of TLS. - bool IsTLS = GVar && GVar->isThreadLocal(); - if (IsTLS) + if (GV->isThreadLocal()) return 0; // For small code model, generate a simple TOC load. if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg) - .addGlobalAddress(GV).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), + DestReg) + .addGlobalAddress(GV) + .addReg(PPC::X2); else { - // If the address is an externally defined symbol, a symbol with - // common or externally available linkage, a function address, or a + // If the address is an externally defined symbol, a symbol with common + // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: // LDtocL(GV, ADDIStocHA(%X2, GV)) @@ -1223,20 +1902,21 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // ADDItocL(ADDIStocHA(%X2, GV), GV) // Either way, start with the ADDIStocHA: unsigned HighPartReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // !GVar implies a function address. An external variable is one - // without an initializer. // If/when switches are implemented, jump tables should be handled // on the "if" path here. - if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || - GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + if (CModel == CodeModel::Large || + (GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker())) || + GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); else // Otherwise generate the ADDItocL. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); } @@ -1254,21 +1934,21 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); if (isInt<16>(Imm)) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg) .addImm(Imm); else if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) .addImm(Hi); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) .addReg(TmpReg).addImm(Lo); } else // Just Hi bits. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) .addImm(Hi); @@ -1308,7 +1988,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg2; if (Imm) { TmpReg2 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); } else TmpReg2 = TmpReg1; @@ -1316,14 +1996,14 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg3, Hi, Lo; if ((Hi = (Remainder >> 16) & 0xFFFF)) { TmpReg3 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), TmpReg3).addReg(TmpReg2).addImm(Hi); } else TmpReg3 = TmpReg2; if ((Lo = Remainder & 0xFFFF)) { unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), ResultReg).addReg(TmpReg3).addImm(Lo); return ResultReg; } @@ -1335,6 +2015,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + const ConstantInt *CI = cast(C); + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) @@ -1348,7 +2037,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { if (isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(CI->getSExtValue()); return ImmReg; } @@ -1379,15 +2068,30 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { return PPCMaterializeGV(GV, VT); else if (isa(C)) return PPCMaterializeInt(C, VT); - // TBD: Global values. return 0; } // Materialize the address created by an alloca into a register, and -// return the register number (or zero if we failed to handle it). TBD. +// return the register number (or zero if we failed to handle it). unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { - return AI && 0; + // Don't handle dynamic allocas. + if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; + + MVT VT; + if (!isLoadTypeLegal(AI->getType(), VT)) return 0; + + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(SI->second).addImm(0); + return ResultReg; + } + + return 0; } // Fold loads into extends when possible. @@ -1461,7 +2165,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned ResultReg = MI->getOperand(0).getReg(); - if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) return false; MI->eraseFromParent(); @@ -1485,6 +2189,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -1564,6 +2277,6 @@ namespace llvm { if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) return new PPCFastISel(FuncInfo, LibInfo); - return 0; + return nullptr; } }