#define DEBUG_TYPE "ppcfastisel"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// FastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
using namespace llvm;
namespace {
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectCall(const Instruction *I);
bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
// Utility routines.
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
// Call handling routines.
private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg);
CCAssignFn *usePPC32CCs(unsigned Flag);
private:
TmpOffset += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
return true;
}
+// Try to fast-select a floating-point extend.  Only the f32 -> f64 case is
+// accepted; it needs no instruction, so the result is simply mapped onto the
+// register that already holds the source.  Returns false when not handled.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+  Value *Operand = I->getOperand(0);
+  EVT FromVT = TLI.getValueType(Operand->getType(), true);
+  EVT ToVT = TLI.getValueType(I->getType(), true);
+
+  // Anything other than single -> double precision is rejected.
+  if (!(FromVT == MVT::f32 && ToVT == MVT::f64))
+    return false;
+
+  unsigned InReg = getRegForValue(Operand);
+  if (InReg == 0)
+    return false;
+
+  // The extend itself is free: reuse the source register as the result.
+  UpdateValueMap(I, InReg);
+  return true;
+}
+
+// Try to fast-select a floating-point truncate.  Only f64 -> f32 is
+// accepted; the source is rounded to single precision with FRSP into a
+// fresh F4RC register.  Returns false when not handled.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+  Value *Operand = I->getOperand(0);
+  EVT FromVT = TLI.getValueType(Operand->getType(), true);
+  EVT ToVT = TLI.getValueType(I->getType(), true);
+
+  // Anything other than double -> single precision is rejected.
+  if (!(FromVT == MVT::f64 && ToVT == MVT::f32))
+    return false;
+
+  unsigned InReg = getRegForValue(Operand);
+  if (InReg == 0)
+    return false;
+
+  // Emit the rounding instruction.
+  unsigned RoundedReg = createResultReg(&PPC::F4RCRegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+          RoundedReg)
+    .addReg(InReg);
+
+  UpdateValueMap(I, RoundedReg);
+  return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+//
+// SrcVT is the integer type held in SrcReg; IsSigned selects sign- versus
+// zero-extension of a 32-bit source.  The value is spilled to a fresh 8-byte
+// stack slot and reloaded into an F8RC register.  Returns that register, or
+// zero if any step could not be emitted.
+//
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
+// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+                                     bool IsSigned) {
+
+  // If necessary, extend 32-bit int to 64-bit.
+  if (SrcVT == MVT::i32) {
+    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+      return 0;
+    SrcReg = TmpReg;
+  }
+
+  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  Address Addr;
+  Addr.BaseType = Address::FrameIndexBase;
+  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+  // Store the value from the GPR.
+  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+    return 0;
+
+  // Load the integer value into an FPR.  The kind of load used depends
+  // on a number of conditions.
+  unsigned LoadOpc = PPC::LFD;
+
+  if (SrcVT == MVT::i32) {
+    // Only the word-sized loads may be pointed at offset 4 of the slot.
+    // When neither applies we fall back to LFD, which reads all 8 bytes of
+    // the already-extended value and therefore must stay at offset 0;
+    // loading a doubleword from offset 4 would run past the slot.
+    // NOTE(review): offset 4 presumes the low word sits at the higher
+    // address (big-endian layout) -- confirm if other layouts are supported.
+    if (!IsSigned) {
+      LoadOpc = PPC::LFIWZX;
+      Addr.Offset = 4;
+    } else if (PPCSubTarget.hasLFIWAX()) {
+      LoadOpc = PPC::LFIWAX;
+      Addr.Offset = 4;
+    }
+  }
+
+  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+  unsigned ResultReg = 0;
+  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+    return 0;
+
+  return ResultReg;
+}
+
+// Try to fast-select an integer-to-floating-point conversion; IsSigned
+// distinguishes the signed from the unsigned form.  The integer source is
+// widened when narrow, moved into an FPR, and converted there with one of
+// the FCFID* instructions.  Returns false when the case is not handled.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+  // The result has to be a legal f32 or f64.
+  MVT ResultVT;
+  if (!isTypeLegal(I->getType(), ResultVT))
+    return false;
+  if (!(ResultVT == MVT::f32 || ResultVT == MVT::f64))
+    return false;
+
+  // The source has to be a simple i8/i16/i32/i64.
+  Value *Operand = I->getOperand(0);
+  EVT OperandEVT = TLI.getValueType(Operand->getType(), true);
+  if (!OperandEVT.isSimple())
+    return false;
+
+  MVT IntVT = OperandEVT.getSimpleVT();
+  const bool IsNarrow = (IntVT == MVT::i8 || IntVT == MVT::i16);
+  if (!IsNarrow && IntVT != MVT::i32 && IntVT != MVT::i64)
+    return false;
+
+  unsigned IntReg = getRegForValue(Operand);
+  if (!IntReg)
+    return false;
+
+  // Two cases need the newer floating-point conversion operations:
+  //  - any unsigned convert, and
+  //  - any convert to single precision.
+  // FIXME: For the single-precision case the requirement is only there to
+  // avoid generating a lot of fiddly code that prevents double rounding
+  // (the newer operations exist only on P7 and A2 server models).  If
+  // necessary, the fiddly code can be found in
+  // PPCTargetLowering::LowerINT_TO_FP().
+  if (!PPCSubTarget.hasFPCVT() && (!IsSigned || ResultVT == MVT::f32))
+    return false;
+
+  // Widen i8/i16 sources to i64 first.
+  if (IsNarrow) {
+    unsigned WideReg = createResultReg(&PPC::G8RCRegClass);
+    if (!PPCEmitIntExt(IntVT, IntReg, MVT::i64, WideReg, !IsSigned))
+      return false;
+    IntVT = MVT::i64;
+    IntReg = WideReg;
+  }
+
+  // Get the integer bits into a floating-point register.
+  unsigned FPReg = PPCMoveToFPReg(IntVT, IntReg, IsSigned);
+  if (!FPReg)
+    return false;
+
+  // Pick the conversion opcode from signedness and result precision.
+  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned ConvOpc;
+  if (IsSigned)
+    ConvOpc = (ResultVT == MVT::f32) ? PPC::FCFIDS : PPC::FCFID;
+  else
+    ConvOpc = (ResultVT == MVT::f32) ? PPC::FCFIDUS : PPC::FCFIDU;
+
+  // Emit the convert and record its result.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ConvOpc), DestReg)
+    .addReg(FPReg);
+
+  UpdateValueMap(I, DestReg);
+  return true;
+}
+
+// Transfer the value held in the floating-point register SrcReg into an
+// integer register of type VT by bouncing it through a stack slot.  Returns
+// the integer register, or zero if the store or reload could not be emitted.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+                                      unsigned SrcReg, bool IsSigned) {
+  // Use a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  // Note that if we have STFIWX available, we could use a 4-byte stack
+  // slot for i32, but this being fast-isel we'll just go with the
+  // easiest code gen possible.
+  Address SlotAddr;
+  SlotAddr.BaseType = Address::FrameIndexBase;
+  SlotAddr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+  // Spill the FPR contents to the slot.
+  if (!PPCEmitStore(MVT::f64, SrcReg, SlotAddr))
+    return 0;
+
+  // For an i32 result, reload from a 4-byte offset so the load reads
+  // from the right place.
+  if (VT == MVT::i32)
+    SlotAddr.Offset = 4;
+
+  // If a register is already assigned to this instruction, its class
+  // dictates the class used for the reload; otherwise none is requested.
+  const TargetRegisterClass *LoadRC = 0;
+  if (unsigned Existing = FuncInfo.ValueMap[I])
+    LoadRC = MRI.getRegClass(Existing);
+
+  // Reload into a GPR.
+  unsigned IntReg = 0;
+  return PPCEmitLoad(VT, IntReg, SlotAddr, LoadRC, !IsSigned) ? IntReg : 0;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+//
+// IsSigned distinguishes fptosi from fptoui.  Only f32/f64 sources and
+// i32/i64 results are handled.  The conversion is done inside an FPR with
+// one of the FCTI* instructions and the result is then moved to an integer
+// register through PPCMoveToIntReg.  Returns false when the case is not
+// handled.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+  MVT DstVT, SrcVT;
+  Type *DstTy = I->getType();
+  if (!isTypeLegal(DstTy, DstVT))
+    return false;
+
+  if (DstVT != MVT::i32 && DstVT != MVT::i64)
+    return false;
+
+  // An unsigned 64-bit result needs FCTIDUZ, one of the newer
+  // floating-point conversion operations.  Unlike the unsigned 32-bit case
+  // there is no older instruction to fall back on, so punt when the
+  // subtarget lacks them rather than emit an unsupported opcode.
+  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT())
+    return false;
+
+  Value *Src = I->getOperand(0);
+  Type *SrcTy = Src->getType();
+  if (!isTypeLegal(SrcTy, SrcVT))
+    return false;
+
+  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+    return false;
+
+  unsigned SrcReg = getRegForValue(Src);
+  if (SrcReg == 0)
+    return false;
+
+  // Convert f32 to f64 if necessary.  This is just a meaningless copy
+  // to get the register class right.  COPY_TO_REGCLASS is needed since
+  // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+  if (InRC == &PPC::F4RCRegClass) {
+    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+      .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+    SrcReg = TmpReg;
+  }
+
+  // Determine the opcode for the conversion, which takes place
+  // entirely within FPRs.
+  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned Opc;
+
+  if (DstVT == MVT::i32) {
+    if (IsSigned)
+      Opc = PPC::FCTIWZ;
+    else
+      // Without the newer operations, convert to a 64-bit integer instead;
+      // only the low word is reloaded afterwards.
+      Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+  } else {
+    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+  }
+
+  // Generate the convert.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+    .addReg(SrcReg);
+
+  // Now move the integer value from a float register to an integer register.
+  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+  if (IntReg == 0)
+    return false;
+
+  UpdateValueMap(I, IntReg);
+  return true;
+}
+
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
return true;
}
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false (before anything is emitted) if they are too complex for us.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // not referenced in this body
+ SmallVectorImpl<unsigned> &ArgRegs, // register holding each argument, by value number
+ SmallVectorImpl<MVT> &ArgVTs, // value type of each argument
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, // flags fed to the calling-convention analysis
+ SmallVectorImpl<unsigned> &RegArgs, // out: physical registers the arguments were copied to
+ CallingConv::ID CC,
+ unsigned &NumBytes, // out: stack bytes reported by the analysis
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // fills ArgLocs
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register.
+ unsigned NextGPR = PPC::X3; // first integer argument register used here
+ unsigned NextFPR = PPC::F1; // first floating-point argument register used here
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: // already the location type; nothing to do
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT; // from here on the argument is the extended value
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt: // any-extend is lowered the same way as zero-extend
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT; // from here on the argument is the extended value
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled; the bail-out loop above rejects these.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ ++NextGPR; // a floating-point argument also consumes a GPR slot
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg); // reported back to the caller
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+//
+// RetVT is the call's return type (MVT::isVoid for none) and NumBytes is
+// the amount passed to the call-frame destroy instruction.  When a value
+// is returned, the physical register it is copied from is appended to
+// UsedRegs and the resulting virtual register is recorded for I.
+void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                             const Instruction *I, CallingConv::ID CC,
+                             unsigned &NumBytes, bool IsVarArg) {
+  // Issue CALLSEQ_END.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(TII.getCallFrameDestroyOpcode()))
+    .addImm(NumBytes).addImm(0);
+
+  // Next, generate a copy to obtain the return value.
+  // FIXME: No multi-register return values yet, though I don't foresee
+  // any real difficulties there.
+  if (RetVT != MVT::isVoid) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+    // Validate the location count before indexing into the vector.
+    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+    CCValAssign &VA = RVLocs[0];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    MVT DestVT = VA.getValVT();
+    MVT CopyVT = DestVT;
+
+    // Ints smaller than a register still arrive in a full 64-bit
+    // register, so make sure we recognize this.
+    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+      CopyVT = MVT::i64;
+
+    unsigned SourcePhysReg = VA.getLocReg();
+    unsigned ResultReg = 0;
+
+    if (RetVT == CopyVT) {
+      // Same type on both sides: a plain register copy suffices.
+      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+      ResultReg = createResultReg(CpyRC);
+
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(SourcePhysReg);
+
+    // If necessary, round the floating result to single precision.
+    } else if (CopyVT == MVT::f64) {
+      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+              ResultReg).addReg(SourcePhysReg);
+
+    // If only the low half of a general register is needed, generate
+    // a GPRC copy instead of a G8RC copy.  (EXTRACT_SUBREG can't be
+    // used along the fast-isel path (not lowered), and downstream logic
+    // also doesn't like a direct subreg copy on a physical reg.)
+    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+      ResultReg = createResultReg(&PPC::GPRCRegClass);
+      // Convert physical register from G8RC to GPRC.
+      SourcePhysReg -= PPC::X0 - PPC::R0;
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(SourcePhysReg);
+    }
+
+    assert(ResultReg && "ResultReg unset!");
+    UsedRegs.push_back(SourcePhysReg);
+    UpdateValueMap(I, ResultReg);
+  }
+}
+
+// Attempt to fast-select a call instruction.
+//
+// Only direct, non-tail, non-varargs calls to a GlobalValue with at most
+// eight simple arguments and a single-register (or void) return value are
+// handled.  Returns false for everything else.  All bail-outs that do not
+// depend on argument lowering are taken before any instruction is emitted.
+bool PPCFastISel::SelectCall(const Instruction *I) {
+  const CallInst *CI = cast<CallInst>(I);
+  const Value *Callee = CI->getCalledValue();
+
+  // Can't handle inline asm.
+  if (isa<InlineAsm>(Callee))
+    return false;
+
+  // FIXME: No handling for function pointers yet.  This requires
+  // implementing the function descriptor (OPD) setup.  Check this up
+  // front so we don't emit the call-frame setup and argument copies
+  // only to give up afterwards.
+  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+  if (!GV)
+    return false;
+
+  // Allow SelectionDAG isel to handle tail calls.
+  if (CI->isTailCall())
+    return false;
+
+  // Obtain calling convention.
+  ImmutableCallSite CS(CI);
+  CallingConv::ID CC = CS.getCallingConv();
+
+  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  bool IsVarArg = FTy->isVarArg();
+
+  // Not ready for varargs yet.
+  if (IsVarArg)
+    return false;
+
+  // Handle simple calls for now, with legal return types and
+  // those that can be extended.
+  Type *RetTy = I->getType();
+  MVT RetVT;
+  if (RetTy->isVoidTy())
+    RetVT = MVT::isVoid;
+  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+           RetVT != MVT::i8)
+    return false;
+
+  // FIXME: No multi-register return values yet.
+  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+      RetVT != MVT::f64) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+    if (RVLocs.size() > 1)
+      return false;
+  }
+
+  // Bail early if more than 8 arguments, as we only currently
+  // handle arguments passed in registers.
+  unsigned NumArgs = CS.arg_size();
+  if (NumArgs > 8)
+    return false;
+
+  // Set up the argument vectors.
+  SmallVector<Value*, 8> Args;
+  SmallVector<unsigned, 8> ArgRegs;
+  SmallVector<MVT, 8> ArgVTs;
+  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+  Args.reserve(NumArgs);
+  ArgRegs.reserve(NumArgs);
+  ArgVTs.reserve(NumArgs);
+  ArgFlags.reserve(NumArgs);
+
+  for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
+       II != IE; ++II) {
+    // FIXME: ARM does something for intrinsic calls here, check into that.
+
+    // Parameter attribute indices are 1-based.
+    unsigned AttrIdx = II - CS.arg_begin() + 1;
+
+    // Only handle easy calls for now.  It would be reasonably easy
+    // to handle <= 8-byte structures passed ByVal in registers, but we
+    // have to ensure they are right-justified in the register.
+    if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
+        CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
+        CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
+        CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+      return false;
+
+    ISD::ArgFlagsTy Flags;
+    if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
+      Flags.setSExt();
+    if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
+      Flags.setZExt();
+
+    // Accept legal types plus i8/i16, which get extended later.
+    Type *ArgTy = (*II)->getType();
+    MVT ArgVT;
+    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+      return false;
+
+    if (ArgVT.isVector())
+      return false;
+
+    unsigned Arg = getRegForValue(*II);
+    if (Arg == 0)
+      return false;
+
+    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+    Flags.setOrigAlign(OriginalAlignment);
+
+    Args.push_back(*II);
+    ArgRegs.push_back(Arg);
+    ArgVTs.push_back(ArgVT);
+    ArgFlags.push_back(Flags);
+  }
+
+  // Process the arguments.
+  SmallVector<unsigned, 8> RegArgs;
+  unsigned NumBytes;
+
+  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+                       RegArgs, CC, NumBytes, IsVarArg))
+    return false;
+
+  // Build direct call with NOP for TOC restore.
+  // FIXME: We can and should optimize away the NOP for local calls.
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(PPC::BL8_NOP));
+  // Add callee.
+  MIB.addGlobalAddress(GV);
+
+  // Add implicit physical register uses to the call.
+  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+    MIB.addReg(RegArgs[II], RegState::Implicit);
+
+  // Add a register mask with the call-preserved registers.  Proper
+  // defs for return values will be added by setPhysRegsDeadExcept().
+  MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+  // Finish off the call including any return values.
+  SmallVector<unsigned, 4> UsedRegs;
+  finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
+
+  // Set all unused physregs defs as dead.
+  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+  return true;
+}
+
// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {
return true;
}
-// Attempt to fast-select a compare instruction.
-bool PPCFastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
- Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
- if (!OptPPCPred)
+// Attempt to fast-select an integer truncate instruction (i64/i32/i16 source, i32/i16/i8 result).
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) // unsupported source width
return false;
- unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) // unsupported result width
+ return false;
- if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CondReg))
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
return false;
- UpdateValueMap(I, CondReg);
+ // The only interesting case is when we need to switch register classes: an i64 source is copied through its sub_32 subregister into a GPRC register.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ UpdateValueMap(I, SrcReg); // narrower sources reuse their register unchanged
return true;
}
return SelectBranch(I);
case Instruction::IndirectBr:
return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
case Instruction::Add:
return SelectBinaryIntOp(I, ISD::ADD);
case Instruction::Or:
return SelectBinaryIntOp(I, ISD::OR);
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+ if (dyn_cast<IntrinsicInst>(I))
+ return false;
+ return SelectCall(I);
case Instruction::Ret:
return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
- .addReg(TmpReg)
- .addMemOperand(MMO);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
}
return DestReg;
// If GV is an alias, use the aliasee for determining thread-locality.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
- assert((GVar || isa<Function>(GV)) && "Unexpected GV subclass!");
}
// FIXME: We don't yet handle the complexity of TLS.
return PPCMaterializeGV(GV, VT);
else if (isa<ConstantInt>(C))
return PPCMaterializeInt(C, VT);
- // TBD: Global values.
return 0;
}
// Materialize the address created by an alloca into a register, and
-// return the register number (or zero if we failed to handle it). TBD.
+// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
- return AI && 0;
+ // Don't handle dynamic allocas, i.e. those with no entry in StaticAllocaMap.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0; // VT itself is not used afterwards
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) { // always true here given the count() guard above
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0); // frame index of the alloca plus a zero immediate
+ return ResultReg;
+ }
+
+ return 0;
}
// Fold loads into extends when possible.