#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
- PPCRegInfo = TM.getRegisterInfo();
- PPCII = TM.getInstrInfo();
setPow2DivIsCheap();
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ if (Subtarget->isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
// Use the default implementation.
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
return false;
}
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
}
Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
Base = N;
return true; // [r+0]
}
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
unsigned &LoOpFlags, const GlobalValue *GV = 0) {
- HiOpFlags = PPCII::MO_HA16;
- LoOpFlags = PPCII::MO_LO16;
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
// Don't use the pic base if not in PIC relocation model. Or if we are on a
// non-darwin platform. We don't support PIC on other platforms yet.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
+ PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ PPCII::MO_TPREL_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, TPOffsetHi);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
#include "PPCGenCallingConv.inc"
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
return false;
}
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
- const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
- SmallVector<SDValue, 8> &MemOpChains,
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
- bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
// Emit callseq_end just before tailcall node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
- InFlag);
+ InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be moved somewhere else
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, PPCSubTarget);
+
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
return;
}
case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
.addReg(PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
-
MIB.setMemRefs(MMOBegin, MMOEnd);
}
+ // Naked functions never have a base pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+ const PPCRegisterInfo *TRI =
+ static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
}
MIB.setMemRefs(MMOBegin, MMOEnd);
- // FIXME: When we also support base pointers, that register must also be
- // restored here.
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
- MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+ Cond, MI->getOperand(2).getReg(),
+ MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
return SDValue();
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ EVT VT = LS->getMemoryVT();
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LS->getBasePtr();
+ SDValue BaseLoc = Base->getBasePtr();
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+ // If we find a consecutive load, then we're done, otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (SDNode::op_iterator O = ChainNext->op_begin(),
+ OE = ChainNext->op_end(); O != OE; ++O)
+ if (!Visited.count(O->getNode()))
+ Queue.push_back(O->getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+ // loads and token factors. Starting with their uses, recursively look though
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<LoadSDNode>(*UI) &&
+ cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode() != 0) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
// require the next load to appear to be aligned, even though it
// is actually offset from the base pointer by a lesser amount.
int IncOffset = VT.getSizeInBits() / 8;
- int IncValue = IncOffset - 1;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
+
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- // FIXME: We might have another load (with a slightly-different
- // real offset) that we can reuse here.
SDValue ExtraLoad =
DAG.getLoad(VT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncOffset),
}
}
}
+
+ break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
}
}
- // If the user is a MFCR instruction, we know this is safe. Otherwise we
- // give up for right now.
- if (FlagUser->getOpcode() == PPCISD::MFCR)
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
- // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
}
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}