//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here are NOT intended to support
+ // SjLj exception handling, but are a light-weight setjmp/longjmp replacement
+ // to support continuations, user-level threading, etc. As a result, no
+ // other SjLj exception interfaces are implemented; please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
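+ // No truncating vector stores are supported; expand truncating stores
+ // from VT to every other vector value type.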
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
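+ // Likewise, expand all extending loads into this vector type.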
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
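+ // Altivec has vector round-to-integral instructions (vrfim, vrfip,
+ // vrfiz, vrfin) that implement these nodes directly for v4f32.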
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ // Altivec does not contain unordered floating-point compare instructions
+ setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
}
}
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i32;
+ if (!VT.isVector())
+ return MVT::i32;
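+ // Vector comparisons produce a mask vector with one integer element
+ // per compared lane (e.g. v4f32 -> v4i32).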
+ return VT.changeVectorElementTypeToInteger();
}
//===----------------------------------------------------------------------===//
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
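+ // For local-exec, the variable's offset from the thread pointer is
+ // known at link time; add the @tprel@ha/@tprel@l parts of the offset
+ // to the thread-pointer register (X13 on ppc64, R2 on ppc32).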
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
+
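+ // For initial-exec, the thread-pointer offset is loaded from the GOT
+ // (@got@tprel) and added to the thread pointer via ADD_TLS.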
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
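+ // For general-dynamic, materialize the address of the variable's GOT
+ // entry and pass it in X3 to __tls_get_addr (modeled by GET_TLS_ADDR),
+ // which returns the variable's address.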
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
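+ // For local-dynamic, call __tls_get_addr once to get the module's TLS
+ // base (GET_TLSLD_ADDR), then add the variable's dtprel offset to it.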
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
- return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
return Chain;
}
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+ SelectionDAG &DAG, SDValue ArgVal,
+ DebugLoc dl) const {
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
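+ // Only i32 values reach this path, so truncating to i32 recovers the
+ // argument's original type.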
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+}
+
+// Set the size that is at least reserved in caller of this function. Tail
+// call optimized functions' reserved stack space needs to be aligned so that
+// taking the difference between two stack areas will result in an aligned
+// stack.
+void
+PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+ unsigned nAltivecParamsAtEnd,
+ unsigned MinReservedArea,
+ bool isPPC64) const {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign
+ = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+}
+
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain,
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- // All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize==1 || ObjSize==2) {
- CurArgOffset = CurArgOffset + (4 - ObjSize);
+ // Empty aggregate parameters do not take up registers. Examples:
+ // struct { } a;
+ // union { } b;
+ // int c[0];
+ // etc. However, we have to provide a place-holder in InVals, so
+ // pretend we have an 8-byte item at the current address for that
+ // purpose.
+ if (!ObjSize) {
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ continue;
}
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (ObjSize < PtrByteSize)
+ CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
- if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+
+ if (ObjSize < 8) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- EVT ObjType = (ObjSize == 1 ? MVT::i8 :
- (ObjSize == 2 ? MVT::i16 : MVT::i32));
- SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
- ObjType, false, false, 0);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, CurArgOffset),
+ ObjType, false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot. The address of the parameter was already calculated
+ // above (InVals.push_back(FIN)) to be the right-justified
+ // offset within the slot. For this store, we need a new
+ // frame index that points at the beginning of the slot.
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ }
+
MemOps.push_back(Store);
++GPR_idx;
}
-
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
ArgOffset += PtrByteSize;
-
continue;
}
+
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
// to memory. ArgOffset will be the address of the beginning
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Shifted = Val;
-
- // For 64-bit SVR4, small structs come in right-adjusted.
- // Shift them left so the following logic works as expected.
- if (ObjSize < 8) {
- SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT);
- Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt);
- }
-
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN,
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg, ArgOffset),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
} else {
- ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ ArgOffset += ArgSize - j;
break;
}
}
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32) {
+ if (ObjectVT == MVT::i32)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
- if (Flags.isSExt())
- ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
- else if (Flags.isZExt())
- ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
-
- ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
- }
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
++GPR_idx;
} else {
++FPR_idx;
} else {
needsLoad = true;
+ ArgSize = PtrByteSize;
}
ArgOffset += 8;
}
// Set the size that is at least reserved in caller of this function. Tail
- // call optimized function's reserved stack space needs to be aligned so that
+ // call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(true, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true));
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- EVT ObjType = (ObjSize == 1 ? MVT::i8 :
- (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg,
CurArgOffset),
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32) {
+ if (ObjectVT == MVT::i32)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
- if (Flags.isSExt())
- ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
- else if (Flags.isZExt())
- ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
-
- ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
- }
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
++GPR_idx;
} else {
}
// Set the size that is at least reserved in caller of this function. Tail
- // call optimized function's reserved stack space needs to be aligned so that
+ // call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
// Thus for a call through a function pointer, the following actions need
// to be performed:
// 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin_Or_64SVR4()).
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
// 2. Load the address of the function entry point from the function
// descriptor.
// 3. Load the TOC of the callee from the function descriptor into r2.
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
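+ // loaded from the third doubleword of the function descriptor.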
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- EVT VT = VA.getValVT();
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyFromReg(Chain, dl,
- VA.getLocReg(), VT, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- InFlag = Chain.getValue(2);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
}
return Chain;
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
- return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ }
- return LowerCall_Darwin_Or_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
}
SDValue
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
Ins, InVals);
}
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
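+ // (The memcpy may lower to a libcall, and call sequences cannot nest.)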
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
SDValue
-PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
-
- unsigned NumOps = Outs.size();
+ unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
unsigned nAltivecParamsAtEnd = 0;
// Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
+ // area, and parameter passing area. We start with at least 48 bytes, which
+ // is reserved space for [SP][CR][LR][3 x unused].
+ // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
+ // of this call.
unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
+ Outs, OutVals, nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
- SDValue StackPtr;
- if (isPPC64)
- StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
- else
- StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
// Figure out which arguments are going to go in registers, and which in
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
-
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- // On PPC64, promote integers to 64-bit values.
- if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // Promote integers to 64-bit values.
+ if (Arg.getValueType() == MVT::i32) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
// struct x { short a; char b; }
// will have Size = 4. With #pragma pack(1), it will have Size = 3.
// These are the proper values we need for right-justifying the
- // aggregate in a parameter register for 64-bit SVR4.
+ // aggregate in a parameter register.
unsigned Size = Flags.getByValSize();
- // FOR DARWIN ONLY: Very small objects are passed right-justified.
- // Everything else is passed left-justified.
- // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must
- // be passed right-justified.
- if (Size==1 || Size==2 ||
- (Size==4 && isSVR4ABI)) {
+
+ // An empty aggregate parameter takes up no storage and no
+ // registers.
+ if (Size == 0)
+ continue;
+
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
- } else {
- SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
- NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
- ArgOffset += PtrByteSize;
+ continue;
}
+ }
+
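+ // Out of registers: memcpy the small aggregate, right-justified, into
+ // its doubleword slot in the parameter save area.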
+ if (GPR_idx == NumGPRs && Size < 8) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
continue;
}
// Copy entire object into memory. There are cases where gcc-generated
// registers. (This is not what the doc says.)
// FIXME: The above statement is likely due to a misunderstanding of the
- // documents. At least for 64-bit SVR4, all arguments must be copied
- // into the parameter area BY THE CALLEE in the event that the callee
- // takes the address of any formal argument. That has not yet been
- // implemented. However, it is reasonable to use the stack area as a
- // staging area for the register load.
-
- // Skip this for small aggregates under 64-bit SVR4, as we will use
- // the same slot for a right-justified copy, below.
- if (Size >= 8 || !isSVR4ABI) {
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
- NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
- }
-
- // FOR 64-BIT SVR4: When a register is available, pass the
- // aggregate right-justified.
- if (isSVR4ABI && Size < 8 && GPR_idx != NumGPRs) {
+ // documents. All arguments must be copied into the parameter area BY
+ // THE CALLEE in the event that the callee takes the address of any
+ // formal argument. That has not yet been implemented. However, it is
+ // reasonable to use the stack area as a staging area for the register
+ // load.
+
+ // Skip this for small aggregates, as we will use the same slot for a
+ // right-justified copy, below.
+ if (Size >= 8)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // When a register is available, pass a small aggregate right-justified.
+ if (Size < 8 && GPR_idx != NumGPRs) {
// The easiest way to get this right-justified in a register
// is to copy the structure into the rightmost portion of a
// local variable slot, then load the whole slot into the
// parameter save area instead of a new local variable.
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
-
- // Place the memcpy outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
- NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
// Load the slot into the register.
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
continue;
}
- // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
- // copy the pieces of the object that fit into registers from the
- // parameter save area.
+ // For aggregates larger than PtrByteSize, copy the pieces of the
+ // object that fit into registers from the parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += PtrByteSize;
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
if (isVarArg) {
- SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ // A single float or an aggregate containing only a single float
+ // must be passed right-justified in the stack doubleword, and
+ // in the GPR, if one is available.
+ SDValue StoreOff;
+ if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ } else
+ StoreOff = PtrOff;
+
+ SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(),
- false, false, false, 0);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- } else {
+ } else if (GPR_idx != NumGPRs)
// If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
- !isPPC64) // PPC64 has 64-bit GPR's obviously :)
- ++GPR_idx;
- }
+ ++GPR_idx;
} else {
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
- if (isPPC64)
- ArgOffset += 8;
- else
- ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ ArgOffset += 8;
break;
case MVT::v4f32:
case MVT::v4i32:
if (VR_idx != NumVRs) {
// Doesn't have GPR space allocated.
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
- } else if (nAltivecParamsAtEnd==0) {
- // We are emitting Altivec params in order.
+ } else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
+ true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
break;
}
}
- // If all Altivec parameters fit in registers, as they usually do,
- // they get stack space following the non-Altivec parameters. We
- // don't track this here because nobody below needs it.
- // If there are more Altivec parameters than fit in registers emit
- // the stores here.
- if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
- unsigned j = 0;
- // Offset is aligned; skip 1st 12 params which go in V registers.
- ArgOffset = ((ArgOffset+15)/16)*16;
- ArgOffset += 12*16;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- EVT ArgType = Outs[i].VT;
- if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
- ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
- if (++j > NumVRs) {
- SDValue PtrOff;
- // We are emitting Altivec params in order.
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
- TailCallArguments, dl);
- ArgOffset += 16;
- }
- }
- }
- }
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
- if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
+ if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
!dyn_cast<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG)) {
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
+ // R12 must contain the address of an indirect callee. This does not
+ // mean the MTCTR instruction must use R12; it's easier to model this
+ // as an extra parameter, so do that.
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
- // On Darwin, R12 must contain the address of an indirect callee. This does
- // not mean the MTCTR instruction must use R12; it's easier to model this as
- // an extra parameter, so do that.
- if (!isTailCall &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG))
- RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
- PPC::R12), Callee));
-
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
}
if (isTailCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
Ins, InVals);
}
-bool
-PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
- RVLocs, Context);
- return CCInfo.CheckReturn(Outs, RetCC_PPC);
-}
-
SDValue
-PPCTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamic
+ // stack allocation and for restoring the caller's stack pointer in this
+ // function's epilog. This is done because, by tail calling, the called
+ // function might overwrite the value in this function's (MF) stack pointer
+ // stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs, OutVals,
+ nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const uint16_t GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const uint16_t GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ // Very small objects are passed right-justified. Everything else is
+ // passed left-justified.
+ if (Size==1 || Size==2) {
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+              !isPPC64) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
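+          // For example, on 32-bit targets a double in f1 shadows r3 and
+          // r4, while a float in f1 shadows only r3.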
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+        // These go aligned on the stack, or in the corresponding R registers
+        // when within range. The Darwin PPC ABI doc claims they also go in
+        // V registers; in fact gcc does this only for arguments that are
+        // prototyped, not for those that match the ellipsis (...). We do it
+        // for all arguments; it seems to work.
+        while (ArgOffset % 16 != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+  // If there are more Altivec parameters than fit in registers, emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+    // Offset is aligned; skip the first 12 params, which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
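+    // For example, the 13th vector argument lands at the 16-byte-aligned
+    // offset plus the 12*16 = 192 bytes reserved for in-register vectors.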
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+      !isa<GlobalAddressSDNode>(Callee) &&
+      !isa<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
+
+ SDValue Arg = OutVals[i];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+      // is the case, we are guaranteed the value will fit into the 53-bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
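+      // Worked example (illustrative): SINT = 2^53 + 5 has nonzero low
+      // bits and would be rounded during the i64->f64 conversion; Round
+      // becomes 2^53 + 2048, which converts to double exactly while still
+      // recording, below the single-precision rounding position, that
+      // some low bits were set.
+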
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
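+      // (SINT >> 53 yields 0 or -1 exactly when bits 63..53 are all
+      // sign-bit copies; adding 1 maps those two cases to 1 and 0, so the
+      // unsigned > 1 test fires only when the twiddled value is needed.)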
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
if (Op.getValueType() == MVT::f32)
FP = DAG.getNode(ISD::FP_ROUND, dl,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
- NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Glue); // unused in this context
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
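+    // For example, with v8i16: splat(30) = vsplth(15) + vsplth(15),
+    // splat(17) = vsplth(1) - vsplth(-16), and
+    // splat(-17) = vsplth(-1) + vsplth(-16).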
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<EVT> VTs;
- VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
return BB;
}
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+  // first slot of the buffer and the stack address in the third. Following
+  // the X86 target code, we'll store the jump address in the second slot. We
+  // also need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
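+  //
+  // Illustrative slot layout (each slot is PVT.getStoreSize() bytes):
+  //   buf[0]: frame address (stored by the front end)
+  //   buf[1]: jump address (LR, stored below at LabelOffset)
+  //   buf[2]: stack address (stored by the front end)
+  //   buf[3]: TOC pointer (R2, 64-bit SVR4 only)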
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+  // Prepare a virtual register to hold the IP.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+  // Since FP is only written here and never read, it is treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
- VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
case 'v':
case 'y':
return C_RegisterClass;
+ case 'Z':
+ // FIXME: While Z does indicate a memory constraint, it specifically
+ // indicates an r+r address (used in conjunction with the 'y' modifier
+ // in the replacement string). Currently, we're forcing the base
+ // register to be r0 in the asm printer (which is interpreted as zero)
+ // and forming the complete address in the second register. This is
+ // suboptimal.
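+    // A typical use would be something like:
+    //   asm ("lwbrx %0, %y1" : "=r" (val) : "Z" (*ptr));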
+ return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
case 'y':
weight = CW_Register;
break;
+ case 'Z':
+ weight = CW_Memory;
+ break;
}
return weight;
}
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
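+      // r0 reads as zero when used in an address computation, so 'b'
+      // selects the register classes that exclude it.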
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &PPC::F4RCRegClass);
- if (VT == MVT::f64)
+ if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getFnAttributes().hasNakedAttr();
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+  // Naked functions never have a frame pointer, and so we use r1. For all
+  // other functions, this decision must be deferred until prologue/epilogue
+  // insertion (PEI).
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
/// lowering. If DstAlign is zero that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero it means there is
/// no need to check it against the alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+  // Although accessing unaligned addresses is not as efficient as accessing
+  // aligned addresses, it is generally more efficient than manual expansion,
+  // and it generally traps to software emulation only when crossing a page
+  // boundary.
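+  //
+  // For example, i32/i64/f32/f64 accesses return true here, while vector
+  // types and ppcf128 are rejected by the checks below.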
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd