#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ParameterAttributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
cl::Hidden);
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+ : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()),
+ PPCAtomicLabelIndex(0) {
setPow2DivIsCheap();
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
// PowerPC has no intrinsics for these particular operations
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+ setOperationAction(ISD::ATOMIC_LAS , MVT::i32 , Custom);
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i32 , Custom);
+ setOperationAction(ISD::ATOMIC_SWAP , MVT::i32 , Custom);
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ setOperationAction(ISD::ATOMIC_LAS , MVT::i64 , Custom);
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i64 , Custom);
+ setOperationAction(ISD::ATOMIC_SWAP , MVT::i64 , Custom);
+ }
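+  // These custom-lower to load-reserved/store-conditional loops built from the
+  // LARX/STCX nodes added below. A rough sketch of the code this yields for a
+  // 32-bit atomic add (illustrative only, not the exact emitted sequence):
+  //   retry: lwarx  r5, 0, r3    ; load word and reserve
+  //          add    r5, r5, r4   ; apply the operation
+  //          stwcx. r5, 0, r3    ; store iff the reservation still holds
+  //          bne-   retry        ; reservation lost, try again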
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
} else {
// 32-bit PowerPC wants to expand i64 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
// add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
- setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
// We promote all shuffles to v16i8.
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
// We promote all non-typed operations to v4i32.
- setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32);
- setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32);
- setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32);
- setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32);
- setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
- setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
// No other operations are legal.
- setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- setSetCCResultType(MVT::i32);
setShiftAmountType(MVT::i32);
setSetCCResultContents(ZeroOrOneSetCCResult);
computeRegisterProperties();
}
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ TargetMachine &TM = getTargetMachine();
+  // Darwin passes everything on a 4-byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME Elf TBD
+ return 4;
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::CMP_UNRESERVE: return "PPCISD::CMP_UNRESERVE";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
}
}
+
+MVT PPCTargetLowering::getSetCCResultType(const SDOperand &) const {
+ return MVT::i32;
+}
+
+
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
uint64_t Value = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
Value = CN->getValue();
- ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
+ ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Value = FloatToBits(CN->getValueAPF().convertToFloat());
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- uint64_t RHSKnownZero, RHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
- if (LHSKnownZero) {
- DAG.ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne);
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- if ((LHSKnownZero | RHSKnownZero) == ~0U) {
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
Base = N.getOperand(0);
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
Base = N.getOperand(0);
if (!EnablePPCPreinc) return false;
SDOperand Ptr;
- MVT::ValueType VT;
+ MVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
return false;
// PowerPC doesn't have preinc load/store instructions for vectors.
- if (MVT::isVector(VT))
+ if (VT.isVector())
return false;
// TODO: Check reg+reg first.
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType PtrVT = Op.getValueType();
+SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
return Lo;
}
-static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType PtrVT = Op.getValueType();
+SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDOperand Zero = DAG.getConstant(0, PtrVT);
return Lo;
}
-static SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
+ SelectionDAG &DAG) {
assert(0 && "TLS not implemented for PPC.");
+ return SDOperand(); // Not reached
}
-static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType PtrVT = Op.getValueType();
+SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
}
-static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If we're comparing for equality to zero, expose the fact that this is
// fold the new nodes.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
- MVT::ValueType VT = Op.getOperand(0).getValueType();
+ MVT VT = Op.getOperand(0).getValueType();
SDOperand Zext = Op.getOperand(0);
- if (VT < MVT::i32) {
+ if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
}
- unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
DAG.getConstant(Log2b, MVT::i32));
// condition register, reading it back out, and masking the correct bit. The
// normal approach here uses sub to do this instead of xor. Using xor exposes
// the result to other bit-twiddling opportunities.
- MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
- if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- MVT::ValueType VT = Op.getValueType();
+ MVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ MVT VT = Op.getValueType();
SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
Op.getOperand(1));
return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
return SDOperand();
}
-static SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG,
+SDOperand PPCTargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG,
int VarArgsFrameIndex,
int VarArgsStackOffset,
unsigned VarArgsNumGPR,
const PPCSubtarget &Subtarget) {
assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+ return SDOperand(); // Not reached
}
-static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
+SDOperand PPCTargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG,
int VarArgsFrameIndex,
int VarArgsStackOffset,
unsigned VarArgsNumGPR,
if (Subtarget.isMachoABI()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDOperand StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- uint64_t FrameOffset = MVT::getSizeInBits(PtrVT)/8;
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
SDOperand ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
- uint64_t StackOffset = MVT::getSizeInBits(PtrVT)/8 - 1;
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
SDOperand ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
uint64_t FPROffset = 1;
return FPR;
}
-static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
- int &VarArgsFrameIndex,
- int &VarArgsStackOffset,
- unsigned &VarArgsNumGPR,
- unsigned &VarArgsNumFPR,
- const PPCSubtarget &Subtarget) {
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag,
+ bool isVarArg, unsigned PtrByteSize) {
+ MVT ArgVT = Arg.getValueType();
+ ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
+  unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
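+  // Round up to whole pointer-size slots; e.g. a 5-byte byval argument on a
+  // 32-bit target occupies 8 bytes (two slots).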
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDOperand
+PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
+ SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SmallVector<SDOperand, 8> ArgValues;
SDOperand Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
bool isMachoABI = Subtarget.isMachoABI();
bool isELF32_ABI = Subtarget.isELF32_ABI();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
-
+  // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
static const unsigned GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+  // In 32-bit non-varargs functions, the stack space for vectors is after the
+  // stack space for non-vectors.  We do not use this space unless we have
+  // too many vectors to fit in registers, something that only occurs in
+  // constructed examples ;) but we have to walk the arglist to figure
+  // that out... For the pathological case, compute VecArgOffset as the
+  // start of the vector parameter area.  Computing VecArgOffset is the
+  // entire point of the following loop.
+  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
+  // to handle Elf here.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e;
+ ++ArgNo) {
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+        // Nothing to do, we're only looking at non-vector args here.
+ break;
+ }
+ }
+ }
+  // We've found where the vector parameter area in memory is.  Skip the
+  // first 12 vector parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
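+  // (16 bytes for each of the 12 Altivec argument registers.)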
+
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
// represented with two words (long long or double) must be copied to an
// even GPR_idx value or to an even ArgOffset value.
+ SmallVector<SDOperand, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
SDOperand ArgVal;
bool needsLoad = false;
- MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
- unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(ArgNo+3))->getValue();
- unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
// See if next argument requires stack alignment in ELF
- bool Expand = (ObjectVT == MVT::f64) || ((ArgNo + 1 < e) &&
- (cast<ConstantSDNode>(Op.getOperand(ArgNo+4))->getValue() & AlignFlag) &&
- (!(Flags & AlignFlag)));
+ bool Align = Flags.isSplit();
unsigned CurArgOffset = ArgOffset;
- switch (ObjectVT) {
+
+    // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Op.getOperand(ArgNo+3),
+ isVarArg,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Op.getOperand(ArgNo+3),
+ isVarArg,
+ PtrByteSize);
+
+ // FIXME alignment for ELF may not be right
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValues.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
+ SDOperand Store = DAG.getTruncStore(Val.getValue(1), Val, FIN,
+ NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT()) {
default: assert(0 && "Unhandled argument type!");
case MVT::i32:
- // Double word align in ELF
- if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
- RegInfo.addLiveIn(GPR[GPR_idx], VReg);
- ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
- ++GPR_idx;
- } else {
- needsLoad = true;
- ArgSize = PtrByteSize;
+ if (!isPPC64) {
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // Stack align in ELF
+ if (needsLoad && Align && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+ break;
}
- // Stack align in ELF
- if (needsLoad && Expand && isELF32_ABI)
- ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
- // All int arguments reserve stack space in Macho ABI.
- if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
- break;
-
+ // FALLTHROUGH
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
RegInfo.addLiveIn(GPR[GPR_idx], VReg);
ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal);
+ }
+
++GPR_idx;
} else {
needsLoad = true;
}
// Stack align in ELF
- if (needsLoad && Expand && isELF32_ABI)
+ if (needsLoad && Align && isELF32_ABI)
ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
// All FP arguments reserve stack space in Macho ABI.
if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- // Note that vector arguments in registers don't reserve stack space.
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
RegInfo.addLiveIn(VR[VR_idx], VReg);
ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+ }
++VR_idx;
} else {
- // This should be simple, but requires getting 16-byte aligned stack
- // values.
- assert(0 && "Loading VR argument not implemented yet!");
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
needsLoad = true;
}
break;
// that we ran out of physical registers of the appropriate type.
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize));
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
}
ArgValues.push_back(ArgVal);
}
-
+
+  // Set the size that is at least reserved in the caller of this function.  A
+  // tail-call-optimized function's reserved stack space needs to be aligned so
+  // that taking the difference between two stack areas will result in an
+  // aligned stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
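+  // The ABI guarantees the caller reserved at least the minimum call frame
+  // size, so the area we may reuse is never smaller than that.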
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (isVarArg) {
int depth;
// Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
// pointer.
- depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 +
- Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 +
- MVT::getSizeInBits(PtrVT)/8);
+ depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
+ Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
+ PtrVT.getSizeInBits()/8);
- VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+ VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
ArgOffset);
}
else
depth = ArgOffset;
- VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+ VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
depth);
SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- SmallVector<SDOperand, 8> MemOps;
-
// In ELF 32 ABI, the fixed integer arguments of a variadic function are
// stored to the VarArgsFrameIndex on the stack.
if (isELF32_ABI) {
SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+ SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
}
}
SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+ SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
}
SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
}
SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
}
}
-
- if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
}
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
+
ArgValues.push_back(Root);
// Return the new list of results.
- std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+ std::vector<MVT> RetVT(Op.Val->value_begin(),
Op.Val->value_end());
return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isMachoABI,
+ bool isVarArg,
+ unsigned CC,
+ SDOperand Call,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned NumOps = (Call.getNumOperands() - 5) / 2;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+  // possible use by the callee.  In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Call.getOperand(5+2*i);
+ SDOperand Flag = Call.getOperand(5+2*i+1);
+ MVT ArgVT = Arg.getValueType();
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && PerformTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+ unsigned ParamSize) {
+
+ if (!IsTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
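+  // A negative SPDiff means the callee needs more argument space than the
+  // caller reserved; e.g. 64 bytes reserved vs. 80 needed gives SPDiff = -16.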
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+  // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return.  A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+ SDOperand Ret,
+ SelectionDAG& DAG) const {
+ // Variable argument functions are not supported.
+ if (!PerformTailCallOpt ||
+ cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;
+
+ if (CheckTailCallReturnConstraints(Call, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+      // Functions containing byval parameters are not supported.
+ for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) {
+ ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1))
+ ->getArgFlags();
+ if (Flags.isByVal()) return false;
+ }
+
+ SDOperand Callee = Call.getOperand(4);
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
DAG.getTargetLoweringInfo().getPointerTy()).Val;
}
+namespace {
+
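+// TailCallArgumentInfo - Pairs an outgoing tail-call argument with the stack
+// slot (frame index) it must eventually be stored to.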
+struct TailCallArgumentInfo {
+ SDOperand Arg;
+ SDOperand FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDOperand Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDOperand, 8> &MemOpChains) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDOperand Arg = TailCallArgs[i].Arg;
+ SDOperand FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+    // Store relative to the frame pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN,
+ PseudoSourceValue::getFixedStack(),
+ FI));
+ }
+}
-static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget) {
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDOperand Chain,
+ SDOperand OldRetAddr,
+ SDOperand OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isMachoABI) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+ isMachoABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc);
+ int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+ isMachoABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(), NewRetAddr);
+ SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx,
+ PseudoSourceValue::getFixedStack(), NewFPIdx);
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember the argument for later processing.
+/// Calculate the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDOperand Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
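+  // The argument lands at its usual offset, shifted by the stack adjustment
+  // SPDiff computed for the tail call.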
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDOperand FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
+/// address stack slots.  Returns the chain as result and the loaded values in
+/// LROpOut/FPOpOut.  Used when tail calling.
+SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDOperand Chain,
+ SDOperand &LROpOut,
+ SDOperand &FPOpOut) {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0);
+ Chain = SDOperand(LROpOut.Val, 1);
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0);
+ Chain = SDOperand(FPOpOut.Val, 1);
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDOperand
+CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ unsigned Size) {
+ SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false,
+ NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain,
+ SDOperand Arg, SDOperand PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDOperand, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
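+      // Vectors ignore the incoming PtrOff; recompute the address as an
+      // offset from the stack pointer so they land in their aligned slots.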
+ SDOperand StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget,
+ TargetMachine &TM) {
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 &&
+ CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
bool isMachoABI = Subtarget.isMachoABI();
bool isELF32_ABI = Subtarget.isELF32_ABI();
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ MachineFunction &MF = DAG.getMachineFunction();
+
// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
// SelectExpr to use to put the arguments in the appropriate registers.
std::vector<SDOperand> args_to_use;
+  // Mark this function as potentially containing a tail call. As a consequence
+  // the frame pointer will be used for dynamic allocas and for restoring the
+  // caller's stack pointer in this function's epilog. This is done because the
+  // tail-called function might overwrite the value in this function's (MF)
+  // stack pointer stack slot 0(SP).
+ if (PerformTailCallOpt && CC==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
-
- // Add up all the space actually used.
- for (unsigned i = 0; i != NumOps; ++i) {
- unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
- ArgSize = std::max(ArgSize, PtrByteSize);
- NumBytes += ArgSize;
- }
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
+ Op, nAltivecParamsAtEnd);
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain,
DAG.getConstant(NumBytes, PtrVT));
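+  // Remember the start of the call sequence: byval argument copies emitted
+  // below must live outside CALLSEQ_START..CALLSEQ_END, so they get spliced
+  // in ahead of this node.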
+ SDOperand CallSeqStart = Chain;
+  // Load the return address and frame pointer so they can be moved somewhere
+  // else later.
+ SDOperand LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp);
+
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
SmallVector<SDOperand, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
bool inMem = false;
SDOperand Arg = Op.getOperand(5+2*i);
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
- unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
// See if next argument requires stack alignment in ELF
- unsigned next = 5+2*(i+1)+1;
- bool Expand = (Arg.getValueType() == MVT::f64) || ((i + 1 < NumOps) &&
- (cast<ConstantSDNode>(Op.getOperand(next))->getValue() & AlignFlag) &&
- (!(Flags & AlignFlag)));
+ bool Align = Flags.isSplit();
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDOperand PtrOff;
// Stack align in ELF 32
- if (isELF32_ABI && Expand)
+ if (isELF32_ABI && Align)
PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
StackPtr.getValueType());
else
// On PPC64, promote integers to 64-bit values.
if (isPPC64 && Arg.getValueType() == MVT::i32) {
- unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
}
-
- switch (Arg.getValueType()) {
+
+ // FIXME Elf untested, what are alignment rules?
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,
+ NULL, 0, VT);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
+ SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.Val->getOperand(0),
+ Flags, DAG, Size);
+ // This must go outside the CALLSEQ_START..END.
+ SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.Val->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.Val->getOperand(0),
+ Flags, DAG, Size);
+ // This must go outside the CALLSEQ_START..END.
+ SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.Val->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDOperand Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT()) {
default: assert(0 && "Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
// Double word align in ELF
- if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments);
inMem = true;
}
if (inMem || isMachoABI) {
// Stack align in ELF
- if (isELF32_ABI && Expand)
+ if (isELF32_ABI && Align)
ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64:
- if (isVarArg) {
- // Float varargs need to be promoted to double.
- if (Arg.getValueType() == MVT::f32)
- Arg = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Arg);
- }
-
if (FPR_idx != NumFPRs) {
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
}
}
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments);
inMem = true;
}
if (inMem || isMachoABI) {
// Stack align in ELF
- if (isELF32_ABI && Expand)
+ if (isELF32_ABI && Align)
ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
if (isPPC64)
ArgOffset += 8;
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- assert(!isVarArg && "Don't support passing vectors to varargs yet!");
- assert(VR_idx != NumVRs &&
- "Don't support passing more than 12 vector args yet!");
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+        while (ArgOffset % 16 != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDOperand Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDOperand Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDOperand Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments);
+ ArgOffset += 16;
+ }
break;
}
}
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Op.getOperand(5+2*i);
+ MVT ArgType = Arg.getValueType();
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDOperand PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// With the ELF 32 ABI, set CR6 to true if this is a vararg call.
if (isVarArg && isELF32_ABI) {
- SDOperand SetCR(DAG.getTargetNode(PPC::SETCR, MVT::i32), 0);
- Chain = DAG.getCopyToReg(Chain, PPC::CR6, SetCR, InFlag);
+ SDOperand SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0);
+ Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag);
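+    // (CR1EQ is condition-register bit 6, the bit the SVR4 ABI designates for
+    // this purpose.)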
InFlag = Chain.getValue(1);
}
- std::vector<MVT::ValueType> NodeTys;
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ if (isTailCall) {
+ SmallVector<SDOperand, 8> MemOpChains2;
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDOperand();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isMachoABI);
+ }
+
+ // Emit callseq_end just before tailcall node.
+ if (isTailCall) {
+ SmallVector<SDOperand, 8> CallSeqOps;
+ SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ CallSeqOps.push_back(Chain);
+ CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes));
+ CallSeqOps.push_back(DAG.getIntPtrConstant(0));
+ if (InFlag.Val)
+ CallSeqOps.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0],
+ CallSeqOps.size());
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT> NodeTys;
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
InFlag = Chain.getValue(1);
- // Copy the callee address into R12 on darwin.
+ // Copy the callee address into R12/X12 on darwin.
if (isMachoABI) {
- Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
+ unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
+ Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
Callee.Val = 0;
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
}
// If this is a direct call, pass the chain and the callee.
Ops.push_back(Chain);
Ops.push_back(Callee);
}
-
+ // If this is a tail call, add the stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
-
+
+ // When performing tail call optimization, the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+
if (InFlag.Val)
Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(InFlag.Val &&
+ "Flag must be set; LowerRET depends on the flag being set");
+ Chain = DAG.getNode(PPCISD::TAILCALL,
+ Op.Val->getVTList(), &Ops[0], Ops.size());
+ return SDOperand(Chain.Val, Op.ResNo);
+ }
+
Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getConstant(NumBytes, PtrVT),
- DAG.getConstant(0, PtrVT),
+ DAG.getConstant(BytesCalleePops, PtrVT),
InFlag);
if (Op.Val->getValueType(0) != MVT::Other)
InFlag = Chain.getValue(1);
- SDOperand ResultVals[3];
- unsigned NumResults = 0;
- NodeTys.clear();
+ SmallVector<SDOperand, 16> ResultVals;
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC);
- // If the call has results, copy the values out of the ret val registers.
- switch (Op.Val->getValueType(0)) {
- default: assert(0 && "Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Op.Val->getValueType(1) == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32,
- Chain.getValue(2)).getValue(1);
- ResultVals[1] = Chain.getValue(0);
- NumResults = 2;
- NodeTys.push_back(MVT::i32);
- } else {
- Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
- }
- NodeTys.push_back(MVT::i32);
- break;
- case MVT::i64:
- Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
- NodeTys.push_back(MVT::i64);
- break;
- case MVT::f64:
- if (Op.Val->getValueType(1) == MVT::f64) {
- Chain = DAG.getCopyFromReg(Chain, PPC::F1, MVT::f64, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- Chain = DAG.getCopyFromReg(Chain, PPC::F2, MVT::f64,
- Chain.getValue(2)).getValue(1);
- ResultVals[1] = Chain.getValue(0);
- NumResults = 2;
- NodeTys.push_back(MVT::f64);
- NodeTys.push_back(MVT::f64);
- break;
- }
- // else fall through
- case MVT::f32:
- Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
- InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
- NodeTys.push_back(Op.Val->getValueType(0));
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
- InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
- NodeTys.push_back(Op.Val->getValueType(0));
- break;
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
}
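+ // For example (a sketch): under RetCC_PPC an i64 returned on a 32-bit
+ // target is assigned two i32 locations, R3 and R4, so this loop covers
+ // each case the deleted per-type switch handled explicitly.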
-
- NodeTys.push_back(MVT::Other);
-
+
// If the function returns void, just return the chain.
- if (NumResults == 0)
+ if (RVLocs.empty())
return Chain;
// Otherwise, merge everything together with a MERGE_VALUES node.
- ResultVals[NumResults++] = Chain;
- SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
- ResultVals, NumResults);
+ ResultVals.push_back(Chain);
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
+ &ResultVals[0], ResultVals.size());
return Res.getValue(Op.ResNo);
}
-static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
+SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
+ TargetMachine &TM) {
SmallVector<CCValAssign, 16> RVLocs;
unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
}
SDOperand Chain = Op.getOperand(0);
+
+ Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
+ if (Chain.getOpcode() == PPCISD::TAILCALL) {
+ SDOperand TailCall = Chain;
+ SDOperand TargetAddress = TailCall.getOperand(1);
+ SDOperand StackAdjustment = TailCall.getOperand(2);
+
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(TargetAddress)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDOperand,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy the registers used by the call. The last operand is a flag, so it
+ // is not copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
SDOperand Flag;
// Copy the result values into the output registers.
return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
}
-static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
+SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
// When we pop the dynamic allocation, we need to restore the SP link.
// Get the correct type for pointers.
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Construct the stack pointer operand.
bool IsPPC64 = Subtarget.isPPC64();
return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
}
-static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget) {
+
+
+SDOperand
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool IsPPC64 = Subtarget.isPPC64();
- bool isMachoABI = Subtarget.isMachoABI();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get the current return address save index. The users of this index are
+ // primarily the return address lowering and the tail call code.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out the fixed offset of the return address save area.
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
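+ // For reference (a sketch of this era's values): getReturnSaveOffset is
+ // expected to yield 16 for 64-bit Macho, 8 for 32-bit Macho, and 4 for
+ // ELF, i.e. the link register save word in the caller's linkage area.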
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDOperand
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
int FPSI = FI->getFramePointerSaveIndex();
-
+
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out the fixed offset of the frame pointer save area.
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
// Get the inputs.
SDOperand Chain = Op.getOperand(0);
SDOperand Size = Op.getOperand(1);
// Get the correct type for pointers.
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Negate the size.
SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
DAG.getConstant(0, PtrVT), Size);
// Construct a node for the frame pointer save index.
- SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT);
+ SDOperand FPSIdx = getFramePointerFrameIndex(DAG);
// Build a DYNALLOC node.
SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
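+ // A sketch of the intent: the stack grows downward, so the allocation is
+ // expressed as adding NegSize to the stack pointer; the DYNALLOC pseudo
+ // is expanded after instruction selection and uses the frame pointer save
+ // slot to keep the back chain intact.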
return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
}
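+
+// A sketch of the 32-bit sequence the LARX/STCX nodes below are expected
+// to become (the label number is drawn from PPCAtomicLabelIndex):
+//   Ln: lwarx  r4, 0, r3      ; load word and reserve
+//       add    r4, r4, r5     ; apply the increment
+//       stwcx. r4, 0, r3      ; store conditional to the reserved address
+//       bne-   Ln             ; retry if the reservation was lost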
+SDOperand PPCTargetLowering::LowerAtomicLAS(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.Val->getValueType(0);
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Ptr = Op.getOperand(1);
+ SDOperand Incr = Op.getOperand(2);
+
+ // Issue a "load and reserve".
+ std::vector<MVT> VTs;
+ VTs.push_back(VT);
+ VTs.push_back(MVT::Other);
+
+ SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDOperand Ops[] = {
+ Chain, // Chain
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
+ Chain = Load.getValue(1);
+
+ // Compute new value.
+ SDOperand NewVal = DAG.getNode(ISD::ADD, VT, Load, Incr);
+
+ // Issue a "store and check".
+ SDOperand Ops2[] = {
+ Chain, // Chain
+ NewVal, // Value
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
+ SDOperand OutOps[] = { Load, Store };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+ OutOps, 2);
+}
+
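+// A sketch of the expected 32-bit compare-and-swap expansion, assuming the
+// CMP_UNRESERVE pseudo branches out and clears the reservation on mismatch:
+//   Ln: lwarx  r4, 0, r3      ; load word and reserve
+//       cmpw   r4, r5         ; compare against the expected old value
+//       bne-   Lx             ; mismatch: exit, dropping the reservation
+//       stwcx. r6, 0, r3      ; try to store the new value
+//       bne-   Ln             ; reservation lost, retry
+//   Lx: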
+SDOperand PPCTargetLowering::LowerAtomicLCS(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.Val->getValueType(0);
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Ptr = Op.getOperand(1);
+ SDOperand NewVal = Op.getOperand(2);
+ SDOperand OldVal = Op.getOperand(3);
+
+ // Issue a "load and reserve".
+ std::vector<MVT> VTs;
+ VTs.push_back(VT);
+ VTs.push_back(MVT::Other);
+
+ SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDOperand Ops[] = {
+ Chain, // Chain
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
+ Chain = Load.getValue(1);
+
+ // Compare and unreserve if not equal.
+ SDOperand Ops2[] = {
+ Chain, // Chain
+ OldVal, // Old value
+ Load, // Value in memory
+ Label, // Label
+ };
+ Chain = DAG.getNode(PPCISD::CMP_UNRESERVE, MVT::Other, Ops2, 4);
+
+ // Issue a "store and check".
+ SDOperand Ops3[] = {
+ Chain, // Chain
+ NewVal, // Value
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops3, 4);
+ SDOperand OutOps[] = { Load, Store };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+ OutOps, 2);
+}
+
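+// Swap needs no ALU step between the reservation and the conditional store.
+// A sketch: lwarx r4,0,r3 ; stwcx. r5,0,r3 ; bne- (retry), with r4 holding
+// the previous memory value that becomes the result.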
+SDOperand PPCTargetLowering::LowerAtomicSWAP(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.Val->getValueType(0);
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Ptr = Op.getOperand(1);
+ SDOperand NewVal = Op.getOperand(2);
+
+ // Issue a "load and reserve".
+ std::vector<MVT> VTs;
+ VTs.push_back(VT);
+ VTs.push_back(MVT::Other);
+
+ SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDOperand Ops[] = {
+ Chain, // Chain
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
+ Chain = Load.getValue(1);
+
+ // Issue a "store and check".
+ SDOperand Ops2[] = {
+ Chain, // Chain
+ NewVal, // Value
+ Ptr, // Ptr
+ Label, // Label
+ };
+ SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
+ SDOperand OutOps[] = { Load, Store };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+ OutOps, 2);
+}
/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
/// when possible.
-static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
// Not FP? Not a fsel.
- if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
- !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
return SDOperand();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
// Cannot handle SETEQ/SETNE.
if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();
- MVT::ValueType ResVT = Op.getValueType();
- MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
+ MVT ResVT = Op.getValueType();
+ MVT CmpVT = Op.getOperand(0).getValueType();
SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
}
// FIXME: Split this code up when LegalizeDAGTypes lands.
-static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
- assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
+SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDOperand Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
SDOperand Tmp;
- switch (Op.getValueType()) {
+ switch (Op.getValueType().getSimpleVT()) {
default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
}
-static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
+ SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::ppcf128);
SDNode *Node = Op.Val;
assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
// This sequence changes FPSCR to do round-to-zero, adds the two halves
// of the long double, and puts FPSCR back the way it was. We do not
// actually model FPSCR.
- std::vector<MVT::ValueType> NodeTys;
+ std::vector<MVT> NodeTys;
SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;
NodeTys.push_back(MVT::f64); // Return register
return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg);
}
-static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDOperand();
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
// then lfd it and fcfid it.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(8, 8);
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
Op.getOperand(0));
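+ // A sketch of this 32-bit path: sign-extend the i32 to 64 bits (EXTSW_32),
+ // STD it into an 8-byte stack slot, reload it as an f64 bit pattern, and
+ // let FCFID perform the integer-to-double conversion.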
// STD the extended value into the stack slot.
- MemOperand MO(PseudoSourceValue::getFixedStack(),
- MemOperand::MOStore, FrameIdx, 8, 8);
+ MachineMemOperand MO(PseudoSourceValue::getFixedStack(),
+ MachineMemOperand::MOStore, FrameIdx, 8, 8);
SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
DAG.getEntryNode(), Ext64, FIdx,
DAG.getMemOperand(MO));
return FP;
}
-static SDOperand LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
/*
The rounding mode is in bits 30:31 of FPSCR, and has the following
settings:
*/
MachineFunction &MF = DAG.getMachineFunction();
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<MVT::ValueType> NodeTys;
+ MVT VT = Op.getValueType();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<MVT> NodeTys;
SDOperand MFFSreg, InFlag;
// Save FP Control Word to register
SDOperand RetVal =
DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);
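+ // Worked check (a sketch; RN is the two-bit FPSCR rounding-control field):
+ //   RN = 00 (to nearest)   -> FLT_ROUNDS 1
+ //   RN = 01 (toward zero)  -> FLT_ROUNDS 0
+ //   RN = 10 (toward +inf)  -> FLT_ROUNDS 2
+ //   RN = 11 (toward -inf)  -> FLT_ROUNDS 3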
- return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}
-static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
+SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
// Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
SDOperand Lo = Op.getOperand(0);
SDOperand Hi = Op.getOperand(1);
SDOperand Amt = Op.getOperand(2);
-
- SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
- DAG.getConstant(32, MVT::i32), Amt);
- SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
- SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
- SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
- SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
- DAG.getConstant(-32U, MVT::i32));
- SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
- SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
- SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
+ MVT AmtVT = Amt.getValueType();
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5);
+ SDOperand OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
+ SDOperand OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt);
SDOperand OutOps[] = { OutLo, OutHi };
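+ // Worked check (a sketch, 32-bit parts): the PPC shifts use the low six
+ // bits of the amount and yield 0 for amounts 32..63. For Amt in [0,32) the
+ // Lo << (Amt-32) term is 0, so OutHi = (Hi << Amt) | (Lo >> (32-Amt)); for
+ // Amt in [32,64) the other two terms are 0 and OutHi = Lo << (Amt-32).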
- return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
OutOps, 2);
}
-static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
+SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
- // Otherwise, expand into a bunch of logical ops. Note that these ops
+ // Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
SDOperand Lo = Op.getOperand(0);
SDOperand Hi = Op.getOperand(1);
SDOperand Amt = Op.getOperand(2);
-
- SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
- DAG.getConstant(32, MVT::i32), Amt);
- SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
- SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
- SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
- SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
- DAG.getConstant(-32U, MVT::i32));
- SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
- SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
- SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
+ MVT AmtVT = Amt.getValueType();
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5);
+ SDOperand OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt);
SDOperand OutOps[] = { OutLo, OutHi };
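+ // The SRL case mirrors the SHL expansion above with the roles of Lo and
+ // Hi swapped; the same oversized-shift-amount reasoning applies.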
- return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
OutOps, 2);
}
-static SDOperand LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
+SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
- // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
+ // Expand into a bunch of logical ops, followed by a select_cc.
SDOperand Lo = Op.getOperand(0);
SDOperand Hi = Op.getOperand(1);
SDOperand Amt = Op.getOperand(2);
-
- SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
- DAG.getConstant(32, MVT::i32), Amt);
- SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
- SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
- SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
- SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
- DAG.getConstant(-32U, MVT::i32));
- SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
- SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
- SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
+ MVT AmtVT = Amt.getValueType();
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt);
+ SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDOperand OutOps[] = { OutLo, OutHi };
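+ // A sketch of the select: Tmp5 = Amt - BitWidth is positive exactly when
+ // Amt > BitWidth, where OutLo must be the sign-propagating
+ // Hi >> (Amt - BitWidth) (Tmp6); for Amt <= BitWidth the logical
+ // combination Tmp4 is already correct.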
- return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
OutOps, 2);
}
// Start with zeroed results.
VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
- unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+ unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDOperand OpVal = BV->getOperand(i);
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize. Cast the result to VT.
-static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
+static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT VT,
SelectionDAG &DAG) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
- MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+ MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
if (Val == -1)
SplatSize = 1;
- MVT::ValueType CanonicalVT = VTys[SplatSize-1];
+ MVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
- SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));
+ SDOperand Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType());
SmallVector<SDOperand, 8> Ops;
- Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
&Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
SelectionDAG &DAG,
- MVT::ValueType DestVT = MVT::Other) {
+ MVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
DAG.getConstant(IID, MVT::i32), LHS, RHS);
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
SDOperand Op2, SelectionDAG &DAG,
- MVT::ValueType DestVT = MVT::Other) {
+ MVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
- MVT::ValueType VT, SelectionDAG &DAG) {
+ MVT VT, SelectionDAG &DAG) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
-static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op,
+ SelectionDAG &DAG) {
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
-static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op,
+ SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
- MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
- unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
+ MVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
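+ // For example (a sketch): for v4i32, mask element 1 expands to byte
+ // indices 4,5,6,7 of the v16i8 vperm mask.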
SmallVector<SDOperand, 16> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
-static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op,
+ SelectionDAG &DAG) {
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<MVT::ValueType> VTs;
+ std::vector<MVT> VTs;
VTs.push_back(Op.getOperand(2).getValueType());
VTs.push_back(MVT::Flag);
SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
return Flags;
}
-static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op,
+ SelectionDAG &DAG) {
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16);
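+ // Design note: PowerPC has no direct move between integer/FP registers
+ // and vector registers, so this lowering round-trips through a 16-byte
+ // aligned stack slot: store the scalar, then load back the full vector.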
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0);
}
-static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) {
if (Op.getValueType() == MVT::v4i32) {
SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
VarArgsStackOffset, VarArgsNumGPR,
VarArgsNumFPR, PPCSubTarget);
- case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget);
+ case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget,
+ getTargetMachine());
case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::ATOMIC_LAS: return LowerAtomicLAS(Op, DAG);
+ case ISD::ATOMIC_LCS: return LowerAtomicLCS(Op, DAG);
+ case ISD::ATOMIC_SWAP: return LowerAtomicSWAP(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
MachineFunction *F = BB->getParent();
F->getBasicBlockList().insert(It, copy0MBB);
F->getBasicBlockList().insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
+ // Update machine-CFG edges by transferring all successors of the current
// block to the new block, which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
SDOperand Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
- std::vector<MVT::ValueType> VTs;
+ std::vector<MVT> VTs;
VTs.push_back(MVT::i32);
VTs.push_back(MVT::Other);
SDOperand MO = DAG.getMemOperand(LD->getMemOperand());
SDNode *LHSN = N->getOperand(0).Val;
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
UI != E; ++UI)
- if ((*UI)->getOpcode() == PPCISD::VCMPo &&
- (*UI)->getOperand(1) == N->getOperand(1) &&
- (*UI)->getOperand(2) == N->getOperand(2) &&
- (*UI)->getOperand(0) == N->getOperand(0)) {
- VCMPoNode = *UI;
+ if ((*UI).getUser()->getOpcode() == PPCISD::VCMPo &&
+ (*UI).getUser()->getOperand(1) == N->getOperand(1) &&
+ (*UI).getUser()->getOperand(2) == N->getOperand(2) &&
+ (*UI).getUser()->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = UI->getUser();
break;
}
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
FlagUser == 0; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
- SDNode *User = *UI;
+ SDNode *User = UI->getUser();
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
FlagUser = User;
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<MVT::ValueType> VTs;
+ std::vector<MVT> VTs;
SDOperand Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter,
std::vector<SDOperand>&Ops,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDOperand Result(0,0);
switch (Letter) {
default: break;
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- int RAIdx = FuncInfo->getReturnAddrSaveIndex();
- if (RAIdx == 0) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- int Offset =
- PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI());
-
- // Set up a frame object for the return address.
- RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset);
-
- // Remember it for next time.
- FuncInfo->setReturnAddrSaveIndex(RAIdx);
-
- // Make sure the function really does not optimize away the store of the RA
- // to the stack.
- FuncInfo->setLRStoreRequired();
- }
-
+
// Just load the return address off the stack.
- SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy());
+ SDOperand RetAddrFI = getReturnAddrFrameIndex(DAG);
+
+ // Make sure the function really does not optimize away the store of the RA
+ // to the stack.
+ FuncInfo->setLRStoreRequired();
return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}
if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
return SDOperand();
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
MachineFunction &MF = DAG.getMachineFunction();