X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCISelLowering.cpp;h=630385cc7c0c564ee46fb1c668f504852233031d;hb=4a816471f527b90464806892feeecc242491a459;hp=c4f961cbd64e453b293fcb11de129f762719242e;hpb=f349a6e9e6ee0b589c403e0c5785266da121d05c;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c4f961cbd64..630385cc7c0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -149,28 +150,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFCPSGN()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); + } else { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + } if (Subtarget->hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - - // frin does not implement "ties to even." Thus, this is safe only in - // fast-math mode. - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - - // These need to set FE_INEXACT, and use a custom inserter. - setOperationAction(ISD::FRINT, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - } + setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ @@ -280,8 +277,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } else setOperationAction(ISD::VAARG, MVT::Other, Expand); + if (Subtarget->isSVR4ABI() && !isPPC64) + // VACOPY is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VACOPY , MVT::Other, Custom); + else + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + // Use the default implementation. - setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); @@ -392,6 +394,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); @@ -486,6 +489,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); + + setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { @@ -551,7 +557,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); - setSchedulingPreference(Sched::Hybrid); + if (Subtarget->enableMachineScheduler()) + setSchedulingPreference(Sched::Source); + else + setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); @@ -570,24 +579,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } } +/// getMaxByValAlign - Helper for getByValTypeAlignment to determine +/// the desired ByVal argument alignment. +static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, + unsigned MaxMaxAlign) { + if (MaxAlign == MaxMaxAlign) + return; + if (VectorType *VTy = dyn_cast(Ty)) { + if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) + MaxAlign = 32; + else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) + MaxAlign = 16; + } else if (ArrayType *ATy = dyn_cast(Ty)) { + unsigned EltAlign = 0; + getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + } else if (StructType *STy = dyn_cast(Ty)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned EltAlign = 0; + getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + if (MaxAlign == MaxMaxAlign) + break; + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { - const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. - if (TM.getSubtarget().isDarwin()) + if (PPCSubTarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. - if (VectorType *VTy = dyn_cast(Ty)) - if (VTy->getBitWidth() >= 128) - return 16; - // The rest is 8 on PPC64 and 4 on PPC32 boundary. - if (PPCSubTarget.isPPC64()) - return 8; - - return 4; + unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4; + if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX()) + getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16); + return Align; } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -639,6 +671,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -1026,6 +1059,46 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, return false; } +// If we happen to be doing an i64 load or store into a stack slot that has +// less than a 4-byte alignment, then the frame-index elimination may need to +// use an indexed load or store instruction (because the offset may not be a +// multiple of 4). The extra register needed to hold the offset comes from the +// register scavenger, and it is possible that the scavenger will need to use +// an emergency spill slot. As a result, we need to make sure that a spill slot +// is allocated when doing an i64 load/store into a less-than-4-byte-aligned +// stack slot. +static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { + // FIXME: This does not handle the LWA case. + if (VT != MVT::i64) + return; + + // NOTE: We'll exclude negative FIs here, which come from argument + // lowering, because there are no known test cases triggering this problem + // using packed structures (or similar). We can remove this exclusion if + // we find such a test case. The reason why this is so test-case driven is + // because this entire 'fixup' is only to prevent crashes (from the + // register scavenger) on not-really-valid inputs. For example, if we have: + // %a = alloca i1 + // %b = bitcast i1* %a to i64* + // store i64* a, i64 b + // then the store should really be marked as 'align 1', but is not. If it + // were marked as 'align 1' then the indexed form would have been + // instruction-selected initially, and the problem this 'fixup' is preventing + // won't happen regardless. + if (FrameIdx < 0) + return; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + if (Align >= 4) + return; + + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasNonRISpills(); +} + /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. If Aligned is true, only accept displacements @@ -1047,6 +1120,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, Disp = DAG.getTargetConstant(imm, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } @@ -1111,9 +1185,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast(N)) + if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else Base = N; return true; // [r+0] } @@ -1335,6 +1410,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + // FIXME: TLS addresses currently use medium model code sequences, + // which is the most useful form. Eventually support for small and + // large models could be added if users need it, at the cost of + // additional complexity. GlobalAddressSDNode *GA = cast(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); @@ -1354,17 +1433,20 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } - if (!is64bit) - llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, - PtrVT, GOTReg, TGA); + SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLS); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + } else + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, - PtrVT, TGA, TPOffsetHi); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + PtrVT, TGA, GOTPtr); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { @@ -1602,6 +1684,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, false, false, false, 0); } +SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const { + assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); + + // We have to copy the entire va_list struct: + // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte + return DAG.getMemcpy(Op.getOperand(0), Op, + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(12, MVT::i32), 8, false, true, + MachinePointerInfo(), MachinePointerInfo()); +} + SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); @@ -1749,6 +1843,12 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { + return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; +} + bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -2211,6 +2311,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(FIN); continue; } + + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + CurArgOffset = ArgOffset; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (ObjSize < PtrByteSize) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); @@ -2864,7 +2971,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Flags.isByVal()) return false; } - // Non PIC/GOT tail calls are supported. + // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; @@ -2909,8 +3016,8 @@ struct TailCallArgumentInfo { static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, - const SmallVector &TailCallArgs, - SmallVector &MemOpChains, + const SmallVectorImpl &TailCallArgs, + SmallVectorImpl &MemOpChains, SDLoc dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; @@ -2968,7 +3075,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, - SmallVector& TailCallArguments) { + SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); @@ -3033,8 +3140,8 @@ static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, - bool isVector, SmallVector &MemOpChains, - SmallVector &TailCallArguments, + bool isVector, SmallVectorImpl &MemOpChains, + SmallVectorImpl &TailCallArguments, SDLoc dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { @@ -3058,7 +3165,7 @@ static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, bool isDarwinABI, - SmallVector &TailCallArguments) { + SmallVectorImpl &TailCallArguments) { MachineFunction &MF = DAG.getMachineFunction(); // Emit a sequence of copyto/copyfrom virtual registers for arguments that @@ -3085,8 +3192,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, - SmallVector, 8> &RegsToPass, - SmallVector &Ops, std::vector &NodeTys, + SmallVectorImpl > &RegsToPass, + SmallVectorImpl &Ops, std::vector &NodeTys, const PPCSubtarget &PPCSubTarget) { bool isPPC64 = PPCSubTarget.isPPC64(); @@ -3383,7 +3490,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && + (!isLocalCall(Callee) || + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } @@ -3412,10 +3521,10 @@ SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -3800,6 +3909,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -3891,7 +4009,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -3914,7 +4032,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. SDValue StoreOff; - if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4222,7 +4340,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -4687,7 +4805,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; - switch (Op.getValueType().getSimpleVT().SimpleTy) { + switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : @@ -5667,6 +5785,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VAARG: return LowerVAARG(Op, DAG, PPCSubTarget); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG, PPCSubTarget); + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); @@ -5762,6 +5883,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + // LowerFP_TO_INT() can only handle f32 and f64. + if (N->getOperand(0).getValueType() == MVT::ppcf128) + return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } @@ -6033,6 +6157,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); @@ -6044,10 +6169,25 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); - MIB.setMemRefs(MMOBegin, MMOEnd); } + // Naked functions never have a base pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned BaseReg; + if (MF->getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; + else + BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; + + MIB = BuildMI(*thisMBB, MI, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) + .addReg(BaseReg) + .addImm(BPOffset) + .addReg(BufReg); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = @@ -6119,12 +6259,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30; MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI->getOperand(0).getReg(); @@ -6166,8 +6308,17 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, } MIB.setMemRefs(MMOBegin, MMOEnd); - // FIXME: When we also support base pointers, that register must also be - // restored here. + // Reload BP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) + .addImm(BPOffset) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) + .addImm(BPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { @@ -6578,51 +6729,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); - } else if (MI->getOpcode() == PPC::FRINDrint || - MI->getOpcode() == PPC::FRINSrint) { - bool isf32 = MI->getOpcode() == PPC::FRINSrint; - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); - - // Perform the rounding. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) - .addReg(Src); - - // Compare the results. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) - .addReg(Dest).addReg(Src); - - // If the results were not equal, then set the FPSCR XX bit. - MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, midMBB); - F->insert(It, exitMBB); - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); - - BB->addSuccessor(midMBB); - BB->addSuccessor(exitMBB); - - BB = midMBB; - - // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set - // the FI bit here because that will not automatically set XX also, - // and XX is what libm interprets as the FE_INEXACT flag. - BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); - BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); - - BB->addSuccessor(exitMBB); - - BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6963,8 +7069,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (RV.getNode() != 0) { DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode() != 0) + if (RV.getNode() != 0) { + // Unfortunately, RV is now NaN if the input was exactly 0. Select out + // this case and force the answer to 0. + + EVT VT = RV.getValueType(); + + SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); + if (VT.isVector()) { + assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); + Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); + } + + SDValue ZeroCmp = + DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + DCI.AddToWorklist(ZeroCmp.getNode()); + DCI.AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, + ZeroCmp, Zero, RV); return RV; + } } } @@ -7060,7 +7186,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); if (ISD::isNON_EXTLoad(N) && VT.isVector() && TM.getSubtarget().hasAltivec() && - DCI.getDAGCombineLevel() == AfterLegalizeTypes && + (VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v4f32) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); @@ -7204,6 +7331,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + break; case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && @@ -7534,7 +7663,24 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + std::pair R = + TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + + // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers + // (which we call X[0-9]+). If a 64-bit value has been requested, and a + // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent + // register. + // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use + // the AsmName field from *RegisterInfo.td, then this would not be necessary. + if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() && + PPC::GPRCRegClass.contains(R.first)) { + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + return std::make_pair(TRI->getMatchingSuperReg(R.first, + PPC::sub_32, &PPC::G8RCRegClass), + &PPC::G8RCRegClass); + } + + return R; } @@ -7650,6 +7796,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); + if (!isa(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return SDValue(); + } + SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); @@ -7763,18 +7915,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return true; } -/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than -/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to -/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd -/// is expanded to mul + add. -bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { +bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: - case MVT::v4f32: return true; default: break; @@ -7784,9 +7933,15 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - if (DisableILPPref) + if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } +// Create a fast isel object. +FastISel * +PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const { + return PPC::createFastISel(FuncInfo, LibInfo); +}