diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 1d75a2849e8..079e886457b 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
@@ -49,6 +50,7 @@ getTargetNodeName(unsigned Opcode) const
   case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
   case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
   case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+  case XCoreISD::LDWSP             : return "XCoreISD::LDWSP";
   case XCoreISD::STWSP             : return "XCoreISD::STWSP";
   case XCoreISD::RETSP             : return "XCoreISD::RETSP";
   case XCoreISD::LADD              : return "XCoreISD::LADD";
@@ -59,6 +61,9 @@
   case XCoreISD::CRC8              : return "XCoreISD::CRC8";
   case XCoreISD::BR_JT             : return "XCoreISD::BR_JT";
   case XCoreISD::BR_JT32           : return "XCoreISD::BR_JT32";
+  case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET";
+  case XCoreISD::EH_RETURN         : return "XCoreISD::EH_RETURN";
+  case XCoreISD::MEMBARRIER        : return "XCoreISD::MEMBARRIER";
   default                          : return NULL;
   }
 }
@@ -79,7 +84,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
 
   setStackPointerRegisterToSaveRestore(XCore::SP);
 
-  setSchedulingPreference(Sched::RegPressure);
+  setSchedulingPreference(Sched::Source);
 
   // Use i32 for setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
@@ -148,6 +153,20 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 
+  // Exception handling
+  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+  setExceptionPointerRegister(XCore::R0);
+  setExceptionSelectorRegister(XCore::R1);
+  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+
+  // Atomic operations
+  // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic.
+  // As we are always Sequentially Consistent, an ATOMIC_FENCE becomes a no-op.
+  setInsertFencesForAtomic(true);
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+
   // TRAMPOLINE is custom lowered.
   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
@@ -162,14 +181,36 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::INTRINSIC_VOID);
+  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
 
   setMinFunctionAlignment(1);
+  setPrefFunctionAlignment(2);
+}
+
+bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+  if (Val.getOpcode() != ISD::LOAD)
+    return false;
+
+  EVT VT1 = Val.getValueType();
+  if (!VT1.isSimple() || !VT1.isInteger() ||
+      !VT2.isSimple() || !VT2.isInteger())
+    return false;
+
+  switch (VT1.getSimpleVT().SimpleTy) {
+  default: break;
+  case MVT::i8:
+    return true;
+  }
+
+  return false;
 }
 
 SDValue XCoreTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
+  case ISD::EH_RETURN:        return LowerEH_RETURN(Op, DAG);
   case ISD::GlobalAddress:    return LowerGlobalAddress(Op, DAG);
   case ISD::BlockAddress:     return LowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:     return LowerConstantPool(Op, DAG);
@@ -185,9 +226,14 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADD:
   case ISD::SUB:              return ExpandADDSUB(Op.getNode(), DAG);
   case ISD::FRAMEADDR:        return LowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:       return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAME_TO_ARGS_OFFSET: return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
   case ISD::INIT_TRAMPOLINE:  return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::ATOMIC_FENCE:     return LowerATOMIC_FENCE(Op, DAG);
+  case ISD::ATOMIC_LOAD:      return LowerATOMIC_LOAD(Op, DAG);
+  case ISD::ATOMIC_STORE:     return LowerATOMIC_STORE(Op, DAG);
   default:
     llvm_unreachable("unimplemented operand");
   }
@@ -233,35 +279,57 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
     UnderlyingGV = GA->resolveAliasedGlobal();
   if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
-    if (GVar->isConstant())
+    if (  ( GVar->isConstant() &&
+            UnderlyingGV->isLocalLinkage(GV->getLinkage()) )
+       || ( GVar->hasSection() &&
+            StringRef(GVar->getSection()).startswith(".cp.") ) )
       return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
     return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
   }
   return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
 }
 
+static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) {
+  if (XTL.getTargetMachine().getCodeModel() == CodeModel::Small)
+    return true;
+
+  Type *ObjType = GV->getType()->getPointerElementType();
+  if (!ObjType->isSized())
+    return false;
+
+  unsigned ObjSize = XTL.getDataLayout()->getTypeAllocSize(ObjType);
+  return ObjSize < CodeModelLargeSize && ObjSize != 0;
+}
+
 SDValue XCoreTargetLowering::
 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
 {
-  SDLoc DL(Op);
   const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GN->getGlobal();
+  SDLoc DL(GN);
   int64_t Offset = GN->getOffset();
-  // We can only fold positive offsets that are a multiple of the word size.
-  int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0);
-  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset);
-  GA = getGlobalAddressWrapper(GA, GV, DAG);
-  // Handle the rest of the offset.
-  if (Offset != FoldedOffset) {
-    SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32);
-    GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining);
+  if (IsSmallObject(GV, *this)) {
+    // We can only fold positive offsets that are a multiple of the word size.
+    int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0);
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset);
+    GA = getGlobalAddressWrapper(GA, GV, DAG);
+    // Handle the rest of the offset.
+    if (Offset != FoldedOffset) {
+      SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32);
+      GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining);
+    }
+    return GA;
+  } else {
+    // Ideally we would not fold in offset with an index <= 11.
+    Type *Ty = Type::getInt8PtrTy(*DAG.getContext());
+    Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty);
+    Ty = Type::getInt32Ty(*DAG.getContext());
+    Constant *Idx = ConstantInt::get(Ty, Offset);
+    Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx);
+    SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
+    return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP,
+                       MachinePointerInfo(), false, false, false, 0);
   }
-  return GA;
-}
-
-static inline SDValue BuildGetId(SelectionDAG &DAG, SDLoc dl) {
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
-                     DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
 }
 
 SDValue XCoreTargetLowering::
@@ -285,10 +353,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
   SDValue Res;
   if (CP->isMachineConstantPoolEntry()) {
     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
-                                    CP->getAlignment());
+                                    CP->getAlignment(), CP->getOffset());
   } else {
     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
-                                    CP->getAlignment());
+                                    CP->getAlignment(), CP->getOffset());
   }
   return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
 }
@@ -707,24 +775,26 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
 SDValue XCoreTargetLowering::
 LowerVAARG(SDValue Op, SelectionDAG &DAG) const
 {
-  llvm_unreachable("unimplemented");
-  // FIXME Arguments passed by reference need a extra dereference.
+  // Whilst llvm does not support aggregate varargs we can ignore
+  // the possibility of the ValueType being an implicit byVal vararg.
   SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0); // not an aggregate
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  EVT PtrVT = VAListPtr.getValueType();
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   SDLoc dl(Node);
-  const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  EVT VT = Node->getValueType(0);
-  SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
-                               Node->getOperand(1), MachinePointerInfo(V),
+  SDValue VAList = DAG.getLoad(PtrVT, dl, InChain,
+                               VAListPtr, MachinePointerInfo(SV),
                                false, false, false, 0);
   // Increment the pointer, VAList, to the next vararg
-  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
-                             DAG.getConstant(VT.getSizeInBits(),
-                                             getPointerTy()));
+  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAList,
+                                DAG.getIntPtrConstant(VT.getSizeInBits() / 8));
   // Store the incremented VAList to the legalized pointer
-  Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
-                      MachinePointerInfo(V), false, false, 0);
+  InChain = DAG.getStore(VAList.getValue(1), dl, nextPtr, VAListPtr,
+                         MachinePointerInfo(SV), false, false, 0);
   // Load the actual argument out of the pointer VAList
-  return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+  return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
                      false, false, false, 0);
 }
 
@@ -743,15 +813,85 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const
 
 SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc dl(Op);
+  // This node represents llvm.frameaddress on the DAG.
+  // It takes one operand, the index of the frame address to return.
+  // An index of zero corresponds to the current function's frame address.
+  // An index of one to the parent's frame address, and so on.
+  // Depths > 0 not supported yet!
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+    return SDValue();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op),
+                            RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+  // This node represents llvm.returnaddress on the DAG.
+  // It takes one operand, the index of the return address to return.
+  // An index of zero corresponds to the current function's return address.
+  // An index of one to the parent's return address, and so on.
   // Depths > 0 not supported yet!
   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
     return SDValue();
 
   MachineFunction &MF = DAG.getMachineFunction();
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+  int FI = XFI->createLRSpillSlot(MF);
+  SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+  return DAG.getLoad(getPointerTy(), SDLoc(Op), DAG.getEntryNode(), FIN,
+                     MachinePointerInfo::getFixedStack(FI), false, false,
+                     false, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const {
+  // This node represents the offset from frame pointer to first on-stack argument.
+  // This is needed for correct stack adjustment during unwind.
+  // However, we don't know the offset until after the frame has been finalised.
+  // This is done during the XCoreFTAOElim pass.
+  return DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, SDLoc(Op), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+  // OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER)
+  // This node represents the 'eh_return' gcc dwarf builtin, which is used to
+  // return from an exception. The general meaning is: adjust stack by OFFSET
+  // and pass execution to HANDLER.
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Offset = Op.getOperand(1);
+  SDValue Handler = Op.getOperand(2);
+  SDLoc dl(Op);
+
+  // Absolute SP = (FP + FrameToArgs) + Offset
   const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
-  return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+  SDValue Stack = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                             RegInfo->getFrameRegister(MF), MVT::i32);
+  SDValue FrameToArgs = DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, dl,
+                                    MVT::i32);
+  Stack = DAG.getNode(ISD::ADD, dl, MVT::i32, Stack, FrameToArgs);
+  Stack = DAG.getNode(ISD::ADD, dl, MVT::i32, Stack, Offset);
+
+  // R0=ExceptionPointerRegister R1=ExceptionSelectorRegister
+  // which leaves 2 caller saved registers, R2 & R3 for us to use.
+  unsigned StackReg = XCore::R2;
+  unsigned HandlerReg = XCore::R3;
+
+  SDValue OutChains[] = {
+    DAG.getCopyToReg(Chain, dl, StackReg, Stack),
+    DAG.getCopyToReg(Chain, dl, HandlerReg, Handler)
+  };
+
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 2);
+
+  return DAG.getNode(XCoreISD::EH_RETURN, dl, MVT::Other, Chain,
+                     DAG.getRegister(StackReg, MVT::i32),
+                     DAG.getRegister(HandlerReg, MVT::i32));
+
 }
 
 SDValue XCoreTargetLowering::
@@ -832,6 +972,73 @@ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+SDValue XCoreTargetLowering::
+LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
+SDValue XCoreTargetLowering::
+LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
+  AtomicSDNode *N = cast<AtomicSDNode>(Op);
+  assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP");
+  assert(N->getOrdering() <= Monotonic &&
+         "setInsertFencesForAtomic(true) and yet greater than Monotonic");
+  if (N->getMemoryVT() == MVT::i32) {
+    if (N->getAlignment() < 4)
+      report_fatal_error("atomic load must be aligned");
+    return DAG.getLoad(getPointerTy(), SDLoc(Op), N->getChain(),
+                       N->getBasePtr(), N->getPointerInfo(),
+                       N->isVolatile(), N->isNonTemporal(),
+                       N->isInvariant(), N->getAlignment(),
+                       N->getTBAAInfo(), N->getRanges());
+  }
+  if (N->getMemoryVT() == MVT::i16) {
+    if (N->getAlignment() < 2)
+      report_fatal_error("atomic load must be aligned");
+    return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
+                          N->getBasePtr(), N->getPointerInfo(), MVT::i16,
+                          N->isVolatile(), N->isNonTemporal(),
+                          N->getAlignment(), N->getTBAAInfo());
+  }
+  if (N->getMemoryVT() == MVT::i8)
+    return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
+                          N->getBasePtr(), N->getPointerInfo(), MVT::i8,
+                          N->isVolatile(), N->isNonTemporal(),
+                          N->getAlignment(), N->getTBAAInfo());
+  return SDValue();
+}
+
+SDValue XCoreTargetLowering::
+LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
+  AtomicSDNode *N = cast<AtomicSDNode>(Op);
+  assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP");
+  assert(N->getOrdering() <= Monotonic &&
+         "setInsertFencesForAtomic(true) and yet greater than Monotonic");
+  if (N->getMemoryVT() == MVT::i32) {
+    if (N->getAlignment() < 4)
report_fatal_error("atomic store must be aligned"); + return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getTBAAInfo()); + } + if (N->getMemoryVT() == MVT::i16) { + if (N->getAlignment() < 2) + report_fatal_error("atomic store must be aligned"); + return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), MVT::i16, + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getTBAAInfo()); + } + if (N->getMemoryVT() == MVT::i8) + return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), MVT::i8, + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getTBAAInfo()); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -872,6 +1079,52 @@ XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers / memory locations. +static SDValue +LowerCallResult(SDValue Chain, SDValue InFlag, + const SmallVectorImpl &RVLocs, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + SmallVector, 4> ResultMemLocs; + // Copy results out of physical registers. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + const CCValAssign &VA = RVLocs[i]; + if (VA.isRegLoc()) { + Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(), + InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } else { + assert(VA.isMemLoc()); + ResultMemLocs.push_back(std::make_pair(VA.getLocMemOffset(), + InVals.size())); + // Reserve space for this result. + InVals.push_back(SDValue()); + } + } + + // Copy results out of memory. + SmallVector MemOpChains; + for (unsigned i = 0, e = ResultMemLocs.size(); i != e; ++i) { + int offset = ResultMemLocs[i].first; + unsigned index = ResultMemLocs[i].second; + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) }; + SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2); + InVals[index] = load; + MemOpChains.push_back(load.getValue(1)); + } + + // Transform all loads nodes into one single node because + // all load nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + return Chain; +} + /// LowerCCCCallTo - functions arguments are copied from virtual /// regs to (physical regs)/(stack frame), CALLSEQ_START and /// CALLSEQ_END are emitted. @@ -897,8 +1150,15 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CCInfo.AnalyzeCallOperands(Outs, CC_XCore); + SmallVector RVLocs; + // Analyze return values to determine the number of bytes of stack required. + CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore); + // Get a count of how many bytes are to be pushed on the stack. 
-  unsigned NumBytes = CCInfo.getNextStackOffset();
+  unsigned NumBytes = RetCCInfo.getNextStackOffset();
 
   Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
                                 getPointerTy(), true), dl);
@@ -996,35 +1256,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
-                         Ins, dl, DAG, InVals);
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-SDValue
-XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
-                                     CallingConv::ID CallConv, bool isVarArg,
-                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                     SDLoc dl, SelectionDAG &DAG,
-                                     SmallVectorImpl<SDValue> &InVals) const {
-
-  // Assign locations to each value returned by this call.
-  SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), RVLocs, *DAG.getContext());
-
-  CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
-
-  // Copy all of the result registers out of their specified physreg.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
-                               RVLocs[i].getValVT(), InFlag).getValue(1);
-    InFlag = Chain.getValue(2);
-    InVals.push_back(Chain.getValue(0));
-  }
-
-  return Chain;
+  return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1072,6 +1304,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1084,6 +1317,9 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
 
   unsigned LRSaveSize = StackSlotSize;
 
+  if (!isVarArg)
+    XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize);
+
   // All getCopyFromReg ops must precede any getMemcpys to prevent the
   // scheduler clobbering a register before it has been copied.
   // The stages are:
@@ -1198,9 +1434,9 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
        ArgDI != ArgDE; ++ArgDI) {
     if (ArgDI->Flags.isByVal() && ArgDI->Flags.getByValSize()) {
       unsigned Size = ArgDI->Flags.getByValSize();
-      unsigned Align = ArgDI->Flags.getByValAlign();
+      unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign());
       // Create a new object on the stack and copy the pointee into it.
-      int FI = MFI->CreateStackObject(Size, Align, false, false);
+      int FI = MFI->CreateStackObject(Size, Align, false);
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
       InVals.push_back(FIN);
       MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
@@ -1234,7 +1470,11 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                LLVMContext &Context) const {
   SmallVector<CCValAssign, 16> RVLocs;
   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
-  return CCInfo.CheckReturn(Outs, RetCC_XCore);
+  if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
+    return false;
+  if (CCInfo.getNextStackOffset() != 0 && isVarArg)
+    return false;
+  return true;
 }
 
 SDValue
@@ -1244,6 +1484,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  SDLoc dl, SelectionDAG &DAG) const {
 
+  XCoreFunctionInfo *XFI =
+    DAG.getMachineFunction().getInfo<XCoreFunctionInfo>();
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
   // CCValAssign - represent the assignment of
   // the return value to a location
   SmallVector<CCValAssign, 16> RVLocs;
@@ -1253,6 +1497,9 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
                  getTargetMachine(), RVLocs, *DAG.getContext());
 
   // Analyze return values.
+  if (!isVarArg)
+    CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4);
+
   CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
 
   SDValue Flag;
@@ -1261,13 +1508,43 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
   // Return on XCore is always a "retsp 0"
   RetOps.push_back(DAG.getConstant(0, MVT::i32));
 
-  // Copy the result values into the output registers.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+  SmallVector<SDValue, 4> MemOpChains;
+  // Handle return values that must be copied to memory.
+  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
     CCValAssign &VA = RVLocs[i];
-    assert(VA.isRegLoc() && "Can only return in registers!");
+    if (VA.isRegLoc())
+      continue;
+    assert(VA.isMemLoc());
+    if (isVarArg) {
+      report_fatal_error("Can't return value from vararg function in memory");
+    }
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             OutVals[i], Flag);
+    int Offset = VA.getLocMemOffset();
+    unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
+    // Create the frame index object for the memory location.
+    int FI = MFI->CreateFixedObject(ObjSize, Offset, false);
+
+    // Create a SelectionDAG node corresponding to a store
+    // to this memory location.
+    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+    MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN,
+                          MachinePointerInfo::getFixedStack(FI), false, false,
+                          0));
+  }
+
+  // Transform all store nodes into one single node because
+  // all stores are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Now handle return values copied to registers.
+  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &VA = RVLocs[i];
+    if (!VA.isRegLoc())
+      continue;
+    // Copy the result values into the output registers.
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
 
     // guarantee that all emitted copies are
     // stuck together, avoiding something bad
@@ -1320,8 +1597,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   // Transfer the remainder of BB and its successor edges to sinkMBB.
   sinkMBB->splice(sinkMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
   sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   // Next, add the true and fallthrough blocks as its successors.
@@ -1362,6 +1638,46 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
   SDLoc dl(N);
   switch (N->getOpcode()) {
   default: break;
+  case ISD::INTRINSIC_VOID:
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    case Intrinsic::xcore_outt:
+    case Intrinsic::xcore_outct:
+    case Intrinsic::xcore_chkct: {
+      SDValue OutVal = N->getOperand(3);
+      // These instructions ignore the high bits.
+      if (OutVal.hasOneUse()) {
+        unsigned BitWidth = OutVal.getValueSizeInBits();
+        APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
+        APInt KnownZero, KnownOne;
+        TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                              !DCI.isBeforeLegalizeOps());
+        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+        if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) ||
+            TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne,
+                                     TLO))
+          DCI.CommitTargetLoweringOpt(TLO);
+      }
+      break;
+    }
+    case Intrinsic::xcore_setpt: {
+      SDValue Time = N->getOperand(3);
+      // This instruction ignores the high bits.
+      if (Time.hasOneUse()) {
+        unsigned BitWidth = Time.getValueSizeInBits();
+        APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+        APInt KnownZero, KnownOne;
+        TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                              !DCI.isBeforeLegalizeOps());
+        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+        if (TLO.ShrinkDemandedConstant(Time, DemandedMask) ||
+            TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne,
+                                     TLO))
+          DCI.CommitTargetLoweringOpt(TLO);
+      }
+      break;
+    }
+    }
+    break;
   case XCoreISD::LADD: {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
@@ -1566,6 +1882,34 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                         KnownZero.getBitWidth() - 1);
     }
     break;
+  case ISD::INTRINSIC_W_CHAIN:
+    {
+      unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+      switch (IntNo) {
+      case Intrinsic::xcore_getts:
+        // High bits are known to be zero.
+        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+                                          KnownZero.getBitWidth() - 16);
+        break;
+      case Intrinsic::xcore_int:
+      case Intrinsic::xcore_inct:
+        // High bits are known to be zero.
+        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+                                          KnownZero.getBitWidth() - 8);
+        break;
+      case Intrinsic::xcore_testct:
+        // Result is either 0 or 1.
+        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+                                          KnownZero.getBitWidth() - 1);
+        break;
+      case Intrinsic::xcore_testwct:
+        // Result is in the range 0 - 4.
+        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+                                          KnownZero.getBitWidth() - 3);
+        break;
+      }
+    }
+    break;
   }
 }
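
Editor's note, appended after the diff (illustrative only, not part of the patch): a minimal C11 sketch of source that should exercise the new atomic lowering. The invocation and target triple are assumptions (e.g. a clang contemporaneous with this tree, targeting xcore). Per the patch's own comments, setInsertFencesForAtomic(true) reduces atomic operations to Monotonic with separate ATOMIC_FENCE nodes; LowerATOMIC_FENCE turns each fence into XCoreISD::MEMBARRIER (noted above as effectively a no-op on this always-sequentially-consistent target), the naturally aligned i32 access itself becomes a plain load/store, and an under-aligned atomic would instead hit the new report_fatal_error checks in LowerATOMIC_LOAD/LowerATOMIC_STORE.

/* Sketch only; assumed build command:
 *   clang -target xcore -O2 -S atomics.c
 */
#include <stdatomic.h>

_Atomic int ready;   /* int is naturally 4-byte aligned, so the i32 paths in
                        LowerATOMIC_LOAD / LowerATOMIC_STORE apply */

void publish(int value)
{
  /* seq_cst store: reaches the backend as a Monotonic ATOMIC_STORE plus
     ATOMIC_FENCE nodes; the store lowers to a plain word store and each
     fence to XCoreISD::MEMBARRIER. */
  atomic_store_explicit(&ready, value, memory_order_seq_cst);
}

int consume(void)
{
  /* seq_cst load: likewise, a plain word load plus MEMBARRIER fences. */
  return atomic_load_explicit(&ready, memory_order_seq_cst);
}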