X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FXCore%2FXCoreISelLowering.cpp;h=6e8a95a0859b0ae0db1e2bdc93f40d343550bb66;hb=6d9e62f4324fb18106a54d13ff90ccd178c9ddf2;hp=eaa745ba9b735ce08ada7d7177635498683b9e17;hpb=aa76e9e2cf50af190de90bc778b7f7e42ef9ceff;p=oota-llvm.git diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index eaa745ba9b7..6e8a95a0859 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -11,20 +11,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xcore-lower" - #include "XCoreISelLowering.h" -#include "XCoreMachineFunctionInfo.h" #include "XCore.h" -#include "XCoreTargetObjectFile.h" -#include "XCoreTargetMachine.h" +#include "XCoreMachineFunctionInfo.h" #include "XCoreSubtarget.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" +#include "XCoreTargetMachine.h" +#include "XCoreTargetObjectFile.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -33,11 +25,22 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include + using namespace llvm; +#define DEBUG_TYPE "xcore-lower" + const char *XCoreTargetLowering:: getTargetNodeName(unsigned Opcode) const { @@ -47,6 +50,7 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper"; case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper"; + case XCoreISD::LDWSP : return "XCoreISD::LDWSP"; case XCoreISD::STWSP : return "XCoreISD::STWSP"; case XCoreISD::RETSP : return "XCoreISD::RETSP"; case XCoreISD::LADD : return "XCoreISD::LADD"; @@ -54,45 +58,45 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::LMUL : return "XCoreISD::LMUL"; case XCoreISD::MACCU : return "XCoreISD::MACCU"; case XCoreISD::MACCS : return "XCoreISD::MACCS"; + case XCoreISD::CRC8 : return "XCoreISD::CRC8"; case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; - default : return NULL; + case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET"; + case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN"; + case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER"; + default : return nullptr; } } -XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) - : TargetLowering(XTM, new XCoreTargetObjectFile()), - TM(XTM), - Subtarget(*XTM.getSubtargetImpl()) { +XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, + const XCoreSubtarget &Subtarget) + : TargetLowering(TM), TM(TM), Subtarget(Subtarget) { // Set up the register classes. addRegisterClass(MVT::i32, &XCore::GRRegsRegClass); // Compute derived properties from the register classes - computeRegisterProperties(); + computeRegisterProperties(Subtarget.getRegisterInfo()); // Division is expensive setIntDivIsCheap(false); setStackPointerRegisterToSaveRestore(XCore::SP); - setSchedulingPreference(Sched::RegPressure); + setSchedulingPreference(Sched::Source); // Use i32 for setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // XCore does not have the NodeTypes below. - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::ADDC, MVT::i32, Expand); setOperationAction(ISD::ADDE, MVT::i32, Expand); setOperationAction(ISD::SUBC, MVT::i32, Expand); setOperationAction(ISD::SUBE, MVT::i32, Expand); - // Stop the combiner recombining select and set_cc - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -119,19 +123,18 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); - // Thread Local Storage - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); // Loads - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Expand); + } // Custom expand misaligned loads / stores. setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -148,43 +151,86 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // Exception handling + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setExceptionPointerRegister(XCore::R0); + setExceptionSelectorRegister(XCore::R1); + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); + + // Atomic operations + // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. + // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. + setInsertFencesForAtomic(true); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + // TRAMPOLINE is custom lowered. setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); - maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; - maxStoresPerMemmove = maxStoresPerMemmoveOptSize - = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; + // We want to custom lower some of our intrinsics. + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize + = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::INTRINSIC_VOID); + setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setMinFunctionAlignment(1); + setPrefFunctionAlignment(2); +} + +bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + if (Val.getOpcode() != ISD::LOAD) + return false; + + EVT VT1 = Val.getValueType(); + if (!VT1.isSimple() || !VT1.isInteger() || + !VT2.isSimple() || !VT2.isInteger()) + return false; + + switch (VT1.getSimpleVT().SimpleTy) { + default: break; + case MVT::i8: + return true; + } + + return false; } SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); - case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::VAARG: return LowerVAARG(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); + case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); // FIXME: Remove these when LegalizeDAGTypes lands. case ISD::ADD: - case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); - case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAME_TO_ARGS_OFFSET: return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_LOAD: return LowerATOMIC_LOAD(Op, DAG); + case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG); default: llvm_unreachable("unimplemented operand"); } @@ -209,93 +255,70 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// -SDValue XCoreTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc dl = Op.getDebugLoc(); - SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2), - Op.getOperand(3), Op.getOperand(4)); - return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0), - Op.getOperand(1)); -} - -SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, - SelectionDAG &DAG) const -{ +SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA, + const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here - DebugLoc dl = GA.getDebugLoc(); - if (isa(GV)) { + SDLoc dl(GA); + + if (GV->getType()->getElementType()->isFunctionTy()) return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); - } - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine constness - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); - } - bool isConst = GVar && GVar->isConstant(); - if (isConst) { + + const auto *GVar = dyn_cast(GV); + if ((GV->hasSection() && StringRef(GV->getSection()).startswith(".cp.")) || + (GVar && GVar->isConstant() && GV->hasLocalLinkage())) return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); - } + return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } -SDValue XCoreTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const -{ - const GlobalValue *GV = cast(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32); - return getGlobalAddressWrapper(GA, GV, DAG); -} +static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) { + if (XTL.getTargetMachine().getCodeModel() == CodeModel::Small) + return true; -static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, - DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); -} + Type *ObjType = GV->getType()->getPointerElementType(); + if (!ObjType->isSized()) + return false; -static inline bool isZeroLengthArray(Type *Ty) { - ArrayType *AT = dyn_cast_or_null(Ty); - return AT && (AT->getNumElements() == 0); + unsigned ObjSize = XTL.getDataLayout()->getTypeAllocSize(ObjType); + return ObjSize < CodeModelLargeSize && ObjSize != 0; } SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - // FIXME there isn't really debug info here - DebugLoc dl = Op.getDebugLoc(); - // transform to label + getid() * size - const GlobalValue *GV = cast(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine size - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); - } - if (!GVar) { - llvm_unreachable("Thread local object not a GlobalVariable?"); - } - Type *Ty = cast(GV->getType())->getElementType(); - if (!Ty->isSized() || isZeroLengthArray(Ty)) { -#ifndef NDEBUG - errs() << "Size of thread local object " << GVar->getName() - << " is unknown\n"; -#endif - llvm_unreachable(0); + const GlobalAddressSDNode *GN = cast(Op); + const GlobalValue *GV = GN->getGlobal(); + SDLoc DL(GN); + int64_t Offset = GN->getOffset(); + if (IsSmallObject(GV, *this)) { + // We can only fold positive offsets that are a multiple of the word size. + int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset); + GA = getGlobalAddressWrapper(GA, GV, DAG); + // Handle the rest of the offset. + if (Offset != FoldedOffset) { + SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32); + GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining); + } + return GA; + } else { + // Ideally we would not fold in offset with an index <= 11. + Type *Ty = Type::getInt8PtrTy(*DAG.getContext()); + Constant *GA = ConstantExpr::getBitCast(const_cast(GV), Ty); + Ty = Type::getInt32Ty(*DAG.getContext()); + Constant *Idx = ConstantInt::get(Ty, Offset); + Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx); + SDValue CP = DAG.getConstantPool(GAI, MVT::i32); + return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, + MachinePointerInfo(), false, false, false, 0); } - SDValue base = getGlobalAddressWrapper(GA, GV, DAG); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); - SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl), - DAG.getConstant(Size, MVT::i32)); - return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset); } SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); const BlockAddress *BA = cast(Op)->getBlockAddress(); SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); @@ -308,15 +331,15 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); // FIXME there isn't really debug info here - DebugLoc dl = CP->getDebugLoc(); + SDLoc dl(CP); EVT PtrVT = Op.getValueType(); SDValue Res; if (CP->isMachineConstantPoolEntry()) { Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment(), CP->getOffset()); } else { Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment(), CP->getOffset()); } return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); } @@ -331,7 +354,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); JumpTableSDNode *JT = cast(Table); unsigned JTI = JT->getIndex(); MachineFunction &MF = DAG.getMachineFunction(); @@ -349,60 +372,65 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const ScaledIndex); } -static bool -IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, - int64_t &Offset) +SDValue XCoreTargetLowering:: +lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, + int64_t Offset, SelectionDAG &DAG) const { - if (Addr.getOpcode() != ISD::ADD) { - return false; - } - ConstantSDNode *CN = 0; - if (!(CN = dyn_cast(Addr.getOperand(1)))) { - return false; + if ((Offset & 0x3) == 0) { + return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(), + false, false, false, 0); } - int64_t off = CN->getSExtValue(); - const SDValue &Base = Addr.getOperand(0); - const SDValue *Root = &Base; - if (Base.getOpcode() == ISD::ADD && - Base.getOperand(1).getOpcode() == ISD::SHL) { - ConstantSDNode *CN = dyn_cast(Base.getOperand(1) - .getOperand(1)); - if (CN && (CN->getSExtValue() >= 2)) { - Root = &Base.getOperand(0); - } - } - if (isa(*Root)) { - // All frame indicies are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - if (Root->getOpcode() == XCoreISD::DPRelativeWrapper || - Root->getOpcode() == XCoreISD::CPRelativeWrapper) { - // All dp / cp relative addresses are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - // Check for an aligned global variable. - if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { - const GlobalValue *GV = GA->getGlobal(); - if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { - AlignedBase = Base; - Offset = off; - return true; - } + // Lower to pair of consecutive word aligned loads plus some bit shifting. + int32_t HighOffset = RoundUpToAlignment(Offset, 4); + int32_t LowOffset = HighOffset - 4; + SDValue LowAddr, HighAddr; + if (GlobalAddressSDNode *GASD = + dyn_cast(Base.getNode())) { + LowAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + LowOffset); + HighAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + HighOffset); + } else { + LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(LowOffset, MVT::i32)); + HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(HighOffset, MVT::i32)); } - return false; + SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32); + SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32); + + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, + LowAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, + HighAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); + SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), + High.getValue(1)); + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, DL); +} + +static bool isWordAligned(SDValue Value, SelectionDAG &DAG) +{ + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Value, KnownZero, KnownOne); + return KnownZero.countTrailingOnes() >= 2; } SDValue XCoreTargetLowering:: LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LoadSDNode *LD = cast(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type"); assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); - if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) + if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(), + LD->getAddressSpace(), + LD->getAlignment())) return SDValue(); unsigned ABIAlignment = getDataLayout()-> @@ -413,72 +441,50 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - DebugLoc DL = Op.getDebugLoc(); - - SDValue Base; - int64_t Offset; - if (!LD->isVolatile() && - IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) { - if (Offset % 4 == 0) { - // We've managed to infer better alignment information than the load - // already has. Use an aligned load. - // - return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr, - MachinePointerInfo(), - false, false, false, 0); + SDLoc DL(Op); + + if (!LD->isVolatile()) { + const GlobalValue *GV; + int64_t Offset = 0; + if (DAG.isBaseWithConstantOffset(BasePtr) && + isWordAligned(BasePtr->getOperand(0), DAG)) { + SDValue NewBasePtr = BasePtr->getOperand(0); + Offset = cast(BasePtr->getOperand(1))->getSExtValue(); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); + } + if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) && + MinAlign(GV->getAlignment(), 4) == 4) { + SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL, + BasePtr->getValueType(0)); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); } - // Lower to - // ldw low, base[offset >> 2] - // ldw high, base[(offset >> 2) + 1] - // shr low_shifted, low, (offset & 0x3) * 8 - // shl high_shifted, high, 32 - (offset & 0x3) * 8 - // or result, low_shifted, high_shifted - SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32); - SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); - SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); - SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - - SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); - SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - - SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); - SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); - SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), - High.getValue(1)); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); } if (LD->getAlignment() == 2) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, - LD->isVolatile(), LD->isNonTemporal(), 2); + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, HighAddr, LD->getPointerInfo().getWithOffset(2), MVT::i16, LD->isVolatile(), - LD->isNonTemporal(), 2); + LD->isNonTemporal(), LD->isInvariant(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), High.getValue(1)); SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } // Lower to a call to __misaligned_load(BasePtr). - unsigned AS = LD->getAddressSpace(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -486,17 +492,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Entry.Node = BasePtr; Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false, - false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, DL); - std::pair CallResult = LowerCallTo(CLI); - - SDValue Ops[] = - { CallResult.first, CallResult.second }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(Chain) + .setCallee(CallingConv::C, IntPtrTy, + DAG.getExternalSymbol("__misaligned_load", getPointerTy()), + std::move(Args), 0); - return DAG.getMergeValues(Ops, 2, DL); + std::pair CallResult = LowerCallTo(CLI); + SDValue Ops[] = { CallResult.first, CallResult.second }; + return DAG.getMergeValues(Ops, DL); } SDValue XCoreTargetLowering:: @@ -505,7 +509,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const StoreSDNode *ST = cast(Op); assert(!ST->isTruncatingStore() && "Unexpected store type"); assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); - if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(), + ST->getAddressSpace(), + ST->getAlignment())) { return SDValue(); } unsigned ABIAlignment = getDataLayout()-> @@ -517,7 +523,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (ST->getAlignment() == 2) { SDValue Low = Value; @@ -537,8 +543,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). - unsigned AS = ST->getAddressSpace(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -549,14 +554,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const Entry.Node = Value; Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), false, false, - false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - Args, DAG, dl); - std::pair CallResult = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__misaligned_store", getPointerTy()), + std::move(Args), 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -565,7 +569,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI && "Unexpected operand to lower!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Zero = DAG.getConstant(0, MVT::i32); @@ -574,7 +578,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const LHS, RHS); SDValue Lo(Hi.getNode(), 1); SDValue Ops[] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue XCoreTargetLowering:: @@ -582,7 +586,7 @@ LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI && "Unexpected operand to lower!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Zero = DAG.getConstant(0, MVT::i32); @@ -591,7 +595,7 @@ LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const Zero, Zero); SDValue Lo(Hi.getNode(), 1); SDValue Ops[] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// isADDADDMUL - Return whether Op is in a form that is equivalent to @@ -667,7 +671,7 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const } else { return SDValue(); } - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LL, RL, AddendL, AddendH; LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mul.getOperand(0), DAG.getConstant(0, MVT::i32)); @@ -722,11 +726,11 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const if (N->getOpcode() == ISD::ADD) { SDValue Result = TryExpandADDWithMul(N, DAG); - if (Result.getNode() != 0) + if (Result.getNode()) return Result; } - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Extract components SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, @@ -742,13 +746,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD : XCoreISD::LSUB; SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSL, RHSL, Zero); - SDValue Lo(Carry.getNode(), 1); + SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSL, RHSL, Zero); + SDValue Carry(Lo.getNode(), 1); - SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSH, RHSH, Carry); - SDValue Hi(Ignored.getNode(), 1); + SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSH, RHSH, Carry); + SDValue Ignored(Hi.getNode(), 1); // Merge the pieces return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } @@ -756,31 +760,33 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const SDValue XCoreTargetLowering:: LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - llvm_unreachable("unimplemented"); - // FIXME Arguments passed by reference need a extra dereference. + // Whist llvm does not support aggregate varargs we can ignore + // the possibility of the ValueType being an implicit byVal vararg. SDNode *Node = Op.getNode(); - DebugLoc dl = Node->getDebugLoc(); - const Value *V = cast(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), MachinePointerInfo(V), + EVT VT = Node->getValueType(0); // not an aggregate + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc dl(Node); + SDValue VAList = DAG.getLoad(PtrVT, dl, InChain, + VAListPtr, MachinePointerInfo(SV), false, false, false, 0); // Increment the pointer, VAList, to the next vararg - SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, - DAG.getConstant(VT.getSizeInBits(), - getPointerTy())); + SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAList, + DAG.getIntPtrConstant(VT.getSizeInBits() / 8)); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), - MachinePointerInfo(V), false, false, 0); + InChain = DAG.getStore(VAList.getValue(1), dl, nextPtr, VAListPtr, + MachinePointerInfo(SV), false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), false, false, false, 0); } SDValue XCoreTargetLowering:: LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // vastart stores the address of the VarArgsFrameIndex slot into the // memory location argument MachineFunction &MF = DAG.getMachineFunction(); @@ -792,15 +798,85 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + // This nodes represent llvm.frameaddress on the DAG. + // It takes one operand, the index of the frame address to return. + // An index of zero corresponds to the current function's frame address. + // An index of one to the parent's frame address, and so on. + // Depths > 0 not supported yet! + if (cast(Op.getOperand(0))->getZExtValue() > 0) + return SDValue(); + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), + RegInfo->getFrameRegister(MF), MVT::i32); +} + +SDValue XCoreTargetLowering:: +LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + // This nodes represent llvm.returnaddress on the DAG. + // It takes one operand, the index of the return address to return. + // An index of zero corresponds to the current function's return address. + // An index of one to the parent's return address, and so on. // Depths > 0 not supported yet! if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); MachineFunction &MF = DAG.getMachineFunction(); - const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + XCoreFunctionInfo *XFI = MF.getInfo(); + int FI = XFI->createLRSpillSlot(MF); + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + return DAG.getLoad(getPointerTy(), SDLoc(Op), DAG.getEntryNode(), FIN, + MachinePointerInfo::getFixedStack(FI), false, false, + false, 0); +} + +SDValue XCoreTargetLowering:: +LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { + // This node represents offset from frame pointer to first on-stack argument. + // This is needed for correct stack adjustment during unwind. + // However, we don't know the offset until after the frame has be finalised. + // This is done during the XCoreFTAOElim pass. + return DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, SDLoc(Op), MVT::i32); +} + +SDValue XCoreTargetLowering:: +LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { + // OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) + // This node represents 'eh_return' gcc dwarf builtin, which is used to + // return from exception. The general meaning is: adjust stack by OFFSET and + // pass execution to HANDLER. + MachineFunction &MF = DAG.getMachineFunction(); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + SDLoc dl(Op); + + // Absolute SP = (FP + FrameToArgs) + Offset + const TargetRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + SDValue Stack = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RegInfo->getFrameRegister(MF), MVT::i32); + SDValue FrameToArgs = DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, dl, + MVT::i32); + Stack = DAG.getNode(ISD::ADD, dl, MVT::i32, Stack, FrameToArgs); + Stack = DAG.getNode(ISD::ADD, dl, MVT::i32, Stack, Offset); + + // R0=ExceptionPointerRegister R1=ExceptionSelectorRegister + // which leaves 2 caller saved registers, R2 & R3 for us to use. + unsigned StackReg = XCore::R2; + unsigned HandlerReg = XCore::R3; + + SDValue OutChains[] = { + DAG.getCopyToReg(Chain, dl, StackReg, Stack), + DAG.getCopyToReg(Chain, dl, HandlerReg, Handler) + }; + + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + + return DAG.getNode(XCoreISD::EH_RETURN, dl, MVT::Other, Chain, + DAG.getRegister(StackReg, MVT::i32), + DAG.getRegister(HandlerReg, MVT::i32)); + } SDValue XCoreTargetLowering:: @@ -832,7 +908,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Addr = Trmp; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32), Addr, MachinePointerInfo(TrmpAddr), false, false, 0); @@ -861,7 +937,91 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +SDValue XCoreTargetLowering:: +LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + case Intrinsic::xcore_crc8: + EVT VT = Op.getValueType(); + SDValue Data = + DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT), + Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3)); + SDValue Crc(Data.getNode(), 1); + SDValue Results[] = { Crc, Data }; + return DAG.getMergeValues(Results, DL); + } + return SDValue(); +} + +SDValue XCoreTargetLowering:: +LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +SDValue XCoreTargetLowering:: +LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); + assert(N->getOrdering() <= Monotonic && + "setInsertFencesForAtomic(true) and yet greater than Monotonic"); + if (N->getMemoryVT() == MVT::i32) { + if (N->getAlignment() < 4) + report_fatal_error("atomic load must be aligned"); + return DAG.getLoad(getPointerTy(), SDLoc(Op), N->getChain(), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getAlignment(), + N->getAAInfo(), N->getRanges()); + } + if (N->getMemoryVT() == MVT::i16) { + if (N->getAlignment() < 2) + report_fatal_error("atomic load must be aligned"); + return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), + N->getBasePtr(), N->getPointerInfo(), MVT::i16, + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getAlignment(), N->getAAInfo()); + } + if (N->getMemoryVT() == MVT::i8) + return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), + N->getBasePtr(), N->getPointerInfo(), MVT::i8, + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getAlignment(), N->getAAInfo()); + return SDValue(); +} + +SDValue XCoreTargetLowering:: +LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); + assert(N->getOrdering() <= Monotonic && + "setInsertFencesForAtomic(true) and yet greater than Monotonic"); + if (N->getMemoryVT() == MVT::i32) { + if (N->getAlignment() < 4) + report_fatal_error("atomic store must be aligned"); + return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getAAInfo()); + } + if (N->getMemoryVT() == MVT::i16) { + if (N->getAlignment() < 2) + report_fatal_error("atomic store must be aligned"); + return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), MVT::i16, + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getAAInfo()); + } + if (N->getMemoryVT() == MVT::i8) + return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), + N->getBasePtr(), N->getPointerInfo(), MVT::i8, + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getAAInfo()); + return SDValue(); } //===----------------------------------------------------------------------===// @@ -879,10 +1039,10 @@ SDValue XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -904,6 +1064,51 @@ XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers / memory locations. +static SDValue +LowerCallResult(SDValue Chain, SDValue InFlag, + const SmallVectorImpl &RVLocs, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + SmallVector, 4> ResultMemLocs; + // Copy results out of physical registers. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + const CCValAssign &VA = RVLocs[i]; + if (VA.isRegLoc()) { + Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(), + InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } else { + assert(VA.isMemLoc()); + ResultMemLocs.push_back(std::make_pair(VA.getLocMemOffset(), + InVals.size())); + // Reserve space for this result. + InVals.push_back(SDValue()); + } + } + + // Copy results out of memory. + SmallVector MemOpChains; + for (unsigned i = 0, e = ResultMemLocs.size(); i != e; ++i) { + int offset = ResultMemLocs[i].first; + unsigned index = ResultMemLocs[i].second; + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) }; + SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops); + InVals[index] = load; + MemOpChains.push_back(load.getValue(1)); + } + + // Transform all loads nodes into one single node because + // all load nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); + + return Chain; +} + /// LowerCCCCallTo - functions arguments are copied from virtual /// regs to (physical regs)/(stack frame), CALLSEQ_START and /// CALLSEQ_END are emitted. @@ -915,13 +1120,13 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); // The ABI dictates there should be one stack slot available to the callee // on function entry (for saving lr). @@ -929,11 +1134,18 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CCInfo.AnalyzeCallOperands(Outs, CC_XCore); + SmallVector RVLocs; + // Analyze return values to determine the number of bytes of stack required. + CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore); + // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = RetCCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, - getPointerTy(), true)); + getPointerTy(), true), dl); SmallVector, 4> RegsToPass; SmallVector MemOpChains; @@ -976,8 +1188,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all store nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. @@ -1016,60 +1227,36 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, getPointerTy(), true), DAG.getConstant(0, getPointerTy(), true), - InFlag); + InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); -} - -/// LowerCallResult - Lower the result values of a call into the -/// appropriate copies out of appropriate physical registers. -SDValue -XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallResult(Ins, RetCC_XCore); - - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); - InFlag = Chain.getValue(2); - InVals.push_back(Chain.getValue(0)); - } - - return Chain; + return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals); } //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// +namespace { + struct ArgDataPair { SDValue SDV; ISD::ArgFlagsTy Flags; }; +} + /// XCore formal arguments implementation SDValue XCoreTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1094,17 +1281,18 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + XCoreFunctionInfo *XFI = MF.getInfo(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_XCore); @@ -1112,9 +1300,25 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned LRSaveSize = StackSlotSize; + if (!isVarArg) + XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize); + + // All getCopyFromReg ops must precede any getMemcpys to prevent the + // scheduler clobbering a register before it has been copied. + // The stages are: + // 1. CopyFromReg (and load) arg & vararg registers. + // 2. Chain CopyFromReg nodes into a TokenFactor. + // 3. Memcpy 'byVal' args & push final InVals. + // 4. Chain mem ops nodes into a TokenFactor. + SmallVector CFRegNode; + SmallVector ArgData; + SmallVector MemOps; + + // 1a. CopyFromReg (and load) arg registers. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + SDValue ArgIn; if (VA.isRegLoc()) { // Arguments passed in registers @@ -1126,12 +1330,13 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } case MVT::i32: unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + ArgIn = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); + CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1)); } } else { // sanity check @@ -1151,22 +1356,23 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0)); + ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); } + const ArgDataPair ADP = { ArgIn, Ins[i].Flags }; + ArgData.push_back(ADP); } + // 1b. CopyFromReg vararg registers. if (isVarArg) { - /* Argument registers */ - static const uint16_t ArgRegs[] = { + // Argument registers + static const MCPhysReg ArgRegs[] = { XCore::R0, XCore::R1, XCore::R2, XCore::R3 }; XCoreFunctionInfo *XFI = MF.getInfo(); - unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs, - array_lengthof(ArgRegs)); + unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs); if (FirstVAReg < array_lengthof(ArgRegs)) { - SmallVector MemOps; int offset = 0; // Save remaining registers, storing higher register numbers at a higher // address @@ -1182,14 +1388,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + CFRegNode.push_back(Val.getValue(Val->getNumValues() - 1)); // Move argument from virt reg -> stack SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); } else { // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( @@ -1198,6 +1402,40 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, } } + // 2. chain CopyFromReg nodes into a TokenFactor. + if (!CFRegNode.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, CFRegNode); + + // 3. Memcpy 'byVal' args & push final InVals. + // Aggregates passed "byVal" need to be copied by the callee. + // The callee will use a pointer to this copy, rather than the original + // pointer. + for (SmallVectorImpl::const_iterator ArgDI = ArgData.begin(), + ArgDE = ArgData.end(); + ArgDI != ArgDE; ++ArgDI) { + if (ArgDI->Flags.isByVal() && ArgDI->Flags.getByValSize()) { + unsigned Size = ArgDI->Flags.getByValSize(); + unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign()); + // Create a new object on the stack and copy the pointee into it. + int FI = MFI->CreateStackObject(Size, Align, false); + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + InVals.push_back(FIN); + MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, + DAG.getConstant(Size, MVT::i32), + Align, false, false, + MachinePointerInfo(), + MachinePointerInfo())); + } else { + InVals.push_back(ArgDI->SDV); + } + } + + // 4, chain mem ops nodes into a TokenFactor. + if (!MemOps.empty()) { + MemOps.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + } + return Chain; } @@ -1211,8 +1449,12 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); - return CCInfo.CheckReturn(Outs, RetCC_XCore); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + if (!CCInfo.CheckReturn(Outs, RetCC_XCore)) + return false; + if (CCInfo.getNextStackOffset() != 0 && isVarArg) + return false; + return true; } SDValue @@ -1220,49 +1462,82 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { + + XCoreFunctionInfo *XFI = + DAG.getMachineFunction().getInfo(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); // Analyze return values. - CCInfo.AnalyzeReturn(Outs, RetCC_XCore); + if (!isVarArg) + CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + CCInfo.AnalyzeReturn(Outs, RetCC_XCore); SDValue Flag; + SmallVector RetOps(1, Chain); - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + // Return on XCore is always a "retsp 0" + RetOps.push_back(DAG.getConstant(0, MVT::i32)); + + SmallVector MemOpChains; + // Handle return values that must be copied to memory. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); + if (VA.isRegLoc()) + continue; + assert(VA.isMemLoc()); + if (isVarArg) { + report_fatal_error("Can't return value from vararg function in memory"); + } - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); + int Offset = VA.getLocMemOffset(); + unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8; + // Create the frame index object for the memory location. + int FI = MFI->CreateFixedObject(ObjSize, Offset, false); + + // Create a SelectionDAG node corresponding to a store + // to this memory location. + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN, + MachinePointerInfo::getFixedStack(FI), false, false, + 0)); + } + + // Transform all store nodes into one single node because + // all stores are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); + + // Now handle return values copied to registers. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + if (!VA.isRegLoc()) + continue; + // Copy the result values into the output registers. + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // Return on XCore is always a "retsp 0" + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32), Flag); - else // Return Void - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32)); + RetOps.push_back(Flag); + + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// @@ -1272,7 +1547,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, MachineBasicBlock * XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); @@ -1300,8 +1575,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. @@ -1339,9 +1613,49 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (N->getOpcode()) { default: break; + case ISD::INTRINSIC_VOID: + switch (cast(N->getOperand(1))->getZExtValue()) { + case Intrinsic::xcore_outt: + case Intrinsic::xcore_outct: + case Intrinsic::xcore_chkct: { + SDValue OutVal = N->getOperand(3); + // These instructions ignore the high bits. + if (OutVal.hasOneUse()) { + unsigned BitWidth = OutVal.getValueSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || + TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, + TLO)) + DCI.CommitTargetLoweringOpt(TLO); + } + break; + } + case Intrinsic::xcore_setpt: { + SDValue Time = N->getOperand(3); + // This instruction ignores the high bits. + if (Time.hasOneUse()) { + unsigned BitWidth = Time.getValueSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || + TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, + TLO)) + DCI.CommitTargetLoweringOpt(TLO); + } + break; + } + } + break; case XCoreISD::LADD: { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1359,22 +1673,22 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2, DAG.getConstant(1, VT)); - SDValue Ops [] = { Carry, Result }; - return DAG.getMergeValues(Ops, 2, dl); + SDValue Ops[] = { Result, Carry }; + return DAG.getMergeValues(Ops, dl); } // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); - SDValue Ops [] = { Carry, Result }; - return DAG.getMergeValues(Ops, 2, dl); + SDValue Ops[] = { Result, Carry }; + return DAG.getMergeValues(Ops, dl); } } } @@ -1392,28 +1706,28 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), N2); - SDValue Ops [] = { Borrow, Result }; - return DAG.getMergeValues(Ops, 2, dl); + SDValue Ops[] = { Result, Borrow }; + return DAG.getMergeValues(Ops, dl); } } // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); - SDValue Ops [] = { Borrow, Result }; - return DAG.getMergeValues(Ops, 2, dl); + SDValue Ops[] = { Result, Borrow }; + return DAG.getMergeValues(Ops, dl); } } } @@ -1438,11 +1752,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // If the high result is unused fold to add(a, b) if (N->hasNUsesOfValue(0, 0)) { SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3); - SDValue Ops [] = { Lo, Lo }; - return DAG.getMergeValues(Ops, 2, dl); + SDValue Ops[] = { Lo, Lo }; + return DAG.getMergeValues(Ops, dl); } // Otherwise fold to ladd(a, b, 0) - return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Result = + DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Carry(Result.getNode(), 1); + SDValue Ops[] = { Carry, Result }; + return DAG.getMergeValues(Ops, dl); } } break; @@ -1491,7 +1809,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // Replace unaligned store of unaligned load with memmove. StoreSDNode *ST = cast(N); if (!DCI.isBeforeLegalize() || - allowsUnalignedMemoryAccesses(ST->getMemoryVT()) || + allowsMisalignedMemoryAccesses(ST->getMemoryVT(), + ST->getAddressSpace(), + ST->getAlignment()) || ST->isVolatile() || ST->isIndexed()) { break; } @@ -1526,22 +1846,50 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } -void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case XCoreISD::LADD: case XCoreISD::LSUB: - if (Op.getResNo() == 0) { + if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), KnownZero.getBitWidth() - 1); } break; + case ISD::INTRINSIC_W_CHAIN: + { + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + case Intrinsic::xcore_getts: + // High bits are known to be zero. + KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), + KnownZero.getBitWidth() - 16); + break; + case Intrinsic::xcore_int: + case Intrinsic::xcore_inct: + // High bits are known to be zero. + KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), + KnownZero.getBitWidth() - 8); + break; + case Intrinsic::xcore_testct: + // Result is either 0 or 1. + KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), + KnownZero.getBitWidth() - 1); + break; + case Intrinsic::xcore_testwct: + // Result is in the range 0 - 4. + KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), + KnownZero.getBitWidth() - 3); + break; + } + } + break; } } @@ -1609,10 +1957,10 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // XCore Inline Assembly Support //===----------------------------------------------------------------------===// -std::pair -XCoreTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { +std::pair +XCoreTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; @@ -1622,5 +1970,5 @@ getRegForInlineAsmConstraint(const std::string &Constraint, } // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); }