diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 2e4956f16d0..c5fed02e17b 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -30,8 +30,9 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
-#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Target/TargetCallingConv.h"
 #include "llvm/Target/TargetMachine.h"
@@ -50,6 +51,7 @@ namespace llvm {
   class MachineFunction;
   class MachineInstr;
   class MachineJumpTableInfo;
+  class MachineLoop;
   class Mangler;
   class MCContext;
   class MCExpr;
@@ -136,10 +138,9 @@ public:
     llvm_unreachable("Invalid content kind");
   }

-  /// NOTE: The constructor takes ownership of TLOF.
-  explicit TargetLoweringBase(const TargetMachine &TM,
-                              const TargetLoweringObjectFile *TLOF);
-  virtual ~TargetLoweringBase();
+  /// NOTE: The TargetMachine owns TLOF.
+  explicit TargetLoweringBase(const TargetMachine &TM);
+  virtual ~TargetLoweringBase() {}

 protected:
   /// \brief Initialize all of the actions to default values.
@@ -148,7 +149,9 @@ protected:
 public:
   const TargetMachine &getTargetMachine() const { return TM; }
   const DataLayout *getDataLayout() const { return DL; }
-  const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
+  const TargetLoweringObjectFile &getObjFileLowering() const {
+    return *TM.getObjFileLowering();
+  }

   bool isBigEndian() const { return !IsLittleEndian; }
   bool isLittleEndian() const { return IsLittleEndian; }
@@ -182,10 +185,18 @@ public:
     return HasMultipleConditionRegisters;
   }

-  /// Return true if a vector of the given type should be split
-  /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
-  /// legalization.
-  virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
+  /// Return true if the target has BitExtract instructions.
+  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
+
+  /// Return the preferred vector type legalization action.
+  virtual TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const {
+    // The default action for one element vectors is to scalarize
+    if (VT.getVectorNumElements() == 1)
+      return TypeScalarizeVector;
+    // The default action for other vectors is to promote
+    return TypePromoteInteger;
+  }

   // There are two general methods for expanding a BUILD_VECTOR node:
   //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
@@ -206,6 +217,11 @@ public:
   /// several shifts, adds, and multiplies for this target.
   bool isIntDivCheap() const { return IntDivIsCheap; }

+  /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x)
+  bool isFsqrtCheap() const {
+    return FsqrtIsCheap;
+  }
+
   /// Returns true if target has indicated at least one type should be bypassed.
   bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }

@@ -215,12 +231,8 @@ public:
     return BypassSlowDivWidths;
   }

-  /// Return true if pow2 div is cheaper than a chain of srl/add/sra.
-  bool isPow2DivCheap() const { return Pow2DivIsCheap; }
-
-  /// Return true if Div never traps, returns 0 when div by 0 and return TMin,
-  /// when sdiv TMin by -1.
-  bool isDivWellDefined() const { return DivIsWellDefined; }
+  /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra.
+  bool isPow2SDivCheap() const { return Pow2SDivIsCheap; }

   /// Return true if Flow Control is an expensive operation that should be
   /// avoided.
@@ -243,6 +255,16 @@ public:
     return true;
   }

+  /// \brief Return true if it is cheap to speculate a call to intrinsic cttz.
+  virtual bool isCheapToSpeculateCttz() const {
+    return false;
+  }
+
+  /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
+  virtual bool isCheapToSpeculateCtlz() const {
+    return false;
+  }
+
   /// \brief Return if the target supports combining a
   /// chain like:
   /// \code
@@ -258,10 +280,32 @@ public:
     return MaskAndBranchFoldingIsLegal;
   }

-  /// Return the ValueType of the result of SETCC operations. Also used to
-  /// obtain the target's preferred type for the condition operand of SELECT and
-  /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
-  /// since there are no other operands to get a type hint from.
+  /// \brief Return true if the target wants to use the optimization that
+  /// turns ext(promotableInst1(...(promotableInstN(load)))) into
+  /// promotedInst1(...(promotedInstN(ext(load)))).
+  bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
+
+  /// Return true if the target can combine store(extractelement VectorTy,
+  /// Idx).
+  /// \p Cost[out] gives the cost of that transformation when this is true.
+  virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                         unsigned &Cost) const {
+    return false;
+  }
+
+  /// Return true if target supports floating point exceptions.
+  bool hasFloatingPointExceptions() const {
+    return HasFloatingPointExceptions;
+  }
+
+  /// Return true if the target always benefits from combining into FMA for a
+  /// given value type. This must typically return false on targets where FMA
+  /// takes more cycles to execute than FADD.
+  virtual bool enableAggressiveFMAFusion(EVT VT) const {
+    return false;
+  }
+
+  /// Return the ValueType of the result of SETCC operations.
   virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;

   /// Return the ValueType for comparison libcalls. Comparison libcalls include
@@ -280,8 +324,17 @@ public:
   /// selects between the two kinds. For example on X86 a scalar boolean should
   /// be zero extended from i1, while the elements of a vector of booleans
   /// should be sign extended from i1.
-  BooleanContent getBooleanContents(bool isVec) const {
-    return isVec ? BooleanVectorContents : BooleanContents;
+  ///
+  /// Some CPUs also treat floating point types the same way as they treat
+  /// vectors instead of the way they treat scalars.
+  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
+    if (isVec)
+      return BooleanVectorContents;
+    return isFloat ? BooleanFloatContents : BooleanContents;
+  }
+
+  BooleanContent getBooleanContents(EVT Type) const {
+    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
   }

   /// Return target scheduling preference.
@@ -413,10 +466,15 @@ public:
     EVT memVT;              // memory VT
     const Value* ptrVal;    // value representing memory location
     int offset;             // offset off of ptrVal
+    unsigned size;          // the size of the memory location
+                            // (taken from memVT if zero)
     unsigned align;         // alignment
     bool vol;               // is volatile?
     bool readMem;           // reads memory?
     bool writeMem;          // writes memory?
+
+    IntrinsicInfo() : opc(0), ptrVal(nullptr), offset(0), size(0), align(1),
+                      vol(false), readMem(false), writeMem(false) {}
   };

   /// Given an intrinsic, checks if on the target the intrinsic will need to map
@@ -504,26 +562,31 @@ public:
   /// Return how this load with extension should be treated: either it is legal,
   /// needs to be promoted to a larger size, needs to be expanded to some other
   /// code sequence, or the target has a custom expander for it.
-  LegalizeAction getLoadExtAction(unsigned ExtType, MVT VT) const {
-    assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
-           "Table isn't big enough!");
-    return (LegalizeAction)LoadExtActions[VT.SimpleTy][ExtType];
+  LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const {
+    if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
+    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
+    unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
+           MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
+    return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType];
   }

   /// Return true if the specified load with extension is legal on this target.
-  bool isLoadExtLegal(unsigned ExtType, EVT VT) const {
-    return VT.isSimple() &&
-      getLoadExtAction(ExtType, VT.getSimpleVT()) == Legal;
+  bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
+    return ValVT.isSimple() && MemVT.isSimple() &&
+      getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
   }

   /// Return how this store with truncation should be treated: either it is
   /// legal, needs to be promoted to a larger size, needs to be expanded to some
   /// other code sequence, or the target has a custom expander for it.
-  LegalizeAction getTruncStoreAction(MVT ValVT, MVT MemVT) const {
-    assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
+  LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
+    if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
+    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
+    unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
+    assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)TruncStoreActions[ValVT.SimpleTy]
-                                            [MemVT.SimpleTy];
+    return (LegalizeAction)TruncStoreActions[ValI][MemI];
   }

   /// Return true if the specified store with truncation is legal on this
@@ -538,7 +601,7 @@ public:
   /// sequence, or the target has a custom expander for it.
   LegalizeAction
   getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
-    assert(IdxMode < ISD::LAST_INDEXED_MODE && VT < MVT::LAST_VALUETYPE &&
+    assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
           "Table isn't big enough!");
    unsigned Ty = (unsigned)VT.SimpleTy;
    return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
@@ -556,7 +619,7 @@ public:
   /// sequence, or the target has a custom expander for it.
   LegalizeAction
   getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
-    assert(IdxMode < ISD::LAST_INDEXED_MODE && VT < MVT::LAST_VALUETYPE &&
+    assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
           "Table isn't big enough!");
    unsigned Ty = (unsigned)VT.SimpleTy;
    return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
@@ -712,6 +775,23 @@ public:
   /// reduce runtime.
   virtual bool ShouldShrinkFPConstant(EVT) const { return true; }

+  // Return true if it is profitable to reduce the given load node to a smaller
+  // type.
+  //
+  // e.g. (i16 (trunc (i32 (load x)))) -> (i16 load x) should be performed
+  virtual bool shouldReduceLoadWidth(SDNode *Load,
+                                     ISD::LoadExtType ExtTy,
+                                     EVT NewVT) const {
+    return true;
+  }
+
+  /// When splitting a value of the specified type into parts, does the Lo
+  /// or Hi part come first? This usually follows the endianness, except
+  /// for ppcf128, where the Hi part always comes first.
+  bool hasBigEndianPartOrdering(EVT VT) const {
+    return isBigEndian() || VT == MVT::ppcf128;
+  }
+
   /// If true, the target has custom DAG combine transformations that it can
   /// perform for the specified node.
   bool hasTargetDAGCombine(ISD::NodeType NT) const {
@@ -753,14 +833,15 @@ public:
   ///
   /// This function returns true if the target allows unaligned memory accesses
   /// of the specified type in the given address space. If true, it also returns
-  /// whether the unaligned memory access is "fast" in the third argument by
+  /// whether the unaligned memory access is "fast" in the last argument by
   /// reference. This is used, for example, in situations where an array
   /// copy/move/set is converted to a sequence of store operations. Its use
   /// helps to ensure that such replacements don't generate code that causes an
   /// alignment error (trap) on the target machine.
-  virtual bool allowsUnalignedMemoryAccesses(EVT,
-                                             unsigned AddrSpace = 0,
-                                             bool * /*Fast*/ = nullptr) const {
+  virtual bool allowsMisalignedMemoryAccesses(EVT,
+                                              unsigned AddrSpace = 0,
+                                              unsigned Align = 1,
+                                              bool * /*Fast*/ = nullptr) const {
     return false;
   }

@@ -803,11 +884,6 @@ public:
     return UseUnderscoreLongJmp;
   }

-  /// Return whether the target can generate code for jump tables.
-  bool supportJumpTables() const {
-    return SupportJumpTables;
-  }
-
   /// Return integer threshold on number of blocks to use jump tables rather
   /// than if sequence.
   int getMinimumJumpTableEntries() const {
@@ -860,7 +936,7 @@ public:
   }

   /// Return the preferred loop alignment.
-  unsigned getPrefLoopAlignment() const {
+  virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
     return PrefLoopAlignment;
   }

@@ -902,9 +978,13 @@ public:
   /// @}

   //===--------------------------------------------------------------------===//
-  /// \name Helpers for load-linked/store-conditional atomic expansion.
+  /// \name Helpers for atomic expansion.
   /// @{

+  /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
+  /// and expand AtomicCmpXchgInst.
+  virtual bool hasLoadLinkedStoreConditional() const { return false; }
+
   /// Perform a load-linked operation on Addr, returning a "Value *" with the
   /// corresponding pointee type. This may entail some non-trivial operations to
   /// truncate or reconstruct types that will be illegal in the backend. See
@@ -921,15 +1001,90 @@ public:
     llvm_unreachable("Store conditional unimplemented on this target");
   }

-  /// Return true if the given (atomic) instruction should be expanded by the
-  /// IR-level AtomicExpandLoadLinked pass into a loop involving
-  /// load-linked/store-conditional pairs. Atomic stores will be expanded in the
-  /// same way as "atomic xchg" operations which ignore their output if needed.
-  virtual bool shouldExpandAtomicInIR(Instruction *Inst) const {
+  /// Inserts into the IR a target-specific intrinsic specifying a fence.
+  /// It is called by AtomicExpandPass before expanding an
+  ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
+  /// RMW and CmpXchg set both IsStore and IsLoad to true.
+  /// This function should return either nullptr or a pointer to an IR-level
+  ///   Instruction. Even complex fence sequences can be represented by a
+  ///   single Instruction through an intrinsic to be lowered later.
+  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
+  /// Backends should override this method to produce target-specific intrinsic
+  ///   for their fences.
+  /// FIXME: Please note that the default implementation here in terms of
+  ///   IR-level fences exists for historical/compatibility reasons and is
+  ///   *unsound*! Fences cannot, in general, be used to restore sequential
+  ///   consistency. For example, consider the following:
+  /// atomic<int> x = y = 0;
+  /// int r1, r2, r3, r4;
+  /// Thread 0:
+  ///   x.store(1);
+  /// Thread 1:
+  ///   y.store(1);
+  /// Thread 2:
+  ///   r1 = x.load();
+  ///   r2 = y.load();
+  /// Thread 3:
+  ///   r3 = y.load();
+  ///   r4 = x.load();
+  /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
+  ///   seq_cst. But if they are lowered to monotonic accesses, no amount of
+  ///   IR-level fences can prevent it.
+  /// @{
+  virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                        bool IsStore, bool IsLoad) const {
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastRelease(Ord) && IsStore)
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
+  }
+
+  virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                         bool IsStore, bool IsLoad) const {
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastAcquire(Ord))
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
+  }
+  /// @}
+
+  /// Returns true if the given (atomic) store should be expanded by the
+  /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its output.
+  virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
     return false;
   }

+  /// Returns true if the given (atomic) load should be expanded by the
+  /// IR-level AtomicExpand pass into a load-linked instruction
+  /// (through emitLoadLinked()).
+  virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
+
+  /// Returns true if the given AtomicRMW should be expanded by the
+  /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
+  virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
+    return false;
+  }
+
+  /// On some platforms, an AtomicRMW that never actually modifies the value
+  /// (such as fetch_add of 0) can be turned into a fence followed by an
+  /// atomic load. This may sound useless, but it makes it possible for the
+  /// processor to keep the cacheline shared, dramatically improving
+  /// performance. And such idempotent RMWs are useful for implementing some
+  /// kinds of locks, see for example (justification + benchmarks):
+  /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
+  /// This method tries doing that transformation, returning the atomic load if
+  /// it succeeds, and nullptr otherwise.
+  /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
+  /// another round of expansion.
+  virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
+    return nullptr;
+  }

   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
   // the derived class constructor to configure this object for the target.
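Since the hunks above only show the hook declarations and their defaults, a brief illustration may help. The following is a minimal sketch, not part of the patch, of how a backend might override the new atomic-expansion hooks; the class MyTargetLowering is hypothetical, while CreateFence, isAtLeastRelease, and isAtLeastAcquire are the same helpers the default implementations above use.

    // Sketch only: a hypothetical backend that opts in to LL/SC expansion and
    // brackets atomics with plain IR fences (see the unsoundness caveat above).
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    bool MyTargetLowering::hasLoadLinkedStoreConditional() const {
      // AtomicExpandPass will rewrite atomicrmw/cmpxchg into loops built from
      // emitLoadLinked()/emitStoreConditional().
      return true;
    }

    Instruction *MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                    AtomicOrdering Ord,
                                                    bool IsStore,
                                                    bool IsLoad) const {
      // A release (or stronger) store needs a barrier before the access.
      if (isAtLeastRelease(Ord) && IsStore)
        return Builder.CreateFence(Ord);
      return nullptr;
    }

    Instruction *MyTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                     AtomicOrdering Ord,
                                                     bool IsStore,
                                                     bool IsLoad) const {
      // An acquire (or stronger) operation needs a barrier after the access.
      if (isAtLeastAcquire(Ord))
        return Builder.CreateFence(Ord);
      return nullptr;
    }

A real target would normally return a target-specific intrinsic from these hooks rather than a plain fence, for the reasons spelled out in the FIXME above.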
@@ -939,9 +1094,19 @@ public:
   virtual void resetOperationActions() {}

 protected:
-  /// Specify how the target extends the result of a boolean value from i1 to a
-  /// wider type. See getBooleanContents.
-  void setBooleanContents(BooleanContent Ty) { BooleanContents = Ty; }
+  /// Specify how the target extends the result of integer and floating point
+  /// boolean values from i1 to a wider type (one kind for both); see getBooleanContents.
+  void setBooleanContents(BooleanContent Ty) {
+    BooleanContents = Ty;
+    BooleanFloatContents = Ty;
+  }
+
+  /// Specify how the target extends the result of integer and floating point
+  /// boolean values from i1 to a wider type. See getBooleanContents.
+  void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
+    BooleanContents = IntTy;
+    BooleanFloatContents = FloatTy;
+  }

   /// Specify how the target extends the result of a vector boolean value from a
   /// vector of i1 to a wider type. See getBooleanContents.
@@ -966,11 +1131,6 @@ protected:
     UseUnderscoreLongJmp = Val;
   }

-  /// Indicate whether the target can generate code for jump tables.
-  void setSupportJumpTables(bool Val) {
-    SupportJumpTables = Val;
-  }
-
   /// Indicate the number of blocks to generate jump tables rather than if
   /// sequence.
   void setMinimumJumpTableEntries(int Val) {
@@ -1010,6 +1170,14 @@ protected:
     HasMultipleConditionRegisters = hasManyRegs;
   }

+  /// Tells the code generator that the target has BitExtract instructions.
+  /// The code generator will aggressively sink "shift"s into the blocks of
+  /// their users if the users will generate "and" instructions which can be
+  /// combined with "shift" to BitExtract instructions.
+  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
+    HasExtractBitsInsn = hasExtractInsn;
+  }
+
   /// Tells the code generator not to expand a sequence of operations into
   /// separate sequences that increase the amount of flow control.
   void setJumpIsExpensive(bool isExpensive = true) {
@@ -1021,21 +1189,24 @@ protected:
   /// containing an integer divide.
   void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }

+  /// Tells the code generator that fsqrt is cheap, and should not be replaced
+  /// with an alternative sequence of instructions.
+  void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
+
+  /// Tells the code generator that this target supports floating point
+  /// exceptions and cares about preserving floating point exception behavior.
+  void setHasFloatingPointExceptions(bool FPExceptions = true) {
+    HasFloatingPointExceptions = FPExceptions;
+  }
+
   /// Tells the code generator which bitwidths to bypass.
   void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
     BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
   }

-  /// Tells the code generator that it shouldn't generate srl/add/sra for a
-  /// signed divide by power of two, and let the target handle it.
-  void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; }
-
-  /// Tells the code-generator that it is safe to execute sdiv/udiv/srem/urem
-  /// even when RHS is 0. It is also safe to execute sdiv/srem when LHS is
-  /// SignedMinValue and RHS is -1.
-  void setDivIsWellDefined (bool isWellDefined = true) {
-    DivIsWellDefined = isWellDefined;
-  }
+  /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
+  /// signed divide by power of two; let the target handle it.
+  void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; }

   /// Add the specified register class as an available regclass for the
   /// specified value type. This indicates the selector can handle values of
@@ -1076,19 +1247,18 @@ protected:
   /// Indicate that the specified load with extension does not work with the
   /// specified type and indicate what to do about it.
-  void setLoadExtAction(unsigned ExtType, MVT VT,
+  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
                         LegalizeAction Action) {
-    assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
-           "Table isn't big enough!");
-    LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action;
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
+           MemVT.isValid() && "Table isn't big enough!");
+    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action;
   }

   /// Indicate that the specified truncating store does not work with the
   /// specified type and indicate what to do about it.
   void setTruncStoreAction(MVT ValVT, MVT MemVT,
                            LegalizeAction Action) {
-    assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
-           "Table isn't big enough!");
+    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
     TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action;
   }

@@ -1099,7 +1269,7 @@ protected:
   /// TargetLowering.cpp
   void setIndexedLoadAction(unsigned IdxMode, MVT VT,
                             LegalizeAction Action) {
-    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+    assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
           (unsigned)Action < 0xf && "Table isn't big enough!");
    // Load actions are kept in the upper half.
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
@@ -1113,7 +1283,7 @@ protected:
   /// TargetLowering.cpp
   void setIndexedStoreAction(unsigned IdxMode, MVT VT,
                              LegalizeAction Action) {
-    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+    assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
           (unsigned)Action < 0xf && "Table isn't big enough!");
    // Store actions are kept in the lower half.
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
@@ -1124,8 +1294,7 @@ protected:
   /// target and indicate what to do about it.
   void setCondCodeAction(ISD::CondCode CC, MVT VT,
                          LegalizeAction Action) {
-    assert(VT < MVT::LAST_VALUETYPE &&
-           (unsigned)CC < array_lengthof(CondCodeActions) &&
+    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
           "Table isn't big enough!");
    /// The lower 5 bits of the SimpleTy index into the Nth 2-bit set from the 32-bit
    /// value and the upper 27 bits index into the second dimension of the array
@@ -1176,7 +1345,8 @@ protected:
   /// Set the target's preferred loop alignment. Default alignment is zero,
   /// which means the target does not care about loop alignment. The alignment is
-  /// specified in log2(bytes).
+  /// specified in log2(bytes). The target may also override
+  /// getPrefLoopAlignment to provide per-loop values.
   void setPrefLoopAlignment(unsigned Align) {
     PrefLoopAlignment = Align;
   }
@@ -1339,6 +1509,14 @@ public:
     return isZExtFree(Val.getValueType(), VT2);
   }

+  /// Return true if an fpext operation is free (for instance, because
+  /// single-precision floating-point numbers are implicitly extended to
+  /// double-precision).
+  virtual bool isFPExtFree(EVT VT) const {
+    assert(VT.isFloatingPoint());
+    return false;
+  }
+
   /// Return true if an fneg operation is free to the point where it is never
   /// worthwhile to replace it with a bitwise operation.
   virtual bool isFNegFree(EVT VT) const {
@@ -1381,6 +1559,15 @@ public:
                                  Type *Ty) const {
     return false;
   }
+
+  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+  /// with this index. This is needed because EXTRACT_SUBVECTOR usually
+  /// has custom lowering that depends on the index of the first element,
+  /// and only the target knows which lowering is cheap.
+  virtual bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
@@ -1420,7 +1607,6 @@ public:
 private:
   const TargetMachine &TM;
   const DataLayout *DL;
-  const TargetLoweringObjectFile &TLOF;

   /// True if this is a little endian target.
   bool IsLittleEndian;
@@ -1436,32 +1622,40 @@ private:
   /// the blocks of their users.
   bool HasMultipleConditionRegisters;

+  /// Tells the code generator that the target has BitExtract instructions.
+  /// The code generator will aggressively sink "shift"s into the blocks of
+  /// their users if the users will generate "and" instructions which can be
+  /// combined with "shift" to BitExtract instructions.
+  bool HasExtractBitsInsn;
+
   /// Tells the code generator not to expand integer divides by constants into a
   /// sequence of muls, adds, and shifts. This is a hack until a real cost
   /// model is in place. If we ever optimize for size, this will be set to true
   /// unconditionally.
   bool IntDivIsCheap;

+  // Don't expand fsqrt with an approximation based on the inverse sqrt.
+  bool FsqrtIsCheap;
+
   /// Tells the code generator to bypass slow divide or remainder
   /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
   /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
   /// div/rem when the operands are positive and less than 256.
   DenseMap<unsigned int, unsigned int> BypassSlowDivWidths;

-  /// Tells the code generator that it shouldn't generate srl/add/sra for a
-  /// signed divide by power of two, and let the target handle it.
-  bool Pow2DivIsCheap;
-
-  /// Tells the code-generator that it is safe to execute sdiv/udiv/srem/urem
-  /// even when RHS is 0. It is also safe to execute sdiv/srem when LHS is
-  /// SignedMinValue and RHS is -1.
-  bool DivIsWellDefined;
+  /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
+  /// signed divide by power of two; let the target handle it.
+  bool Pow2SDivIsCheap;

   /// Tells the code generator that it shouldn't generate extra flow control
   /// instructions and should attempt to combine flow control instructions via
   /// predication.
   bool JumpIsExpensive;

+  /// Whether the target supports or cares about preserving floating point
+  /// exception behavior.
+  bool HasFloatingPointExceptions;
+
   /// This target prefers to use _setjmp to implement llvm.setjmp.
   ///
   /// Defaults to false.
@@ -1472,10 +1666,6 @@ private:
   /// Defaults to false.
   bool UseUnderscoreLongJmp;

-  /// Whether the target can generate code for jumptables. If it's not true,
-  /// then each jumptable must be lowered into if-then-else's.
-  bool SupportJumpTables;
-
   /// Number of blocks threshold to use jump tables.
   int MinimumJumpTableEntries;

@@ -1483,6 +1673,10 @@ private:
   /// a type wider than i1. See getBooleanContents.
   BooleanContent BooleanContents;

+  /// Information about the contents of the high-bits in boolean values held in
+  /// a type wider than i1, for floating point results. See getBooleanContents.
+  BooleanContent BooleanFloatContents;
+
   /// Information about the contents of the high-bits in boolean vector values
   /// when the element type is wider than i1. See getBooleanContents.
   BooleanContent BooleanVectorContents;
@@ -1564,7 +1758,8 @@ private:
   /// For each load extension type and each value type, keep a LegalizeAction
   /// that indicates how instruction selection should deal with a load of a
   /// specific value type and extension type.
-  uint8_t LoadExtActions[MVT::LAST_VALUETYPE][ISD::LAST_LOADEXT_TYPE];
+  uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]
+                        [ISD::LAST_LOADEXT_TYPE];

   /// For each value type pair keep a LegalizeAction that indicates whether a
   /// truncating store of a specific value type and truncating type is legal.
@@ -1599,7 +1794,7 @@ public:
       LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);

       assert(
-        (LA == TypeLegal ||
+        (LA == TypeLegal || LA == TypeSoftenFloat ||
          ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)
          && "Promote may not follow Expand or Promote");

@@ -1805,6 +2000,9 @@ protected:
   /// a mask of a single bit, a compare, and a branch into a single instruction.
   bool MaskAndBranchFoldingIsLegal;

+  /// \see enableExtLdPromotion.
+  bool EnableExtLdPromotion;
+
 protected:
   /// Return true if the value types that can be represented by the specified
   /// register class are all legal.
@@ -1825,9 +2023,8 @@ class TargetLowering : public TargetLoweringBase {
   void operator=(const TargetLowering&) LLVM_DELETED_FUNCTION;

 public:
-  /// NOTE: The constructor takes ownership of TLOF.
-  explicit TargetLowering(const TargetMachine &TM,
-                          const TargetLoweringObjectFile *TLOF);
+  /// NOTE: The TargetMachine owns TLOF.
+  explicit TargetLowering(const TargetMachine &TM);

   /// Returns true by value, base pointer and offset pointer and addressing mode
   /// by reference if the node's address can be legally represented as
@@ -1942,11 +2139,11 @@ public:

   /// Determine which of the bits specified in Mask are known to be either zero
   /// or one and return them in the KnownZero/KnownOne bitsets.
-  virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-                                              APInt &KnownZero,
-                                              APInt &KnownOne,
-                                              const SelectionDAG &DAG,
-                                              unsigned Depth = 0) const;
+  virtual void computeKnownBitsForTargetNode(const SDValue Op,
+                                             APInt &KnownZero,
+                                             APInt &KnownOne,
+                                             const SelectionDAG &DAG,
+                                             unsigned Depth = 0) const;

   /// This method can be implemented by targets that want to expose additional
   /// information about sign bits to the DAG Combiner.
@@ -2016,6 +2213,15 @@ public:
   ///
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;

+  /// Return true if it is profitable to move a following shift through this
+  /// node, adjusting any immediate operands as necessary to preserve semantics.
+  /// This transformation may not be desirable if it disrupts a particularly
+  /// auspicious target-specific tree (e.g. bitfield extraction in AArch64).
+  /// By default, it returns true.
+  virtual bool isDesirableToCommuteWithShift(const SDNode *N /*Op*/) const {
+    return true;
+  }
+
   /// Return true if the target has native support for the specified value type
   /// and it is 'desirable' to use the type for the given node type. e.g. On x86
   /// i16 is legal, but undesirable since i16 instruction encodings are longer
@@ -2101,41 +2307,109 @@ public:
     unsigned NumFixedArgs;
     CallingConv::ID CallConv;
     SDValue Callee;
-    ArgListTy &Args;
+    ArgListTy Args;
     SelectionDAG &DAG;
     SDLoc DL;
     ImmutableCallSite *CS;
+    bool IsPatchPoint;
     SmallVector<ISD::OutputArg, 32> Outs;
     SmallVector<SDValue, 32> OutVals;
     SmallVector<ISD::InputArg, 32> Ins;

+    CallLoweringInfo(SelectionDAG &DAG)
+      : RetTy(nullptr), RetSExt(false), RetZExt(false), IsVarArg(false),
+        IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true),
+        IsTailCall(false), NumFixedArgs(-1), CallConv(CallingConv::C),
+        DAG(DAG), CS(nullptr), IsPatchPoint(false) {}
+
+    CallLoweringInfo &setDebugLoc(SDLoc dl) {
+      DL = dl;
+      return *this;
+    }
+
+    CallLoweringInfo &setChain(SDValue InChain) {
+      Chain = InChain;
+      return *this;
+    }
+
+    CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
+                                SDValue Target, ArgListTy &&ArgsList,
+                                unsigned FixedArgs = -1) {
+      RetTy = ResultType;
+      Callee = Target;
+      CallConv = CC;
+      NumFixedArgs =
+        (FixedArgs == static_cast<unsigned>(-1) ? Args.size() : FixedArgs);
+      Args = std::move(ArgsList);
+      return *this;
+    }
+
+    CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
+                                SDValue Target, ArgListTy &&ArgsList,
+                                ImmutableCallSite &Call) {
+      RetTy = ResultType;
+
+      IsInReg = Call.paramHasAttr(0, Attribute::InReg);
+      DoesNotReturn = Call.doesNotReturn();
+      IsVarArg = FTy->isVarArg();
+      IsReturnValueUsed = !Call.getInstruction()->use_empty();
+      RetSExt = Call.paramHasAttr(0, Attribute::SExt);
+      RetZExt = Call.paramHasAttr(0, Attribute::ZExt);
+
+      Callee = Target;
+
+      CallConv = Call.getCallingConv();
+      NumFixedArgs = FTy->getNumParams();
+      Args = std::move(ArgsList);
+
+      CS = &Call;
+
+      return *this;
+    }
+
+    CallLoweringInfo &setInRegister(bool Value = true) {
+      IsInReg = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setNoReturn(bool Value = true) {
+      DoesNotReturn = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setVarArg(bool Value = true) {
+      IsVarArg = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setTailCall(bool Value = true) {
+      IsTailCall = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setDiscardResult(bool Value = true) {
+      IsReturnValueUsed = !Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setSExtResult(bool Value = true) {
+      RetSExt = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setZExtResult(bool Value = true) {
+      RetZExt = Value;
+      return *this;
+    }
+
+    CallLoweringInfo &setIsPatchPoint(bool Value = true) {
+      IsPatchPoint = Value;
+      return *this;
+    }

-    /// Constructs a call lowering context based on the ImmutableCallSite \p cs.
-    CallLoweringInfo(SDValue chain, Type *retTy,
-                     FunctionType *FTy, bool isTailCall, SDValue callee,
-                     ArgListTy &args, SelectionDAG &dag, SDLoc dl,
-                     ImmutableCallSite &cs)
-    : Chain(chain), RetTy(retTy), RetSExt(cs.paramHasAttr(0, Attribute::SExt)),
-      RetZExt(cs.paramHasAttr(0, Attribute::ZExt)), IsVarArg(FTy->isVarArg()),
-      IsInReg(cs.paramHasAttr(0, Attribute::InReg)),
-      DoesNotReturn(cs.doesNotReturn()),
-      IsReturnValueUsed(!cs.getInstruction()->use_empty()),
-      IsTailCall(isTailCall), NumFixedArgs(FTy->getNumParams()),
-      CallConv(cs.getCallingConv()), Callee(callee), Args(args), DAG(dag),
-      DL(dl), CS(&cs) {}
-
-    /// Constructs a call lowering context based on the provided call
-    /// information.
-    CallLoweringInfo(SDValue chain, Type *retTy, bool retSExt, bool retZExt,
-                     bool isVarArg, bool isInReg, unsigned numFixedArgs,
-                     CallingConv::ID callConv, bool isTailCall,
-                     bool doesNotReturn, bool isReturnValueUsed, SDValue callee,
-                     ArgListTy &args, SelectionDAG &dag, SDLoc dl)
-    : Chain(chain), RetTy(retTy), RetSExt(retSExt), RetZExt(retZExt),
-      IsVarArg(isVarArg), IsInReg(isInReg), DoesNotReturn(doesNotReturn),
-      IsReturnValueUsed(isReturnValueUsed), IsTailCall(isTailCall),
-      NumFixedArgs(numFixedArgs), CallConv(callConv), Callee(callee),
-      Args(args), DAG(dag), DL(dl), CS(nullptr) {}
+    ArgListTy &getArgs() {
+      return Args;
+    }
   };

   /// This function lowers an abstract call to a function into an actual call.
@@ -2204,18 +2478,34 @@ public:
     return "__clear_cache";
   }

+  /// Return the register ID of the name passed in. Used by the named register
+  /// global variables extension. There is no target-independent behaviour
+  /// so the default action is to bail.
+  virtual unsigned getRegisterByName(const char* RegName, EVT VT) const {
+    report_fatal_error("Named registers not implemented for this target");
+  }
+
   /// Return the type that should be used to zero or sign extend a
   /// zeroext/signext integer argument or return value. FIXME: Most C calling
   /// conventions require the return type to be promoted, but this is not true
   /// all the time, e.g. i1 on x86-64. It is also not necessary for non-C
   /// calling conventions. The frontend should handle this and include all of
   /// the necessary information.
-  virtual MVT getTypeForExtArgOrReturn(MVT VT,
+  virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                                        ISD::NodeType /*ExtendKind*/) const {
-    MVT MinVT = getRegisterType(MVT::i32);
+    EVT MinVT = getRegisterType(Context, MVT::i32);
     return VT.bitsLT(MinVT) ? MinVT : VT;
   }

+  /// For some targets, an LLVM struct type must be broken down into multiple
+  /// simple types, but the calling convention specifies that the entire struct
+  /// must be passed in a block of consecutive registers.
+  virtual bool
+  functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
+                                            bool isVarArg) const {
+    return false;
+  }
+
   /// Returns a 0-terminated array of registers that can be safely used as
   /// scratch registers.
   virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -2351,11 +2641,10 @@ public:
     unsigned getMatchedOperand() const;

     /// Copy constructor for copying from a ConstraintInfo.
-    AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
-      : InlineAsm::ConstraintInfo(info),
-        ConstraintType(TargetLowering::C_Unknown),
-        CallOperandVal(nullptr), ConstraintVT(MVT::Other) {
-    }
+    AsmOperandInfo(InlineAsm::ConstraintInfo Info)
+        : InlineAsm::ConstraintInfo(std::move(Info)),
+          ConstraintType(TargetLowering::C_Unknown), CallOperandVal(nullptr),
+          ConstraintVT(MVT::Other) {}
   };

   typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
@@ -2416,10 +2705,57 @@ public:
   //
   SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
                          SelectionDAG &DAG) const;
-  SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-                    std::vector<SDNode *> *Created) const;
-  SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-                    std::vector<SDNode *> *Created) const;
+  SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                    bool IsAfterLegalization,
+                    std::vector<SDNode *> *Created) const;
+  SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                    bool IsAfterLegalization,
+                    std::vector<SDNode *> *Created) const;
+  virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                SelectionDAG &DAG,
+                                std::vector<SDNode *> *Created) const {
+    return SDValue();
+  }
+
+  /// Indicate whether this target prefers to combine the given number of FDIVs
+  /// with the same divisor.
+  virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
+    return false;
+  }
+
+  /// Hooks for building estimates in place of slower divisions and square
+  /// roots.
+
+  /// Return a reciprocal square root estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// The boolean UseOneConstNR output is used to select a Newton-Raphson
+  /// algorithm implementation that uses one constant or two constants.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRsqrtEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps,
+                                   bool &UseOneConstNR) const {
+    return SDValue();
+  }
+
+  /// Return a reciprocal estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRecipEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps) const {
+    return SDValue();
+  }

   //===--------------------------------------------------------------------===//
   // Legalization utility functions
   //
@@ -2436,8 +2772,14 @@ public:
   /// \returns true if the node has been expanded, false if it has not
   bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                  SelectionDAG &DAG, SDValue LL = SDValue(),
-                  SDValue LH = SDValue(), SDValue RL = SDValue(),
-                  SDValue RH = SDValue()) const;
+                 SDValue LH = SDValue(), SDValue RL = SDValue(),
+                 SDValue RH = SDValue()) const;
+
+  /// Expand a float (f32) to SINT (i64) conversion.
+  /// \param N Node to expand
+  /// \param Result output after conversion
+  /// \returns true if the expansion was successful, false otherwise
+  bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

   //===--------------------------------------------------------------------===//
   // Instruction Emitting Hooks
   //
@@ -2458,6 +2800,12 @@ public:
   /// ARM 's' setting instructions.
   virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
                                              SDNode *Node) const;
+
+  /// If this function returns true, SelectionDAGBuilder emits a
+  /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
+  virtual bool useLoadStackGuardNode() const {
+    return false;
+  }
 };

 /// Given an LLVM IR type and return type attributes, compute the return value
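As a usage note for the CallLoweringInfo changes earlier in this patch: the two removed monolithic constructors are replaced by a default-constructed object plus chained setters. The following minimal sketch, not part of the patch, shows the pattern from inside a target's lowering code; Chain, Callee, Op, DAG, and the TLI reference are assumed to be in scope, and the i32 result type is purely illustrative.

    // Sketch only: building a call with the new chained-setter interface.
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op;                                 // SDValue being passed
    Entry.Ty = Type::getInt32Ty(*DAG.getContext());  // its IR type
    Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(SDLoc(Op))
        .setChain(Chain)
        .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
                   Callee, std::move(Args));

    // LowerCallTo returns {return value, output chain}.
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);

Because every setter returns *this, optional properties such as setTailCall(), setDiscardResult(), or setIsPatchPoint() can be appended to the chain only when they apply, which is what motivated replacing the old fixed-arity constructors.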