#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
cl::init(0));
static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
// Limit the width of DAG chains. This is important in general to prevent
assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartEVT);
- return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT, Val);
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
}
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT) {
- bool Smaller = ValueVT.bitsLE(PartEVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT.getScalarType(), Val);
- }
+ ValueVT.getVectorElementType() != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartEVT.bitsLE(ValueVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else{
// Vector -> scalar conversion.
assert(ValueVT.getVectorNumElements() == 1 &&
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- bool Smaller = ValueVT.bitsLE(PartVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
Parts[0] = Val;
const DataLayout &DL, unsigned Reg, Type *Ty) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
+ for (EVT ValueVT : ValueVTs) {
unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
llvm_unreachable("Can't get register for value!");
}
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ llvm_unreachable("should never codegen catchpads");
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ WinEHFuncInfo &EHInfo =
+ MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
+ const BasicBlock *SuccessorColor = EHInfo.CatchRetSuccessorColorMap[&I];
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) {
+ llvm_unreachable("should never codegen catchendpads");
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of a funclet.
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchpad,
+/// terminatepad, or catchendpad instructions, and finds all the "real" machine
+/// basic block destinations.
+static void
+findUnwindDestinations(FunctionLoweringInfo &FuncInfo,
+ const BasicBlock *EHPadBB,
+ SmallVectorImpl<MachineBasicBlock *> &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]);
+ UnwindDests.back()->setIsEHFuncletEntry();
+ break;
+ } else if (const auto *CPI = dyn_cast<CatchPadInst>(Pad)) {
+ // Add the catchpad handler to the possible destinations.
+ UnwindDests.push_back(FuncInfo.MBBMap[CPI->getNormalDest()]);
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back()->setIsEHFuncletEntry();
+ EHPadBB = CPI->getUnwindDest();
+ } else if (const auto *CEPI = dyn_cast<CatchEndPadInst>(Pad)) {
+ EHPadBB = CEPI->getUnwindDest();
+ } else if (const auto *CEPI = dyn_cast<CleanupEndPadInst>(Pad)) {
+ EHPadBB = CEPI->getUnwindDest();
+ }
+ }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ // FIXME: The weights for catchpads will be wrong.
+ SmallVector<MachineBasicBlock *, 1> UnwindDests;
+ findUnwindDestinations(FuncInfo, I.getUnwindDest(), UnwindDests);
+ for (MachineBasicBlock *UnwindDest : UnwindDests) {
+ UnwindDest->setIsEHPad();
+ addSuccessorWithWeight(FuncInfo.MBB, UnwindDest);
+ }
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) {
+ report_fatal_error("visitCleanupEndPad not yet implemented!");
+}
+
+void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) {
+ report_fatal_error("visitTerminatePad not yet implemented!");
+}
+
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
Condition = getFCmpCondCode(FC->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- } else {
- (void)Condition; // silence warning.
- llvm_unreachable("Unknown compare instruction");
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc, uint32_t TWeight,
+ Instruction::BinaryOps Opc,
+ uint32_t TWeight,
uint32_t FWeight) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
- BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+ Opcode, getEdgeWeight(BrMBB, Succ0MBB),
getEdgeWeight(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
GuardPtr, MachinePointerInfo(IRGuard, 0),
true, false, false, Align);
- SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
- StackSlotPtr,
- MachinePointerInfo::getFixedStack(FI),
- true, false, false, Align);
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+ false, false, Align);
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithWeight(SwitchBB, B.Default);
- addSuccessorWithWeight(SwitchBB, MBB);
+ addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight);
+ addSuccessorWithWeight(SwitchBB, MBB, B.Weight);
SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, RangeCmp,
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
- // Retrieve successors.
+ // Retrieve successors. Look through artificial IR level blocks like catchpads
+ // and catchendpads for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
- MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
const Value *Callee(I.getCalledValue());
const Function *Fn = dyn_cast<Function>(Callee);
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, LandingPad);
+ visitPatchpoint(&I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
}
} else
- LowerCallTo(&I, getValue(Callee), false, LandingPad);
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
CopyToExportRegsIfNeeded(&I);
}
- // Update successor info
+ SmallVector<MachineBasicBlock *, 1> UnwindDests;
+ findUnwindDestinations(FuncInfo, EHPadBB, UnwindDests);
+
+ // Update successor info.
+ // FIXME: The weights for catchpads will be wrong.
addSuccessorWithWeight(InvokeMBB, Return);
- addSuccessorWithWeight(InvokeMBB, LandingPad);
+ for (MachineBasicBlock *UnwindDest : UnwindDests) {
+ UnwindDest->setIsEHPad();
+ addSuccessorWithWeight(InvokeMBB, UnwindDest);
+ }
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
- assert(FuncInfo.MBB->isLandingPad() &&
+ assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitFSub(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
// Min/max matching is only viable if all output VTs are the same.
if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
- ISD::NodeType Opc = ISD::DELETED_NODE;
- switch (SPF) {
- case SPF_UMAX: Opc = ISD::UMAX; break;
- case SPF_UMIN: Opc = ISD::UMIN; break;
- case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- default: break;
- }
-
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
VT = TLI.getTypeToTransformTo(Ctx, VT);
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY:
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM
+ : ISD::FMINNAN;
+ break;
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM
+ : ISD::FMAXNAN;
+ break;
+ }
+ break;
+ default: break;
+ }
+
if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
// If the underlying comparison instruction is used by any other instruction,
// the consumed instructions won't be destroyed, so it is not profitable
// throughout the function's lifetime.
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
+ isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(
- MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignemt, Mask)
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Value *PtrOperand = I.getArgOperand(1);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(I.getArgOperand(0));
setValue(&I, StoreNode);
}
-// Gather/scatter receive a vector of pointers.
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer - it is the uniform base we
+// are looking for. If first operand of the GEP is a splat vector - we
+// extract the spalt value and use it as a uniform base.
+// In all other cases the function returns 'false'.
+//
static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
- assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (!Gep || Gep->getNumOperands() > 2)
+ SelectionDAG& DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getNumOperands() > 2)
return false;
- ShuffleVectorInst *ShuffleInst =
- dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
- if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
- cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
- Instruction::InsertElement)
+
+ Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+ Value *IndexVal = GEP->getOperand(1);
- SelectionDAG& DAG = SDB->DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Check is the Ptr is inside current basic block
- // If not, look for the shuffle instruction
- if (SDB->findValue(Ptr))
- Base = SDB->getValue(Ptr);
- else if (SDB->findValue(ShuffleInst)) {
- SDValue ShuffleNode = SDB->getValue(ShuffleInst);
- SDLoc sdl = ShuffleNode;
- Base = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, sdl,
- ShuffleNode.getValueType().getScalarType(), ShuffleNode,
- DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDB->setValue(Ptr, Base);
- }
- else
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
- Value *IndexVal = Gep->getOperand(1);
- if (SDB->findValue(IndexVal)) {
- Index = SDB->getValue(IndexVal);
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ if (SDB->findValue(Sext->getOperand(0))) {
IndexVal = Sext->getOperand(0);
- if (SDB->findValue(IndexVal))
- Index = SDB->getValue(IndexVal);
+ Index = SDB->getValue(IndexVal);
}
- return true;
}
- return false;
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+ Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ }
+ return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue InChain = DAG.getRoot();
if (AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
InChain = DAG.getEntryNode();
}
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ AA->pointsToConstantMemory(MemoryLocation(
+ BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
// IntegerPartOfX = ((int32_t)(t0);
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
//
// #define LOG2OFe 1.4426950f
// t0 = Op * LOG2OFe
+
+ // TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
/// limited-precision mode.
static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
/// limited-precision mode.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
/// limited-precision mode.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
}
+ // TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
- // If optimizing for size, don't insert too many multiplies. This
- // inserts up to 5 multiplies.
+ if (!F->optForSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
- isa<Argument>(Address);
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
N);
return nullptr;
}
- } else if (AI)
+ } else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
- else {
- // Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
- DEBUG(Address->dump());
- return nullptr;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::eh_sjlj_setup_dispatch: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return nullptr;
+ }
case Intrinsic::masked_gather:
visitMaskedGather(I);
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
} else {
+ // TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return nullptr;
+ case Intrinsic::uabsdiff:
+ setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::sabsdiff:
+ setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
assert(Reg && "cannot get exception code on this platform");
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
- assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
+ assert(FuncInfo.MBB->isEHPad() && "eh.exceptioncode in non-lpad");
unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
- LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
DAG.setRoot(Result.second);
}
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ if (MMI.hasEHFunclets()) {
+ WinEHFuncInfo &EHInfo =
+ MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
+ EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel);
+ } else {
+ MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
}
return Result;
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CS)
.setTailCall(isTailCall);
- std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(Chain, getCurSDLoc(),
- OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, 0);
OpInfo.CallOperand = StackSlot;
}
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
- unsigned NumArgs, SDValue Callee,
- Type *ReturnTy,
- MachineBasicBlock *LandingPad,
- bool IsPatchPoint) {
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+ ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
.setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
- return lowerInvokable(CLI, LandingPad);
+ return lowerInvokable(CLI, EHPadBB);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
- std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- LandingPad, true);
+ std::pair<SDValue, SDValue> Result = lowerCallOperands(
+ CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
PtrVT));
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
- false, false, 1);
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
.getSizeInBits();
assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
- if (Low.isNonNegative() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a
- // word without having to subtract minValue. In this case,
- // we can optimize away the subtraction.
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
LowBound = APInt::getNullValue(Low.getBitWidth());
CmpRange = High;
+ ContiguousRange = false;
} else {
LowBound = Low;
CmpRange = High - Low;
BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
}
BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false, nullptr,
- nullptr, std::move(BTI));
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalWeight);
BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
BitTestCases.size() - 1, TotalWeight);
}
// Compute total weight.
- uint32_t UnhandledWeights = 0;
+ uint32_t DefaultWeight = W.DefaultWeight;
+ uint32_t UnhandledWeights = DefaultWeight;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
UnhandledWeights += I->Weight;
assert(UnhandledWeights >= I->Weight && "Weight overflow!");
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
+ UnhandledWeights -= I->Weight;
switch (I->Kind) {
case CC_JumpTable: {
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
- addSuccessorWithWeight(CurMBB, Fallthrough);
- addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ uint32_t JumpWeight = I->Weight;
+ uint32_t FallthroughWeight = UnhandledWeights;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default weight to successors of CurMBB. Also update
+ // the weight on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpWeight += DefaultWeight / 2;
+ FallthroughWeight -= DefaultWeight / 2;
+ JumpMBB->setSuccWeight(SI, DefaultWeight / 2);
+ break;
+ }
+ }
+
+ addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight);
+ addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight);
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
- // If we're in the right place, emit the bit test header header right now.
- if (CurMBB ==SwitchMBB) {
+ BTB->DefaultWeight = UnhandledWeights;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the weight on the edge to Fallthrough to two successors
+ // of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Weight += DefaultWeight / 2;
+ BTB->DefaultWeight -= DefaultWeight / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
}
// The false weight is the sum of all unhandled cases.
- UnhandledWeights -= I->Weight;
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
UnhandledWeights);
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
- uint32_t LeftWeight = LastLeft->Weight;
- uint32_t RightWeight = FirstRight->Weight;
+ uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2;
+ uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the weight on both
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
- WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
- WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+ uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();